diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..7152fe29 --- /dev/null +++ b/.flake8 @@ -0,0 +1,3 @@ +[flake8] +exclude = .git,__pycache__,docs/source/conf.py,old,build,dist +max-line-length = 120 diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml index 8b48ddf1..90c97050 100644 --- a/.github/workflows/black.yml +++ b/.github/workflows/black.yml @@ -1,6 +1,8 @@ name: Lint -on: [pull_request] +on: + pull_request: + branches: [ main ] jobs: lint: diff --git a/.github/workflows/cli-coverage.yml b/.github/workflows/cli-coverage.yml new file mode 100644 index 00000000..8b4e90fb --- /dev/null +++ b/.github/workflows/cli-coverage.yml @@ -0,0 +1,40 @@ +name: test coverage + +on: + push: + branches: [master, dev] + +jobs: + cli-coverage-report: + strategy: + matrix: + python-version: [ "3.12" ] + os: [ ubuntu-latest ] + r: [ release ] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v5 + + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install test dependencies + run: if [ -f requirements/requirements-test.txt ]; then pip install -r requirements/requirements-test.txt; fi + + - run: pip install . + + - name: Run tests + run: coverage run -m pytest + + - name: build coverage + run: coverage html -i + + - run: smokeshow upload htmlcov + env: + SMOKESHOW_GITHUB_STATUS_DESCRIPTION: Coverage {coverage-percentage} + SMOKESHOW_GITHUB_COVERAGE_THRESHOLD: 80 + SMOKESHOW_GITHUB_CONTEXT: coverage + SMOKESHOW_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + SMOKESHOW_GITHUB_PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }} + SMOKESHOW_AUTH_KEY: ${{ secrets.SMOKESHOW_AUTH_KEY }} \ No newline at end of file diff --git a/.github/workflows/pytest-windows.yml b/.github/workflows/pytest-windows.yml new file mode 100644 index 00000000..34a557ed --- /dev/null +++ b/.github/workflows/pytest-windows.yml @@ -0,0 +1,35 @@ +name: Run pytests windows + +on: + push: + branches: [dev] + pull_request: + branches: [main, dev] + +jobs: + pytest: + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: ["3.11"] + os: [windows-latest] + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install all dependencies + run: pip install -r requirements/requirements-all.txt + + - name: Install test dependencies + run: pip install -r requirements/requirements-test.txt + + - name: Install package + run: python -m pip install . 
+ + - name: Run pytest tests + run: pytest tests -v \ No newline at end of file diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/pytest.yml similarity index 65% rename from .github/workflows/run-pytest.yml rename to .github/workflows/pytest.yml index 7864e39a..d19e0e50 100644 --- a/.github/workflows/run-pytest.yml +++ b/.github/workflows/pytest.yml @@ -4,7 +4,7 @@ on: push: branches: [dev] pull_request: - branches: [master, dev] + branches: [main, dev] jobs: pytest: @@ -15,15 +15,15 @@ jobs: os: [ubuntu-latest] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v5 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - name: Install dev dependencies - run: if [ -f requirements/requirements-dev.txt ]; then pip install -r requirements/requirements-dev.txt; fi + - name: Install all dependencies + run: if [ -f requirements/requirements-all.txt ]; then pip install -r requirements/requirements-all.txt; fi - name: Install test dependencies run: if [ -f requirements/requirements-test.txt ]; then pip install -r requirements/requirements-test.txt; fi @@ -32,4 +32,4 @@ jobs: run: python -m pip install . - name: Run pytest tests - run: pytest tests -x -vv --cov=./ --cov-report=xml --remote-data + run: pytest tests -v diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index ddf9a38b..533ef328 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -1,6 +1,3 @@ -# This workflows will upload a Python Package using Twine when a release is created -# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries - name: Upload Python Package on: @@ -14,9 +11,7 @@ jobs: permissions: id-token: write steps: - - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v5 with: python-version: '3.x' - name: Install dependencies @@ -27,4 +22,4 @@ jobs: run: | python setup.py sdist bdist_wheel - name: Publish package distributions to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file diff --git a/.github/workflows/run-codecov.yml b/.github/workflows/run-codecov.yml deleted file mode 100644 index a41a1fde..00000000 --- a/.github/workflows/run-codecov.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: Run codecov - -on: - pull_request: - branches: [master] - -jobs: - pytest: - runs-on: ${{ matrix.os }} - strategy: - matrix: - python-version: [3.9] - os: [ubuntu-latest] - - steps: - - uses: actions/checkout@v2 - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v2 - with: - file: ./coverage.xml - name: py-${{ matrix.python-version }}-${{ matrix.os }} diff --git a/.gitignore b/.gitignore index eda9361f..0d062a9a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,87 +1,9 @@ -# ignore test results -tests/test/* - -# toy/experimental files -*.tsv -*.pkl - -# ignore eggs -.eggs/ - -# generic ignore list: -*.lst - -# Compiled source -*.com -*.class -*.dll -*.exe -*.o *.so -*.pyc - -# Packages -# it's better to unpack these files and commit the raw source -# git has its own built in compression methods -*.7z -*.dmg -*.gz -*.iso -*.jar -*.rar -*.tar -*.zip - -# Logs and databases -*.log -*.sql -*.sqlite - -# OS generated files -.DS_Store -.DS_Store? 
-._* -.Spotlight-V100 -.Trashes -ehthumbs.db -Thumbs.db - -# Gedit temporary files -*~ - -# libreoffice lock files: -.~lock* - -# Default-named test output -microtest/ -open_pipelines/ - -# IDE-specific items -.idea/ -.vscode/ - -# pytest-related -.cache/ -.coverage -.pytest_cache/ - -# Reserved files for comparison -*RESERVE* -# Build-related stuff build/ dist/ -peppy.egg-info/ -.ipynb_checkpoints/ -# env stuff -env/ .env venv/ -.venv/ -# swap files -.swp -.swo -.swn diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fd759dcf..53e07571 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,21 +1,2 @@ repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.0.1 hooks: - - id: trailing-whitespace - - id: check-yaml - - id: end-of-file-fixer - - id: requirements-txt-fixer - - id: trailing-whitespace - - id: check-ast - - - repo: https://github.com/PyCQA/isort - rev: 5.8.0 - hooks: - - id: isort - args: ["--profile", "black"] - - - repo: https://github.com/psf/black - rev: 21.5b2 - hooks: - - id: black diff --git a/LICENSE.txt b/LICENSE.txt index 1b78bad2..e3476396 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,5 +1,3 @@ -Copyright 2017 Nathan Sheffield - Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. diff --git a/MANIFEST.in b/MANIFEST.in index fd948e04..43e283b4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,6 @@ -include LICENSE.txt include requirements/* include README.md +include peppy/pephubclient/* +include peppy/eido/* +include peppy/pephubclient/pephub_oauth/* +include peppy/pephubclient/modules/* diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..40d1dfea --- /dev/null +++ b/Makefile @@ -0,0 +1,13 @@ +lint: + ruff format . + +run-coverage: + coverage run -m pytest + +html-report: + coverage html --omit="*/test*" + +open-coverage: + cd htmlcov && google-chrome index.html + +coverage: run-coverage html-report open-coverage diff --git a/README.md b/README.md index 3c09e42d..80b7fb54 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,4 @@ -# peppy python package - -![Run pytests](https://github.com/pepkit/peppy/workflows/Run%20pytests/badge.svg) -[![codecov](https://codecov.io/gh/pepkit/peppy/branch/master/graph/badge.svg)](https://codecov.io/gh/pepkit/peppy) -[![PEP compatible](https://pepkit.github.io/img/PEP-compatible-green.svg)](https://pep.databio.org) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) -`peppy` is the official python package for reading **Portable Encapsulated Projects** or **PEP**s in Python. -Links to complete documentation: -* Complete documentation and API for the `peppy` python package is at [peppy.databio.org](https://peppy.databio.org). -* Reference documentation for standard **PEP** format is at [pep.databio.org](https://pep.databio.org/). -* Example PEPs for testing `peppy` are in the [example_peps repository](https://github.com/pepkit/example_peps). -* The package [on PyPI](https://pypi.org/project/peppy/). 
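The README changes above describe peppy as the Python reader for Portable Encapsulated Projects. For orientation while reviewing the rest of this diff, here is a minimal sketch of that core API; the config path and printed attributes are placeholders for illustration, not files or columns from this repository:

```python
# Minimal illustration of reading a PEP with peppy (paths are hypothetical).
import peppy

# Load a project from a PEP configuration file (YAML) or a sample table (CSV).
prj = peppy.Project("project_config.yaml")

# Samples are available both as objects and as a pandas DataFrame.
for sample in prj.samples:
    print(sample.sample_name)

print(prj.sample_table.head())
```

The same `Project` object is what the new `peppy/eido` validation and conversion commands in this diff operate on.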
diff --git a/docs/changelog.md b/docs/changelog.md index 4db79aac..2c675114 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,409 +2,33 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. -## [0.40.2] -- 2024-05-28 -### Added -- added `sample_name` property to samples object. - -## [0.40.1] -- 2024-01-11 -### Fixed -- Initializing Project with `NaN` value instead of `None` in `from_pandas` method - - -## [0.40.0] -- 2023-12-18 - -**This version introduced backwards-incompatible changes.** - -### Changed -- Replaced `attmap` with `MutableMapping` (which removes some attributes) -- Replaced OrderedDict with dict -- Deprecated support for Python versions <= 3.7 - -_Due to the changes mentioned above, a few functionalities may be disabled. For example, the `name` and `description` project properties can no longer be accessed with `getitem`; use the `getattr` syntax instead_ - -### Added -- Constructor methods: `Project.from_dict`, `Project.from_pandas`, `Project.from_sample_yaml`, `Project.from_pep_config` - -## [0.35.7] -- 2023-07-19 -### Fixed -- incorrect setting of sample and subsample indexes using from_dict function (#452) -- clarified debug messages - -## [0.35.6] -- 2023-06-27 ### Added -- `orient` argument to `to_dict` method - -### Fixed -- The name of the raw subsample object to match the actual name (list). Commit: #442 - -### Changed -- Reduced the number of items returned in the to_dict(extended=True) method to 3, with the name and description now stored in the config key. - - -## [0.35.5] -- 2023-03-27 -### Fixed -- A [bug](https://github.com/pepkit/peppy/issues/435) with custom sample ids -- Improved performance for large tables dramatically - -## [0.35.4] -- 2023-01-17 -### Fixed -- Fixed disabling rich progress on small datasets bug -- Disabled progressbar if object variable `progressbar` is set False - -## [0.35.3] -- 2022-11-16 -### Fixed -- Returning `NaN` value in initialization project from pandas df - -## [0.35.2] -- 2022-09-13 -### Fixed -- Returning `NaN` value within `to_dict` method was fixed and method now returns `None` instead - -## [0.35.1] -- 2022-09-07 -### Changed -- Organization of test files. Separated unittests from smoketests. - -### Fixed -- The root cause of `np.nan` values showing up in Pandas dataframes. Replaced the values with None right after reading the database, which made it possible to remove all custom `np.nan` to `None` converters used later in the code. -- Typing in some methods. -- Code redundancy in fixtures in conftest. - -### Added -- New test cases with test data - -## [0.35.0] -- 2022-08-25 - -### Changed - -- Optimized converting Projects to and from dict. Now, `to_dict(extended=True)` returns only essential properties to save space and time. -- Small refactors. - -### Fixed - -- Initialization of `peppy.Project` from `pandas.DataFrame`. Now `from_pandas()` can receive sample table, subsample table and config file -- Multiple bugs introduced during initialization of the project with custom index column names - -### Added -- New test cases and test data - -## [0.34.0] -- 2022-08-17 - -### Changed - -- Way of initialization project from dictionary. Now it's possible as follows: `Project().from_dict()` ### Fixed -- Fix error that was raised when duplicated sample in `sample_table` had different read types (single-end mixed with paired-end). 
- -### Added -- Feature of initializing `peppy.Project` from `pandas.DataFrame` -## [0.33.0] -- 2022-07-25 -### Changed -- `pep_version` is no longer a required parameter to create a `peppy.Project` instance from a configuration file. ### Fixed -- Performance issues during sample parsing. Two list comprehensions were combined to speed up this functionality. -- `KeyError` is thrown when attempting to access the `pep_version` of a `peppy.Project` instance instatiated from a sample table (`csv`) - ### Added -- Implementation of `__eq__` for the `peppy.Project` class such that two instances of the class can be compared using python's equality operators (`==`, `!=`). -- New `from_dict` function that lets a user instatiate a new `peppy.Project` object using an in-memory representation of a PEP (a `dict`). This supports database storage of PEPs. -- New `extended` flag for the `to_dict` method on `peppy.Project` objects. This creates a **richer** dictionary representation of PEPs. -- Better sample parsing - -## [0.32.0] -- 2022-05-03 - -### Changed - -- Unify exceptions related to remote YAML file reading in `read_yaml` function. Now always a `RemoteYAMLError` is thrown. -- `Project` dict representation ### Added -- Support for PEP `2.1.0`, whichi includes support for no YAML configuration file component (CSV only), automatic sample merging if there are any duplicates in sample table index column, and new project attributes: `sample_table_index` and `subsample_table_index`. - -### Fixed - -- Project string representation; [Issue 368](https://github.com/pepkit/peppy/issues/368) - -## [0.31.2] -- 2021-11-04 -### Fixed -- Bug with setuptools 58 - -## [0.31.1] -- 2021-04-15 ### Added -- Support for remote URL config files - -### Fixed - -- Error when accessing `Project.subsample_table` property when no subsample tables were defined - -## [0.31.0] - 2020-10-07 ### Added -- `to_dict` method in `Sample` class that can include or exclude `Project` reference - -## [0.30.3] - 2020-09-22 - -### Changed - -- If there's just one `subsample_table` specified, `Project.subsample_table` property will return an object of `pandas.DataFrame` class rather than a `list` of ones - -### Fixed - -- `TypeError` when `subsample_table` is set to `null` - -## [0.30.2] - 2020-08-06 - ### Added -- Support for multiple subsample tables -- License file to the package source distribution - -## [0.30.1] - 2020-05-26 - -### Changed - -- Package authors list - -## [0.30.0] - 2020-05-26 - -**This version introduced backwards-incompatible changes.** - -### Added - -- attribute duplication functionality -- config importing functionality -- attribute removal functionality -- possibility to define multi-attribute rules in attribute implication - -### Changed - -- Project configuration file to follow [PEP2.0.0](http://pep.databio.org/en/2.0.0/specification/) specification. Browse the specification for changes related to config format -- Do not require `sample_name` attribute in the sample table - -## [0.22.3] - 2019-12-13 - -### Changed - -- Remove `is_command_callable` from `utils` module; instead, refer to [`ubiquerg`](https://pypi.org/project/ubiquerg/). -- It's now exceptional (rather than just a warning) for a sample table file to be missing a valid name column. - ### Fixed -- Empty columns in subsample tables are treated just as empty columns in sample tables (respective attributes are not included rather than populated with `nan`) - -## [0.22.2] - 2019-06-20 - -### Changed - -- Remove `ngstk` requirement. 
- -## [0.22.1] - 2019-06-19 - -### Changed - -- Prohibit storing reference to full `Project` object on a `Sample`. - -## [0.22.0] -- (2019-06-06) - -### Changed - -- Deprecate `Project` `constants` in favor of `constant_attributes.` -- Improved `Project` text representation for interactive/terminal display (`__repr__`): [Issue 296](https://github.com/pepkit/peppy/issues/296) - ### Fixed -- Properly use `constant_attributes` if present from subproject. [Issue 292](https://github.com/pepkit/peppy/issues/292) -- Fixed a bug with subproject activation paths -- Revert deprecation of `sample_name` to `name`; so `sample_name` is again approved. - -## [0.21.0] -- (2019-05-02) - -### Added - -- Support for Snakemake projects (particularly `SnakeProject`) -- Hook for `get_arg_string` on `Project` to omit some pipeline options/arguments from the returned argument string -- `sample_table` and `subsample_table` functions, providing a functional syntax for requesting the respective attribute values from a `Project` -- Hook on `merge_sample` for specifying name of subannotation column that stores name for each sample - -### Changed - -- Improved messaging: ["Unmatched regex-like"](https://github.com/pepkit/peppy/issues/223), ["Missing and/or empty attribute(s)"](https://github.com/pepkit/peppy/issues/282) -- On `Project`, `sheet` is deprecated in favor of `sample_table`. -- On `Project`, `sample_subannotation` is deprecated in favor of `subsample_table`. -- On `Sample`, reference to `sample_name` is deprecated in favor of simply `name`. - -## [0.20.0] -- (2019-04-17) - ### Added - -- `subsample_table` on a `Project` gives the table of sample subannotation / "units" if applicable. - -### Changed - -- Add `attribute` parameter to `fetch_samples` function to enable more general applicability. - Additionally, the attribute value matching is more strict now -- requires perfect match. -- Remove Python 3.4 support. -- Use `attmap` for implementation of attribute-style access into a key-value collection. -- Deprecate `sample_annotation` and `sample_subannotation` in favor of `sample_table` and `subsample_table`, respectively. - -## [0.19.0] -- (2019-01-16) - -### New - -- Added `activate_subproject` method to `Project`. - -### Changed - -- `Project` construction no longer requires sample annotations sheet. -- Specification of assembly/ies in project config outside of `implied_attributes` is deprecated. -- `implied_columns` and `derived_columns` are deprecated in favor of `implied_attributes` and `derived_attributes`. - -## [0.18.2] -- (2018-07-23) - -### Fixed - -- Made requirements more lenient to allow for newer versions of required packages. - -## [0.18.1] -- (2018-06-29) - -### Fixed - -- Fixed a bug that would cause sample attributes to lose order. -- Fixed a bug that caused an install error with newer `numexpr` versions. - -### New - -- Project names are now inferred with the `infer_name` function, which uses a priority lookup to infer the project name: First, the `name` attribute in the `yaml` file; otherwise, the containing folder unless it is `metadata`, in which case, it's the parent of that folder. -- Add `get_sample` and `get_samples` functions to `Project` objects. -- Add `get_subsamples`and `get_subsample` functions to both `Project` and `Sample` objects. -- Subsamples are now objects that can be retrieved individually by name, with the `subsample_name` as the index column header. 
- -## [0.17.2] -- (2018-04-03) - -## Fixed - -- Ensure data source path relativity is with respect to project config file's folder. - -## [0.17.1] -- (2017-12-21) - -### Changed - -- Version bump for first pypi release -- Fixed bug with packaging for pypi release - -## [0.9.0] -- (2017-12-21) - -### New - -- Separation completed, `peppy` package is now standalone -- `looper` can now rely on `peppy` - -### Changed - -- `merge_table` renamed to `sample_subannotation` -- setup changed for compatibility with PyPI - -## [0.8.1] -- (2017-11-16) - -### New - -- Separated from looper into its own python package (originally called `pep`) - -## [0.7.2] -- (2017-11-16) - -### Fixed - -- Correctly count successful command submissions when not using `--dry-run`. - -## [0.7.1] -- (2017-11-15) - -### Fixed - -- No longer falsely display that there's a submission failure. -- Allow non-string values to be unquoted in the `pipeline_args` section. - -## [0.7.0] -- (2017-11-15) - -### New - -- Add `--lump` and `--lumpn` options -- Catch submission errors from cluster resource managers -- Implied columns can now be derived -- Now protocols can be specified on the command-line `--include-protocols` -- Add rudimentary figure summaries -- Allow wildcard protocol_mapping for catch-all pipeline assignment -- New sample_subtypes section in pipeline_interface - -### Changed - -- Sample child classes are now defined explicitly in the pipeline interface. Previously, they were guessed based on presence of a class extending Sample in a pipeline script. -- Changed 'library' key sample attribute to 'protocol' -- Improve user messages -- Simplifies command-line help display - -## [0.6.0] -- (2017-07-21) - -### New - -- Add support for implied_column section of the project config file -- Add support for Python 3 -- Merges pipeline interface and protocol mappings. This means we now allow direct pointers to `pipeline_interface.yaml` files, increasing flexibility, so this relaxes the specified folder structure that was previously used for `pipelines_dir` (with `config` subfolder). -- Allow URLs as paths to sample sheets. -- Allow tsv format for sample sheets. -- Checks that the path to a pipeline actually exists before writing the submission script. - -### Changed - -- Changed LOOPERENV environment variable to PEPENV, generalizing it to generic models -- Changed name of `pipelines_dir` to `pipeline_interfaces` (but maintained backwards compatibility for now). -- Changed name of `run` column to `toggle`, since `run` can also refer to a sequencing run. -- Relaxes many constraints (like resources sections, pipelines_dir columns), making project configuration files useful outside looper. This moves us closer to dividing models from looper, and improves flexibility. -- Various small bug fixes and dev improvements. -- Require `setuptools` for installation, and `pandas 0.20.2`. If `numexpr` is installed, version `2.6.2` is required. 
-- Allows tilde in `pipeline_interfaces` - -## [0.5.0] -- (2017-03-01) - -### New - -- Add new looper version tracking, with `--version` and `-V` options and printing version at runtime -- Add support for asterisks in file paths -- Add support for multiple pipeline directories in priority order -- Revamp of messages make more intuitive output -- Colorize output -- Complete rehaul of logging and test infrastructure, using logging and pytest packages - -### Changed - -- Removes pipelines_dir requirement for models, making it useful outside looper -- Small bug fixes related to `all_input_files` and `required_input_files` attributes -- More robust installation and more explicit requirement of Python 2.7 - -## [0.4.0] -- (2017-01-12) - -### New - -- New command-line interface (CLI) based on sub-commands -- New subcommand (`looper summarize`) replacing the `summarizePipelineStats.R` script -- New subcommand (`looper check`) replacing the `flagCheck.sh` script -- New command (`looper destroy`) to remove all output of a project -- New command (`looper clean`) to remove intermediate files of a project flagged for deletion -- Support for portable and pipeline-independent allocation of computing resources with Looperenv. - -### Changed - -- Removed requirement to have `pipelines` repository installed in order to extend base Sample objects -- Maintenance of sample attributes as provided by user by means of reading them in as strings (to be improved further -- Improved serialization of Sample objects diff --git a/docs/templates/usage.template b/docs/templates/usage.template new file mode 100644 index 00000000..c7211be4 --- /dev/null +++ b/docs/templates/usage.template @@ -0,0 +1,6 @@ +# Usage reference + +pephubclient is a command line tool that can be used to interact with the PEPhub API. +It can be used to create, update, delete PEPs in the PEPhub database. + +Below are usage examples for the different commands that can be used with pephubclient. \ No newline at end of file diff --git a/docs/usage.md b/docs/usage.md new file mode 100644 index 00000000..6fd05bdc --- /dev/null +++ b/docs/usage.md @@ -0,0 +1,65 @@ +# Usage reference + +pephubclient is a command line tool that can be used to interact with the PEPhub API. +It can be used to create, update, delete PEPs in the PEPhub database. + +Below are usage examples for the different commands that can be used with pephubclient.## `phc --help` +```console + + Usage: pephubclient [OPTIONS] COMMAND [ARGS]... + +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ --version -v App version │ +│ --install-completion [bash|zsh|fish|powershell|pwsh] Install completion for the specified shell. [default: None] │ +│ --show-completion [bash|zsh|fish|powershell|pwsh] Show completion for the specified shell, to copy it or customize the installation. [default: None] │ +│ --help Show this message and exit. 
│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ login Login to PEPhub │ +│ logout Logout │ +│ pull Download and save project locally. │ +│ push Upload/update project in PEPhub │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ + +``` + +## `phc pull --help` +```console + + Usage: pephubclient pull [OPTIONS] PROJECT_REGISTRY_PATH + + Download and save project locally. + +╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ * project_registry_path TEXT [default: None] [required] │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ --force --no-force Overwrite project if it exists. [default: no-force] │ +│ --zip --no-zip Save project as zip file. [default: no-zip] │ +│ --output TEXT Output directory. [default: None] │ +│ --help Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ + +``` + +## `phc push --help` +```console + + Usage: pephubclient push [OPTIONS] CFG + + Upload/update project in PEPhub + +╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ * cfg TEXT Project config file (YAML) or sample table (CSV/TSV)with one row per sample to constitute project [default: None] [required] │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ * --namespace TEXT Project namespace [default: None] [required] │ +│ * --name TEXT Project name [default: None] [required] │ +│ --tag TEXT Project tag [default: None] │ +│ --force --no-force Force push to the database. Use it to update, or upload project. [default: no-force] │ +│ --is-private --no-is-private Upload project as private. 
[default: no-is-private] │ +│ --help Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ + +``` + diff --git a/peppy/cli.py b/peppy/cli.py new file mode 100644 index 00000000..3ff8980c --- /dev/null +++ b/peppy/cli.py @@ -0,0 +1,33 @@ +import typer + +from ._version import __version__ +from .const import PKG_NAME +from .eido.cli import app as eido_app +from .pephubclient.cli import app as phc_app + + +def version_callback(value: bool): + if value: + typer.echo(f"{PKG_NAME} version: {__version__}") + raise typer.Exit() + + +app = typer.Typer(help=f"{PKG_NAME} - Portable Encapsulated Projects toolkit") + + +@app.callback() +def common( + ctx: typer.Context, + version: bool = typer.Option( + None, "--version", "-v", callback=version_callback, help="package version" + ), +): + pass + + +app.add_typer(phc_app, name="phc", help="Client for the PEPhub server") +app.add_typer(eido_app, name="eido", help="PEP validation, conversion, and inspection") + + +def main(): + app(prog_name=PKG_NAME) diff --git a/peppy/const.py b/peppy/const.py index 6295806f..a6dd02b7 100644 --- a/peppy/const.py +++ b/peppy/const.py @@ -123,3 +123,13 @@ SUBSAMPLE_RAW_LIST_KEY = "_subsample_list" __all__ = PROJECT_CONSTANTS + SAMPLE_CONSTANTS + OTHER_CONSTANTS + + +SCHEMA_SECTIONS = ["PROP_KEY", "TANGIBLE_KEY", "SIZING_KEY"] + +SCHEMA_VALIDAION_KEYS = [ + "MISSING_KEY", + "REQUIRED_INPUTS_KEY", + "ALL_INPUTS_KEY", + "INPUT_FILE_SIZE_KEY", +] diff --git a/peppy/eido/__init__.py b/peppy/eido/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/peppy/eido/__init__.py @@ -0,0 +1 @@ + diff --git a/peppy/eido/cli.py b/peppy/eido/cli.py new file mode 100644 index 00000000..0fc9dce1 --- /dev/null +++ b/peppy/eido/cli.py @@ -0,0 +1,355 @@ +import sys +from logging import CRITICAL, DEBUG, ERROR, INFO, WARN, Logger +from typing import Dict, List, Optional + +import typer +from logmuse import init_logger + +from ..const import PKG_NAME, SAMPLE_NAME_ATTR +from ..project import Project +from .const import CONVERT_CMD, INSPECT_CMD, LOGGING_LEVEL, SUBPARSER_MSGS, VALIDATE_CMD +from .conversion import ( + convert_project, + get_available_pep_filters, + pep_conversion_plugins, +) +from .exceptions import EidoFilterError, EidoValidationError +from .inspection import inspect_project +from .validation import validate_config, validate_project, validate_sample + +LEVEL_BY_VERBOSITY = [ERROR, CRITICAL, WARN, INFO, DEBUG] + +app = typer.Typer() + + +def _configure_logging( + verbosity: Optional[int], + logging_level: Optional[str], + dbg: bool, +) -> str: + """Mimic old verbosity / logging-level behavior.""" + if dbg: + level = logging_level or DEBUG + elif verbosity is not None: + # Verbosity-framed specification trumps logging_level. + level = LEVEL_BY_VERBOSITY[verbosity] + else: + level = LOGGING_LEVEL + return level + + +def _parse_filter_args_str(input: Optional[List[str]]) -> Dict[str, str]: + """ + Parse user input specification. 
+ + :param Iterable[Iterable[str]] input: user command line input, + formatted as follows: [[arg=txt, arg1=txt]] + :return dict: mapping of keys, which are input names and values + """ + lst = [] + for i in input or []: + lst.extend(i) + return ( + {x.split("=")[0]: x.split("=")[1] for x in lst if "=" in x} + if lst is not None + else lst + ) + + +def print_error_summary( + errors_by_type: Dict[str, List[Dict[str, str]]], _LOGGER: Logger +): + """Print a summary of errors, organized by error type""" + n_error_types = len(errors_by_type) + _LOGGER.error(f"Found {n_error_types} types of error:") + for err_type, items in errors_by_type.items(): + n = len(items) + msg = f" - {err_type}: ({n} samples) " + if n < 50: + msg += ", ".join(x["sample_name"] for x in items) + _LOGGER.error(msg) + + if len(errors_by_type) > 1: + final_msg = f"Validation unsuccessful. {len(errors_by_type)} error types found." + else: + final_msg = f"Validation unsuccessful. {len(errors_by_type)} error type found." + + _LOGGER.error(final_msg) + + +@app.callback() +def common( + ctx: typer.Context, + verbosity: Optional[int] = typer.Option( + None, + "--verbosity", + min=0, + max=len(LEVEL_BY_VERBOSITY) - 1, + help=f"Choose level of verbosity (default: {None})", + ), + logging_level: Optional[str] = typer.Option( + None, + "--logging-level", + help="logging level", + ), + dbg: bool = typer.Option( + False, + "--dbg", + help=f"Turn on debug mode (default: {False})", + ), +): + ctx.obj = { + "verbosity": verbosity, + "logging_level": logging_level, + "dbg": dbg, + } + + logger_level = _configure_logging(verbosity, logging_level, dbg) + logger_kwargs = {"level": logger_level, "devmode": dbg} + + global _LOGGER + _LOGGER = init_logger(name=PKG_NAME, **logger_kwargs) + + +@app.command(name=CONVERT_CMD, help=SUBPARSER_MSGS[CONVERT_CMD]) +def convert( + ctx: typer.Context, + pep: Optional[str] = typer.Argument( + None, + metavar="PEP", + help="Path to a PEP configuration file in yaml format.", + ), + st_index: Optional[str] = typer.Option( + None, "--st-index", help="Sample table index to use" + ), + sst_index: Optional[str] = typer.Option( + None, "--sst-index", help="Subsample table index to use" + ), + amendments: Optional[List[str]] = typer.Option( + None, + "--amendments", + help="Names of the amendments to activate.", + ), + format_: str = typer.Option( + "yaml", + "-f", + "--format", + help="Output format (name of filter; use -l to see available).", + ), + sample_name: Optional[List[str]] = typer.Option( + None, + "-n", + "--sample-name", + help="Name of the samples to inspect.", + ), + args: Optional[List[str]] = typer.Option( + None, + "-a", + "--args", + help=( + "Provide arguments to the filter function " "(e.g. arg1=val1 arg2=val2)." + ), + ), + list_filters: bool = typer.Option( + False, + "-l", + "--list", + help="List available filters.", + ), + describe: bool = typer.Option( + False, + "-d", + "--describe", + help="Show description for a given filter.", + ), + paths_: Optional[List[str]] = typer.Option( + None, + "-p", + "--paths", + help="Paths to dump conversion result as key=value pairs.", + ), +): + filters = get_available_pep_filters() + if list_filters: + _LOGGER.info("Available filters:") + if len(filters) < 1: + _LOGGER.info("No available filters") + for filter_name in filters: + _LOGGER.info(f" - {filter_name}") + sys.exit(0) + if describe: + if format_ not in filters: + raise EidoFilterError( + f"'{format_}' filter not found. 
Available filters: {', '.join(filters)}" + ) + filter_functions_by_name = pep_conversion_plugins() + print(filter_functions_by_name[format_].__doc__) + sys.exit(0) + if pep is None: + typer.echo(ctx.get_help(), err=True) + _LOGGER.info("The following arguments are required: PEP") + sys.exit(1) + + if paths_: + paths = {y[0]: y[1] for y in [x.split("=") for x in paths_]} + else: + paths = None + + p = Project( + pep, + sample_table_index=st_index, + subsample_table_index=sst_index, + amendments=amendments, + ) + + plugin_kwargs = _parse_filter_args_str(args) + + # append paths + plugin_kwargs["paths"] = paths + + convert_project(p, format_, plugin_kwargs) + _LOGGER.info("Conversion successful") + sys.exit(0) + + +@app.command(name=VALIDATE_CMD, help=SUBPARSER_MSGS[VALIDATE_CMD]) +def validate( + pep: str = typer.Argument( + None, + metavar="PEP", + help="Path to a PEP configuration file in yaml format.", + ), + schema: str = typer.Option( + None, + "-s", + "--schema", + metavar="S", + help="Path to a PEP schema file in yaml format.", + ), + st_index: Optional[str] = typer.Option( + None, + "--st-index", + help=( + f"Sample table index to use; samples are identified by " + f"'{SAMPLE_NAME_ATTR}' by default." + ), + ), + sst_index: Optional[str] = typer.Option( + None, + "--sst-index", + help=( + f"Subsample table index to use; samples are identified by " + f"'{SAMPLE_NAME_ATTR}' by default." + ), + ), + amendments: Optional[List[str]] = typer.Option( + None, + "--amendments", + help="Names of the amendments to activate.", + ), + sample_name: Optional[str] = typer.Option( + None, + "-n", + "--sample-name", + metavar="S", + help=( + "Name or index of the sample to validate. " + "Only this sample will be validated." + ), + ), + just_config: bool = typer.Option( + False, + "-c", + "--just-config", + help="Whether samples should be excluded from the validation.", + ), +): + if sample_name and just_config: + raise typer.BadParameter( + "Use only one of --sample-name or --just-config for 'validate'." + ) + p = Project( + pep, + sample_table_index=st_index, + subsample_table_index=sst_index, + amendments=amendments, + ) + if sample_name: + try: + sample_name = int(sample_name) + except ValueError: + pass + _LOGGER.debug( + f"Comparing Sample ('{pep}') in Project ('{pep}') " + f"against a schema: {schema}" + ) + validator = validate_sample + arguments = [p, sample_name, schema] + elif just_config: + _LOGGER.debug(f"Comparing Project ('{pep}') against a schema: {schema}") + + validator = validate_config + arguments = [p, schema] + else: + _LOGGER.debug(f"Comparing Project ('{pep}') against a schema: {schema}") + + validator = validate_project + arguments = [p, schema] + try: + validator(*arguments) + except EidoValidationError as e: + print_error_summary(e.errors_by_type, _LOGGER) + sys.exit(1) + _LOGGER.info("Validation successful") + sys.exit(0) + + +@app.command(name=INSPECT_CMD, help=SUBPARSER_MSGS[INSPECT_CMD]) +def inspect( + pep: str = typer.Argument( + None, + metavar="PEP", + help="Path to a PEP configuration file in yaml format.", + ), + st_index: Optional[str] = typer.Option( + None, + "--st-index", + help=( + f"Sample table index to use; samples are identified by " + f"'{SAMPLE_NAME_ATTR}' by default." + ), + ), + sst_index: Optional[str] = typer.Option( + None, + "--sst-index", + help=( + f"Subsample table index to use; samples are identified by " + f"'{SAMPLE_NAME_ATTR}' by default." 
+ ), + ), + amendments: Optional[List[str]] = typer.Option( + None, + "--amendments", + help="Names of the amendments to activate.", + ), + sample_name: Optional[List[str]] = typer.Option( + None, + "-n", + "--sample-name", + metavar="SN", + help="Name of the samples to inspect.", + ), + attr_limit: int = typer.Option( + 10, + "-l", + "--attr-limit", + help="Number of sample attributes to display.", + ), +): + p = Project( + pep, + sample_table_index=st_index, + subsample_table_index=sst_index, + amendments=amendments, + ) + inspect_project(p, sample_name, attr_limit) diff --git a/peppy/eido/const.py b/peppy/eido/const.py new file mode 100644 index 00000000..9f3f1919 --- /dev/null +++ b/peppy/eido/const.py @@ -0,0 +1,50 @@ +""" +Constant variables for eido package +""" + +LOGGING_LEVEL = "INFO" +PKG_NAME = "eido" +INSPECT_CMD = "inspect" +VALIDATE_CMD = "validate" +CONVERT_CMD = "convert" +FILTERS_CMD = "filters" +SUBPARSER_MSGS = { + VALIDATE_CMD: "Validate a PEP or its components", + INSPECT_CMD: "Inspect a PEP", + CONVERT_CMD: "Convert PEP format using filters", +} +PROP_KEY = "properties" + +SAMPLES_KEY = "samples" + +TANGIBLE_KEY = "tangible" +SIZING_KEY = "sizing" + +# sample schema input validation key names, these values are required by looper +# to refer to the dict values +MISSING_KEY = "missing" +REQUIRED_INPUTS_KEY = "required_inputs" +ALL_INPUTS_KEY = "all_inputs" +INPUT_FILE_SIZE_KEY = "input_file_size" + +# groups of constants +GENERAL = [ + "LOGGING_LEVEL", + "PKG_NAME", + "INSPECT_CMD", + "VALIDATE_CMD", + "CONVERT_CMD", + "FILTERS_CMD", + "SUBPARSER_MSGS", +] + +SCHEMA_SECTIONS = ["PROP_KEY", "TANGIBLE_KEY", "SIZING_KEY"] + +SCHEMA_VALIDATION_KEYS = [ + "MISSING_KEY", + "REQUIRED_INPUTS_KEY", + "ALL_INPUTS_KEY", + "INPUT_FILE_SIZE_KEY", +] + +__all__ = GENERAL + SCHEMA_SECTIONS + SCHEMA_VALIDATION_KEYS diff --git a/peppy/eido/conversion.py b/peppy/eido/conversion.py new file mode 100644 index 00000000..817ccfd5 --- /dev/null +++ b/peppy/eido/conversion.py @@ -0,0 +1,142 @@ +import sys + +if sys.version_info < (3, 10): + from importlib_metadata import entry_points +else: + from importlib.metadata import entry_points + +import inspect +import os +from logging import getLogger +from typing import Callable, Dict, List, NoReturn, Optional + +from ..project import Project +from .exceptions import EidoFilterError + +_LOGGER = getLogger(__name__) + + +def pep_conversion_plugins() -> Dict[str, Callable]: + """Plugins registered by entry points in the current Python env. + + Returns: + Dict which keys are names of all possible hooks and values are dicts + mapping registered functions names to their values + + Raises: + EidoFilterError: If any of the filters has an invalid signature + """ + plugins = {} + for ep in entry_points(group="pep.filters"): + plugin_fun = ep.load() + if len(list(inspect.signature(plugin_fun).parameters)) != 2: + raise EidoFilterError( + f"Invalid filter plugin signature: {ep.name}. " + f"Filter functions must take 2 arguments: peppy.Project and **kwargs" + ) + plugins[ep.name] = plugin_fun + return plugins + + +def convert_project( + prj: Project, target_format: str, plugin_kwargs: Optional[Dict] = None +) -> Dict[str, str]: + """Convert a `peppy.Project` object to a selected format. 
+ + Args: + prj: A Project object to convert + target_format: The format to convert the Project object to + plugin_kwargs: Kwargs to pass to the plugin function + + Returns: + Dictionary with conversion results + + Raises: + EidoFilterError: If the requested filter is not defined + """ + return run_filter(prj, target_format, plugin_kwargs=plugin_kwargs or dict()) + + +def run_filter( + prj: Project, + filter_name: str, + verbose: bool = True, + plugin_kwargs: Optional[Dict] = None, +) -> Dict[str, str]: + """Run a selected filter on a peppy.Project object. + + Args: + prj: A Project to run filter on + filter_name: Name of the filter to run + verbose: Whether to print output to stdout + plugin_kwargs: Kwargs to pass to the plugin function + + Returns: + Dictionary with conversion results + + Raises: + EidoFilterError: If the requested filter is not defined + """ + # convert to empty dictionary if no plugin_kwargs are passed + plugin_kwargs = plugin_kwargs or dict() + + # get necessary objects + installed_plugins = pep_conversion_plugins() + installed_plugin_names = list(installed_plugins.keys()) + paths = plugin_kwargs.get("paths") + env = plugin_kwargs.get("env") + + # set environment + if env is not None: + for var in env: + os.environ[var] = env[var] + + # check for valid filter + if filter_name not in installed_plugin_names: + raise EidoFilterError( + f"Requested filter ({filter_name}) not found. " + f"Available: {', '.join(installed_plugin_names)}" + ) + _LOGGER.info(f"Running plugin {filter_name}") + func = installed_plugins[filter_name] + + # run filter + conv_result = func(prj, **plugin_kwargs) + + # if paths supplied, write to disk + if paths is not None: + # map conversion result to the + # specified path + for result_key in conv_result: + result_path = paths.get(result_key) + if result_path is None: + _LOGGER.warning( + f"Conversion plugin returned key that doesn't exist in specified paths: '{result_key}'." + ) + else: + # create path if it doesn't exist + if not os.path.exists(result_path) and os.path.isdir( + os.path.dirname(result_path) + ): + os.makedirs(os.path.dirname(result_path), exist_ok=True) + save_result(result_path, conv_result[result_key]) + + if verbose: + for result_key in conv_result: + sys.stdout.write(conv_result[result_key]) + + return conv_result + + +def save_result(result_path: str, content: str) -> NoReturn: + with open(result_path, "w") as f: + f.write(content) + + +def get_available_pep_filters() -> List[str]: + """Get a list of available target formats. + + Returns: + A list of available formats + """ + return list(pep_conversion_plugins().keys()) diff --git a/peppy/eido/conversion_plugins.py b/peppy/eido/conversion_plugins.py new file mode 100644 index 00000000..e4405543 --- /dev/null +++ b/peppy/eido/conversion_plugins.py @@ -0,0 +1,85 @@ +"""built-in PEP filters""" + +from typing import Dict + +from .output_formatters import MultilineOutputFormatter + + +def basic_pep_filter(p, **kwargs) -> Dict[str, str]: + """ + Basic PEP filter, that does not convert the Project object. + + This filter can save the PEP representation to file, if kwargs include `path`. + + :param peppy.Project p: a Project to run filter on + """ + return {"project": str(p)} + + +def yaml_samples_pep_filter(p, **kwargs) -> Dict[str, str]: + """ + YAML samples PEP filter, that returns only Sample object representations. + + This filter can save the YAML to file, if kwargs include `path`. 
+ + :param peppy.Project p: a Project to run filter on + """ + from yaml import dump + + samples_yaml = [] + for s in p.samples: + samples_yaml.append(s.to_dict()) + + return {"samples": dump(samples_yaml, default_flow_style=False)} + + +def yaml_pep_filter(p, **kwargs) -> Dict[str, str]: + """ + YAML PEP filter, that returns Project object representation. + + This filter can save the YAML to file, if kwargs include `path`. + + :param peppy.Project p: a Project to run filter on + """ + from yaml import dump + + return {"project": dump(p.config, default_flow_style=False)} + + +def csv_pep_filter(p, **kwargs) -> Dict[str, str]: + """ + CSV PEP filter, that returns Sample object representations + + This filter can save the CSVs to files, if kwargs include + `sample_table_path` and/or `subsample_table_path`. + + :param peppy.Project p: a Project to run filter on + """ + return {"samples": MultilineOutputFormatter.format(p.samples)} + + +def processed_pep_filter(p, **kwargs) -> Dict[str, str]: + """ + Processed PEP filter, that returns the converted sample and subsample tables. + This filter can return the tables as a table or a document. + :param peppy.Project p: a Project to run filter on + :param bool samples_as_objects: Flag to write as a table + :param bool subsamples_as_objects: Flag to write as a table + """ + # get params + samples_as_objects = kwargs.get("samples_as_objects") + subsamples_as_objects = kwargs.get("subsamples_as_objects") + + prj_repr = p.config + + return { + "project": str(prj_repr), + "samples": ( + str(p.samples) if samples_as_objects else str(p.sample_table.to_csv()) + ), + "subsamples": ( + str(p.subsamples) + if subsamples_as_objects + else str(p.subsample_table.to_csv()) + ), + } diff --git a/peppy/eido/exceptions.py b/peppy/eido/exceptions.py new file mode 100644 index 00000000..ac9a9c9e --- /dev/null +++ b/peppy/eido/exceptions.py @@ -0,0 +1,50 @@ +"""Exceptions for specific eido issues.""" + +from abc import ABCMeta +from typing import Dict, List + +__all__ = [ + "EidoFilterError", + "EidoSchemaInvalidError", + "EidoValidationError", + "PathAttrNotFoundError", +] + + +class EidoException(Exception): + """Base type for custom package errors.""" + + __metaclass__ = ABCMeta + + +class PathAttrNotFoundError(EidoException): + """Path-like argument does not exist.""" + + def __init__(self, key): + super(PathAttrNotFoundError, self).__init__(key) + + +class EidoSchemaInvalidError(EidoException): + """Schema does not comply to eido-specific requirements.""" + + def __init__(self, key): + super(EidoSchemaInvalidError, self).__init__(key) + + +class EidoFilterError(EidoException): + """Issue with the PEP filter.""" + + def __init__(self, key): + super(EidoFilterError, self).__init__(key) + + +class EidoValidationError(EidoException): + """Object was not validated successfully according to schema.""" + + def __init__(self, message: str, errors_by_type: Dict[str, List[Dict[str, str]]]): + super().__init__(message) + self.errors_by_type = errors_by_type + self.message = message + + def __str__(self): + return f"EidoValidationError ({self.message}): {self.errors_by_type}" diff --git a/peppy/eido/inspection.py b/peppy/eido/inspection.py new file mode 100644 index 00000000..ecaf8fdc --- /dev/null +++ b/peppy/eido/inspection.py @@ -0,0 +1,124 @@ +import os +from logging import getLogger +from typing import Dict, Iterable, List, Set, Union +from warnings import catch_warnings + +from ubiquerg import size + +from .. 
import Project, Sample +from .const import ( + ALL_INPUTS_KEY, + INPUT_FILE_SIZE_KEY, + MISSING_KEY, + PROP_KEY, + REQUIRED_INPUTS_KEY, + SAMPLES_KEY, + SIZING_KEY, + TANGIBLE_KEY, +) +from .schema import read_schema +from .validation import _get_attr_values, _validate_sample_object + +_LOGGER = getLogger(__name__) + + +def inspect_project( + p: Project, sample_names: Union[None, List[str]] = None, max_attr: int = 10 +) -> None: + """Print inspection info: Project or, if sample_names argument is provided, matched samples. + + Args: + p: Project to inspect + sample_names: List of samples to inspect + max_attr: Max number of sample attributes to display + """ + if sample_names: + samples = p.get_samples(sample_names) + if not samples: + print("No samples matched by names: {}".format(sample_names)) + return + for s in samples: + print(s.__str__(max_attr=max_attr)) + print("\n") + return + print(p) + return + + +def get_input_files_size( + sample: Sample, schema: Union[str, List[Dict]] +) -> Dict[str, Union[List[str], Set[str], float]]: + """Determine which of this Sample's required attributes/files are missing and calculate sizes. + + The names of the attributes that are required and/or deemed as inputs + are sourced from the schema, more specifically from required_input_attrs + and input_attrs sections in samples section. Note, this function does + perform actual Sample object validation with jsonschema. + + Args: + sample: Sample to investigate + schema: Schema dict to validate against or a path to one + + Returns: + Dictionary with validation data, i.e missing, required_inputs, + all_inputs, input_file_size + + Raises: + ValidationError: If any required sample attribute is missing + """ + + def _compute_input_file_size(inputs: Iterable[str]) -> float: + """ + Compute total size of input files. 
+ """ + with catch_warnings(record=True) as w: + total_bytes = sum( + size(f, size_str=False) or 0.0 + for f in inputs + if f != "" and f is not None + ) + if w: + _LOGGER.warning( + f"{len(w)} input files missing, job input size was " + f"not calculated accurately" + ) + return total_bytes / (1024**3) + + if isinstance(schema, str): + schema = read_schema(schema) + + # first, validate attrs existence using jsonschema + _validate_sample_object(schemas=schema, sample=sample) + + all_inputs = set() + required_inputs = set() + schema = schema[-1] # use only first schema, in case there are imports + sample_schema_dict = schema[PROP_KEY][SAMPLES_KEY]["items"] + if SIZING_KEY in sample_schema_dict: + all_inputs.update(_get_attr_values(sample, sample_schema_dict[SIZING_KEY])) + if TANGIBLE_KEY in sample_schema_dict: + required_inputs = set( + _get_attr_values(sample, sample_schema_dict[TANGIBLE_KEY]) + ) + all_inputs.update(required_inputs) + with catch_warnings(record=True) as w: + # input_file_size = sum( + # [ + # size(f, size_str=False) or 0.0 + # for f in all_inputs + # if f != "" and f is not None + # ] + # ) / (1024**3) + input_file_size = _compute_input_file_size(all_inputs) + if w: + _LOGGER.warning( + f"{len(w)} input files missing, job input size was " + f"not calculated accurately" + ) + + return { + MISSING_KEY: [i for i in required_inputs if not os.path.exists(i)], + REQUIRED_INPUTS_KEY: required_inputs, + ALL_INPUTS_KEY: all_inputs, + INPUT_FILE_SIZE_KEY: input_file_size, + } diff --git a/peppy/eido/output_formatters.py b/peppy/eido/output_formatters.py new file mode 100644 index 00000000..ad5e6f84 --- /dev/null +++ b/peppy/eido/output_formatters.py @@ -0,0 +1,126 @@ +from abc import ABC, abstractmethod +from typing import Iterable, List, Union + +from ..sample import Sample + + +class BaseOutputFormatter(ABC): + @staticmethod + @abstractmethod + def format(samples: List[Sample]) -> str: + """ + Convert the samples to correct format. 
+ """ + pass + + +class MultilineOutputFormatter(BaseOutputFormatter): + @staticmethod + def format(samples: List[Sample]) -> str: + output_rows = [] + sample_attributes = [ + attribute + for attribute in samples[0].keys() + if not attribute.startswith("_") and not attribute == "subsample_name" + ] + header = MultilineOutputFormatter._get_header(sample_attributes) + + for sample in samples: + attribute_with_multiple_properties = MultilineOutputFormatter._get_the_name_of_the_first_attribute_with_multiple_properties( + sample, sample_attributes + ) + if attribute_with_multiple_properties: + sample_rows = MultilineOutputFormatter._split_sample_to_multiple_rows( + sample, sample_attributes, attribute_with_multiple_properties + ) + output_rows.extend(sample_rows) + else: + one_sample_row = MultilineOutputFormatter._convert_sample_to_row( + sample, sample_attributes + ) + output_rows.append(one_sample_row) + + return "\n".join(header + output_rows) + "\n" + + @staticmethod + def _get_header(header_column_names: List[str]) -> List[str]: + return [",".join(header_column_names)] + + @staticmethod + def _get_the_name_of_the_first_attribute_with_multiple_properties( + sample: Sample, sample_attributes: List[str] + ) -> Union[str, None]: + for attribute in sample_attributes: + if MultilineOutputFormatter._sample_attribute_is_list(sample, attribute): + return attribute + + @staticmethod + def _split_sample_to_multiple_rows( + sample: Sample, sample_attributes: List, attribute_with_multiple_properties: str + ) -> Iterable[str]: + """ + If one sample object contains array properties instead of single value, then it will be converted + to multiple rows. + + Args: + sample: Sample from project. + sample_attributes: List of all sample properties names (name of columns from sample_table). + + Returns: + List of rows created from given sample object. + """ + number_of_samples_after_split = len( + getattr(sample, attribute_with_multiple_properties) + ) + sample_rows_after_split = [] + + for sample_index in range(number_of_samples_after_split): + sample_row = MultilineOutputFormatter._convert_sample_to_row( + sample, sample_attributes, sample_index + ) + sample_rows_after_split.append(sample_row) + + return sample_rows_after_split + + @staticmethod + def _convert_sample_to_row( + sample: Sample, sample_attributes: List, sample_index: int = 0 + ) -> str: + """ + Converts single sample object to CSV row. + + Some samples have a list of values instead of single value for given attribute (column), and + sample_index indicates index of the value that will be used to create a row. For samples that don't + have any attributes with given names this will always be zero. + + Args: + sample: Single sample object. + sample_attributes: Array of all attributes names (column names) for given sample. + sample_index: Number indicating which value will be used to create row. Some samples + + Returns: + Representation of sample as a CSV row. 
+ """ + sample_row = [] + + for attribute in sample_attributes: + if ( + MultilineOutputFormatter._sample_attribute_is_list(sample, attribute) + and sample[attribute] + ): + value = sample[attribute][sample_index] + else: + value = sample.get(attribute) + + sample_row.append(value or "") + + return ",".join(sample_row) + + @staticmethod + def _sample_attribute_is_list(sample: Sample, attribute: str) -> bool: + return isinstance(getattr(sample, attribute, ""), list) + + +class SampleSubsampleOutputFormatter(BaseOutputFormatter): + def format(self, samples: List[Sample]) -> str: + pass diff --git a/peppy/eido/schema.py b/peppy/eido/schema.py new file mode 100644 index 00000000..851ddd29 --- /dev/null +++ b/peppy/eido/schema.py @@ -0,0 +1,95 @@ +import os +from logging import getLogger +from typing import Dict, List, Union + +from ubiquerg import is_url + +from ..utils import load_yaml +from .const import PROP_KEY, SAMPLES_KEY + +_LOGGER = getLogger(__name__) + + +def preprocess_schema(schema_dict: Dict) -> Dict: + """Preprocess schema before validation for user's convenience. + + Preprocessing includes: + - renaming 'samples' to '_samples' since in the peppy.Project object + _samples attribute holds the list of peppy.Samples objects. + - adding array of strings entry for every string specified to accommodate + subsamples in peppy.Project + + Args: + schema_dict: Schema dictionary to preprocess + + Returns: + Preprocessed schema + """ + _LOGGER.debug(f"schema ori: {schema_dict}") + if "project" not in schema_dict[PROP_KEY]: + _LOGGER.debug("No project section found in schema") + + if SAMPLES_KEY in schema_dict[PROP_KEY]: + if ( + "items" in schema_dict[PROP_KEY][SAMPLES_KEY] + and PROP_KEY in schema_dict[PROP_KEY][SAMPLES_KEY]["items"] + ): + s_props = schema_dict[PROP_KEY][SAMPLES_KEY]["items"][PROP_KEY] + for prop, val in s_props.items(): + if "type" in val and val["type"] in ["string", "number", "boolean"]: + s_props[prop] = {} + s_props[prop]["anyOf"] = [val, {"type": "array", "items": val}] + else: + _LOGGER.debug("No samples section found in schema") + _LOGGER.debug(f"schema processed: {schema_dict}") + return schema_dict + + +def read_schema(schema: Union[str, Dict]) -> List[Dict]: + """Safely read schema from YAML-formatted file. + + If the schema imports any other schemas, they will be read recursively. + + Args: + schema: Path to the schema file or schema in a dict form + + Returns: + Read schemas + + Raises: + TypeError: If the schema arg is neither a Mapping nor a file path or + if the 'imports' sections in any of the schemas is not a list + """ + + def _recursively_read_schemas( + x: Dict, lst: List[Dict], parent_folder: Union[str, None] + ) -> List[Dict]: + if "imports" in x: + if isinstance(x["imports"], list): + for sch in x["imports"]: + if (not is_url(sch)) and (not os.path.isabs(sch)): + # resolve relative path + if parent_folder is not None: + sch = os.path.normpath(os.path.join(parent_folder, sch)) + else: + _LOGGER.warning( + f"The schema contains relative path without known parent folder: {sch}" + ) + lst.extend(read_schema(sch)) + else: + raise TypeError("In schema the 'imports' section has to be a list") + lst.append(x) + return lst + + schema_list = [] + schema_folder = None + if isinstance(schema, str): + _LOGGER.debug(f"Reading schema: {schema}") + schema_folder = os.path.split(schema)[0] + schema = load_yaml(schema) + if not isinstance(schema, dict): + raise TypeError( + f"schema has to be a dict, path to an existing file or URL to a remote one. 
" + f"Got: {type(schema)}" + ) + return _recursively_read_schemas(schema, schema_list, schema_folder) diff --git a/peppy/eido/validation.py b/peppy/eido/validation.py new file mode 100644 index 00000000..cadef703 --- /dev/null +++ b/peppy/eido/validation.py @@ -0,0 +1,280 @@ +import os +from copy import deepcopy as dpcpy +from logging import getLogger +from typing import Dict, List, Mapping, NoReturn, Union +from warnings import warn + +import pandas as pd +from jsonschema import Draft7Validator +from pandas.core.common import flatten + +from ..project import Project +from ..sample import Sample +from ..utils import load_yaml +from .const import PROP_KEY, SAMPLES_KEY, SIZING_KEY, TANGIBLE_KEY +from .exceptions import EidoValidationError, PathAttrNotFoundError +from .schema import preprocess_schema, read_schema + +_LOGGER = getLogger(__name__) + + +def _validate_object( + obj: Mapping, + schema: Union[str, dict], + sample_name_colname: Union[str, bool] = False, +) -> None: + """Generic function to validate object against a schema. + + Args: + obj: An object to validate + schema: Schema dict to validate against or a path to one + sample_name_colname: Column name for sample names in error reporting + + Raises: + EidoValidationError: If validation is unsuccessful + """ + validator = Draft7Validator(schema) + _LOGGER.debug(f"{obj},\n {schema}") + if not validator.is_valid(obj): + errors = sorted(validator.iter_errors(obj), key=lambda e: e.path) + errors_by_type = {} + + # Accumulate and restructure error objects by error type + for error in errors: + if not error.message in errors_by_type: + errors_by_type[error.message] = [] + + try: + instance_name = error.instance[sample_name_colname] + except KeyError: + instance_name = "project" + except TypeError: + instance_name = obj["samples"][error.absolute_path[1]][ + sample_name_colname + ] + errors_by_type[error.message].append( + { + "type": error.message, + "message": f"{error.message} on instance {instance_name}", + "sample_name": instance_name, + } + ) + + raise EidoValidationError("Validation failed", errors_by_type) + else: + _LOGGER.debug("Validation was successful...") + + +def validate_project(project: Project, schema: Union[str, dict]) -> NoReturn: + """Validate a project object against a schema. + + Args: + project: A project object to validate + schema: Schema dict to validate against or a path to one + + Raises: + EidoValidationError: If validation is unsuccessful + """ + sample_name_colname = project.sample_name_colname + schema_dicts = read_schema(schema=schema) + for schema_dict in schema_dicts: + project_dict = project.to_dict() + _validate_object( + project_dict, preprocess_schema(schema_dict), sample_name_colname + ) + _LOGGER.debug("Project validation successful") + + +def _validate_sample_object(sample: Sample, schemas: List[Dict]) -> None: + """Validate a peppy.Sample object without requiring a reference to peppy.Project. + + Args: + sample: A sample object to validate + schemas: List of schemas to validate against or a path to one + """ + for schema_dict in schemas: + schema_dict = preprocess_schema(schema_dict) + sample_schema_dict = schema_dict[PROP_KEY][SAMPLES_KEY]["items"] + _validate_object(sample.to_dict(), sample_schema_dict) + _LOGGER.debug( + f"{getattr(sample, 'sample_name', '')} sample validation successful" + ) + + +def validate_sample( + project: Project, sample_name: Union[str, int], schema: Union[str, dict] +) -> NoReturn: + """Validate the selected sample object against a schema. 
+ + Args: + project: A project object to validate + sample_name: Name or index of the sample to validate + schema: Schema dict to validate against or a path to one + + Raises: + EidoValidationError: If validation is unsuccessful + """ + sample = ( + project.samples[sample_name] + if isinstance(sample_name, int) + else project.get_sample(sample_name) + ) + _validate_sample_object( + sample=sample, + schemas=read_schema(schema=schema), + ) + + +def validate_config( + project: Union[Project, dict, str], schema: Union[str, dict] +) -> NoReturn: + """Validate the config part of the Project object against a schema. + + Args: + project: A project object, dict, or path to config file to validate + schema: Schema dict to validate against or a path to one + """ + schema_dicts = read_schema(schema=schema) + for schema_dict in schema_dicts: + schema_cpy = preprocess_schema(dpcpy(schema_dict)) + try: + del schema_cpy[PROP_KEY][SAMPLES_KEY] + except KeyError: + # Schema doesn't have samples key, which is fine for config-only validation + pass + if "required" in schema_cpy: + try: + schema_cpy["required"].remove(SAMPLES_KEY) + except ValueError: + # SAMPLES_KEY is not in required list, no action needed + pass + if isinstance(project, dict): + _validate_object({"project": project}, schema_cpy) + + elif isinstance(project, str): + try: + project_dict = load_yaml(project) + except (FileNotFoundError, IOError, OSError) as e: + raise ValueError( + f"Please provide a valid yaml config of PEP project; invalid config path: {project}" + ) from e + _validate_object({"project": project_dict}, schema_cpy) + else: + project_dict = project.to_dict() + _validate_object(project_dict, schema_cpy) + _LOGGER.debug("Config validation successful") + + +def _get_attr_values( + obj: Mapping, attrlist: Union[str, List[str]] +) -> Union[None, List[str]]: + """Get value corresponding to each given attribute. + + Args: + obj: An object to get the attributes from + attrlist: Names of attributes to retrieve values for + + Returns: + Value corresponding to each named attribute; None if this Sample's + value for the attribute is empty/null, or if this Sample lacks the + indicated attribute + """ + # If attribute is None, then value is also None. + if not attrlist: + return None + if not isinstance(attrlist, list): + attrlist = [attrlist] + # Strings contained here are appended later so shouldn't be null. + return list(flatten([getattr(obj, attr, "") for attr in attrlist])) + + +def validate_input_files( + project: Project, + schemas: Union[str, dict], + sample_name: Union[str, int, None] = None, +) -> None: + """Determine which of the required and optional files are missing. + + The names of the attributes that are required and/or deemed as inputs + are sourced from the schema, more specifically from `required_files` + and `files` sections in samples section: + + - If any of the required files are missing, this function raises an error. + - If any of the optional files are missing, the function raises a warning. + + Note, this function also performs Sample object validation with jsonschema. + + Args: + project: Project that defines the samples to validate + schemas: Schema dict to validate against or a path to one + sample_name: Name or index of the sample to validate. 
If None, + validate all samples in the project + + Raises: + PathAttrNotFoundError: If any required sample attribute is missing + """ + + if sample_name is None: + samples = project.samples + else: + samples = ( + project.samples[sample_name] + if isinstance(sample_name, int) + else project.get_sample(sample_name) + ) + samples = [samples] + + if isinstance(schemas, str): + schemas = read_schema(schemas) + + for sample in samples: + # validate attrs existence first + _validate_sample_object(schemas=schemas, sample=sample) + + all_inputs = set() + required_inputs = set() + schema = schemas[-1] # use only first schema, in case there are imports + sample_schema_dict = schema[PROP_KEY][SAMPLES_KEY]["items"] + if SIZING_KEY in sample_schema_dict: + all_inputs.update(_get_attr_values(sample, sample_schema_dict[SIZING_KEY])) + if TANGIBLE_KEY in sample_schema_dict: + required_inputs = set( + _get_attr_values(sample, sample_schema_dict[TANGIBLE_KEY]) + ) + all_inputs.update(required_inputs) + + missing_required_inputs = [i for i in required_inputs if not os.path.exists(i)] + missing_inputs = [i for i in all_inputs if not os.path.exists(i)] + if missing_inputs: + warn( + f"For sample '{getattr(sample, project.sample_table_index)}'. " + f"Optional inputs not found: {missing_inputs}" + ) + if missing_required_inputs: + raise PathAttrNotFoundError( + f"For sample '{getattr(sample, project.sample_table_index)}'. " + f"Required inputs not found: {required_inputs}" + ) + + +def validate_original_samples( + samples: Union[str, pd.DataFrame], schema: Union[str, dict] +) -> None: + """Validate the original samples from the csv table against a schema. + + Args: + samples: The path to the sample table csv or the dataframe from the table + schema: Schema dict to validate against or a path to one + + Raises: + EidoValidationError: If validation is unsuccessful + """ + if isinstance(samples, str): + samples = pd.read_csv(samples) + + assist_project = Project.from_pandas(samples) + for s in assist_project.samples: + _validate_sample_object( + sample=s, + schemas=read_schema(schema=schema), + ) diff --git a/peppy/exceptions.py b/peppy/exceptions.py index ec356f49..d5fd3459 100644 --- a/peppy/exceptions.py +++ b/peppy/exceptions.py @@ -2,6 +2,7 @@ from abc import ABCMeta from collections.abc import Iterable +from typing import Optional __all__ = [ "IllegalStateException", @@ -19,7 +20,7 @@ class PeppyError(Exception): __metaclass__ = ABCMeta - def __init__(self, msg): + def __init__(self, msg: str) -> None: super(PeppyError, self).__init__(msg) @@ -50,12 +51,12 @@ class RemoteYAMLError(PeppyError): class MissingAmendmentError(PeppyError): """Error when project config lacks a requested subproject.""" - def __init__(self, amendment, defined=None): - """ - Create exception with missing amendment request. + def __init__(self, amendment: str, defined: Optional[Iterable[str]] = None) -> None: + """Create exception with missing amendment request. 
- :param str amendment: the requested (and missing) amendment - :param Iterable[str] defined: collection of names of defined amendment + Args: + amendment: The requested (and missing) amendment + defined: Collection of names of defined amendments """ msg = "Amendment '{}' not found".format(amendment) if isinstance(defined, Iterable): diff --git a/peppy/parsers.py b/peppy/parsers.py index 961d6e17..7c357eb8 100644 --- a/peppy/parsers.py +++ b/peppy/parsers.py @@ -88,6 +88,7 @@ def parse(self) -> pd.DataFrame: Parse the sample table """ self.validate_path() + print(f"Path: {self.path}") self._table = pd.read_csv(self.path, **self._pandas_kwargs) self._table = self._table.where(pd.notnull(self._table), None) return self.table diff --git a/peppy/pephubclient/__init__.py b/peppy/pephubclient/__init__.py new file mode 100644 index 00000000..48c42e3f --- /dev/null +++ b/peppy/pephubclient/__init__.py @@ -0,0 +1 @@ +__author__ = "Oleksandr Khoroshevskyi, Rafal Stepien" diff --git a/peppy/pephubclient/cli.py b/peppy/pephubclient/cli.py new file mode 100644 index 00000000..20c90813 --- /dev/null +++ b/peppy/pephubclient/cli.py @@ -0,0 +1,73 @@ +import typer + +from .helpers import call_client_func +from .pephubclient import PEPHubClient + +_client = PEPHubClient() + +app = typer.Typer() + + +@app.command() +def login(): + """ + Login to PEPhub + """ + call_client_func(_client.login) + + +@app.command() +def logout(): + """ + Logout + """ + _client.logout() + + +@app.command() +def pull( + project_registry_path: str, + force: bool = typer.Option(False, help="Overwrite project if it exists."), + zip: bool = typer.Option(False, help="Save project as zip file."), + output: str = typer.Option(None, help="Output directory."), +): + """ + Download and save project locally. + """ + call_client_func( + _client.pull, + project_registry_path=project_registry_path, + force=force, + output=output, + zip=zip, + ) + + +@app.command() +def push( + cfg: str = typer.Argument( + ..., + help="Project config file (YAML) or sample table (CSV/TSV)" + "with one row per sample to constitute project", + ), + namespace: str = typer.Option(..., help="Project namespace"), + name: str = typer.Option(..., help="Project name"), + tag: str = typer.Option(None, help="Project tag"), + force: bool = typer.Option( + False, help="Force push to the database. Use it to update, or upload project." 
+ ), + is_private: bool = typer.Option(False, help="Upload project as private."), +): + """ + Upload/update project in PEPhub + """ + + call_client_func( + _client.push, + cfg=cfg, + namespace=namespace, + name=name, + tag=tag, + is_private=is_private, + force=force, + ) diff --git a/peppy/pephubclient/constants.py b/peppy/pephubclient/constants.py new file mode 100644 index 00000000..075fc83f --- /dev/null +++ b/peppy/pephubclient/constants.py @@ -0,0 +1,48 @@ +import os +from enum import Enum +from typing import Optional + +from pydantic import BaseModel, field_validator + +PEPHUB_BASE_URL = os.getenv( + "PEPHUB_BASE_URL", default="https://pephub-api.databio.org/" +) +# PEPHUB_BASE_URL = "http://0.0.0.0:8000/" +PEPHUB_PEP_API_BASE_URL = f"{PEPHUB_BASE_URL}api/v1/projects/" +PEPHUB_PEP_SEARCH_URL = f"{PEPHUB_BASE_URL}api/v1/namespaces/{{namespace}}/projects" +PEPHUB_PUSH_URL = f"{PEPHUB_BASE_URL}api/v1/namespaces/{{namespace}}/projects/json" + +PEPHUB_SAMPLE_URL = f"{PEPHUB_BASE_URL}api/v1/projects/{{namespace}}/{{project}}/samples/{{sample_name}}" +PEPHUB_VIEW_URL = ( + f"{PEPHUB_BASE_URL}api/v1/projects/{{namespace}}/{{project}}/views/{{view_name}}" +) +PEPHUB_VIEW_SAMPLE_URL = f"{PEPHUB_BASE_URL}api/v1/projects/{{namespace}}/{{project}}/views/{{view_name}}/{{sample_name}}" + + +class RegistryPath(BaseModel): + protocol: Optional[str] = None + namespace: str + item: str + subitem: Optional[str] = None + tag: Optional[str] = "default" + + @field_validator("tag") + def tag_should_not_be_none(cls, v): + return v or "default" + + +class ResponseStatusCodes(int, Enum): + OK = 200 + ACCEPTED = 202 + UNAUTHORIZED = 401 + FORBIDDEN = 403 + NOT_EXIST = 404 + CONFLICT = 409 + INTERNAL_ERROR = 500 + + +USER_DATA_FILE_NAME = "jwt.txt" +HOME_PATH = os.getenv("HOME") +if not HOME_PATH: + HOME_PATH = os.path.expanduser("~") +PATH_TO_FILE_WITH_JWT = os.path.join(HOME_PATH, ".pephubclient/") + USER_DATA_FILE_NAME diff --git a/peppy/pephubclient/exceptions.py b/peppy/pephubclient/exceptions.py new file mode 100644 index 00000000..bb8787f4 --- /dev/null +++ b/peppy/pephubclient/exceptions.py @@ -0,0 +1,33 @@ +from typing import Optional + + +class BasePephubclientException(Exception): + def __init__(self, message: str): + super().__init__(message) + + +class IncorrectQueryStringError(BasePephubclientException): + def __init__(self, query_string: Optional[str] = None): + self.query_string = query_string + super().__init__( + f"PEP data with passed namespace and project ({self.query_string}) name not found." + ) + + +class ResponseError(BasePephubclientException): + default_message = "The response looks incorrect and must be verified manually." + + def __init__(self, message: Optional[str] = None): + self.message = message + super().__init__(self.message or self.default_message) + + +class PEPExistsError(BasePephubclientException): + default_message = ( + "PEP already exists. 
Change location, delete previous PEP, or set force argument " + "to overwrite previous PEP" + ) + + def __init__(self, message: Optional[str] = None): + self.message = message + super().__init__(self.message or self.default_message) diff --git a/peppy/pephubclient/files_manager.py b/peppy/pephubclient/files_manager.py new file mode 100644 index 00000000..9cb65d28 --- /dev/null +++ b/peppy/pephubclient/files_manager.py @@ -0,0 +1,95 @@ +import os +import zipfile +from contextlib import suppress +from pathlib import Path + +import pandas +import yaml + +from .exceptions import PEPExistsError + + +class FilesManager: + @staticmethod + def save_jwt_data_to_file(path: str, jwt_data: str) -> None: + """ + Save jwt to provided path + """ + Path(os.path.dirname(path)).mkdir(parents=True, exist_ok=True) + with open(path, "w") as f: + f.write(jwt_data) + + @staticmethod + def load_jwt_data_from_file(path: str) -> str: + """ + Open the file with username and ID and load this data. + """ + with suppress(FileNotFoundError): + with open(path, "r") as f: + return f.read() + + @staticmethod + def create_project_folder( + parent_path: str, + folder_name: str, + ) -> str: + """ + Create new project folder + + :param parent_path: parent path to create folder in + :param folder_name: folder name + :return: folder_path + """ + if parent_path: + if not Path(parent_path).exists(): + raise OSError( + f"Parent path does not exist. Provided path: {parent_path}" + ) + folder_path = os.path.join(parent_path or os.getcwd(), folder_name) + Path(folder_path).mkdir(parents=True, exist_ok=True) + return folder_path + + @staticmethod + def save_yaml(config: dict, full_path: str, not_force: bool = False): + FilesManager.check_writable(path=full_path, force=not not_force) + with open(full_path, "w") as outfile: + yaml.dump(config, outfile, default_flow_style=False) + + @staticmethod + def save_pandas(df: pandas.DataFrame, full_path: str, not_force: bool = False): + FilesManager.check_writable(path=full_path, force=not not_force) + df.to_csv(full_path, index=False) + + @staticmethod + def file_exists(full_path: str) -> bool: + return os.path.isfile(full_path) + + @staticmethod + def delete_file_if_exists(filename: str) -> None: + with suppress(FileNotFoundError): + os.remove(filename) + print( + f"\033[38;5;11m{f'File was deleted successfully -> {filename}'}\033[0m" + ) + + @staticmethod + def check_writable(path: str, force: bool = True): + if not force and os.path.isfile(path): + raise PEPExistsError(f"File already exists and won't be updated: {path}") + + @staticmethod + def save_zip_file(files_dict: dict, file_path: str, force: bool = False) -> None: + """ + Save zip file with provided files as dict. + + :param files_dict: dict with files to save. e.g. 
{"file1.txt": "file1 content"} + :param file_path: filename to save zip file to + :param force: overwrite file if exists + :return: None + """ + FilesManager.check_writable(path=file_path, force=force) + with zipfile.ZipFile( + file_path, mode="w", compression=zipfile.ZIP_DEFLATED + ) as zf: + for name, res in files_dict.items(): + zf.writestr(name, str.encode(res)) diff --git a/peppy/pephubclient/helpers.py b/peppy/pephubclient/helpers.py new file mode 100644 index 00000000..e7119ac4 --- /dev/null +++ b/peppy/pephubclient/helpers.py @@ -0,0 +1,323 @@ +import json +import os +from typing import Any, Callable, Optional, Union +from urllib.parse import urlencode + +import pandas as pd +import requests +import yaml +from pydantic import ValidationError +from requests.exceptions import ConnectionError +from ubiquerg import parse_registry_path + +from ..const import ( + CFG_SAMPLE_TABLE_KEY, + CFG_SUBSAMPLE_TABLE_KEY, + CONFIG_KEY, + DESC_KEY, + NAME_KEY, + SAMPLE_RAW_DICT_KEY, + SUBSAMPLE_RAW_LIST_KEY, +) +from ..project import Project +from .constants import RegistryPath +from .exceptions import PEPExistsError, ResponseError +from .files_manager import FilesManager +from .models import ProjectDict + + +class RequestManager: + @staticmethod + def send_request( + method: str, + url: str, + headers: Optional[dict] = None, + cookies: Optional[dict] = None, + params: Optional[dict] = None, + json: Optional[Union[dict, list]] = None, + ) -> requests.Response: + request_return = requests.request( + method=method, + url=url, + verify=False, + cookies=cookies, + headers=headers, + params=params, + json=json, + timeout=10, + ) + if request_return.status_code == 401: + if ( + RequestManager.decode_response(request_return, output_json=True).get( + "detail" + ) + == "JWT has expired" + ): + raise ResponseError("JWT has expired. Please log in again.") + return request_return + + @staticmethod + def decode_response( + response: requests.Response, encoding: str = "utf-8", output_json: bool = False + ) -> Union[str, dict]: + """ + Decode the response from PEPhub and pack the returned data into appropriate model. + + :param response: Response from PEPhub. + :param encoding: Response encoding [Default: utf-8] + :param output_json: If True, return response in json format + :return: Response data as an instance of correct model. + """ + + try: + if output_json: + return response.json() + else: + return response.content.decode(encoding) + except json.JSONDecodeError as err: + raise ResponseError(f"Error in response encoding format: {err}") + + @staticmethod + def parse_query_param(pep_variables: dict) -> str: + """ + Grab all the variables passed by user (if any) and parse them to match the format specified + by PEPhub API for query parameters. + + :param pep_variables: dict of query parameters + :return: PEPHubClient variables transformed into string in correct format. + """ + return "?" 
+ urlencode(pep_variables) + + @staticmethod + def parse_header(jwt_data: Optional[str] = None) -> dict: + """ + Create Authorization header + + :param jwt_data: jwt string + :return: Authorization dict + """ + if jwt_data: + return {"Authorization": jwt_data} + else: + return {} + + +class MessageHandler: + """ + Class holding print function in different colors + """ + + RED = 9 + YELLOW = 11 + GREEN = 40 + + @staticmethod + def print_error(text: str) -> None: + print(f"\033[38;5;9m{text}\033[0m") + + @staticmethod + def print_success(text: str) -> None: + print(f"\033[38;5;40m{text}\033[0m") + + @staticmethod + def print_warning(text: str) -> None: + print(f"\033[38;5;11m{text}\033[0m") + + +def call_client_func(func: Callable[..., Any], **kwargs) -> Any: + """ + Catch exceptions in functions called through cli. + + :param func: The function to call. + :param kwargs: The keyword arguments to pass to the function. + :return: The result of the function call. + """ + + try: + func(**kwargs) + except ConnectionError as err: + MessageHandler.print_error(f"Failed to connect to server. Try later. {err}") + except ResponseError as err: + MessageHandler.print_error(f"{err}") + except PEPExistsError as err: + MessageHandler.print_warning(f"PEP already exists. {err}") + except OSError as err: + MessageHandler.print_error(f"{err}") + + +def is_registry_path(input_string: str) -> bool: + """ + Check if input is a registry path to pephub + :param str input_string: path to the PEP (or registry path) + :return bool: True if input is a registry path + """ + if input_string.endswith(".yaml"): + return False + try: + RegistryPath(**parse_registry_path(input_string)) + except (ValidationError, TypeError): + return False + return True + + +def unwrap_registry_path(input_string: str) -> RegistryPath: + """ + Unwrap registry path from string + :param str input_string: path to the PEP (or registry path) + :return RegistryPath: RegistryPath object + """ + return RegistryPath(**parse_registry_path(input_string)) + + +def _build_filename(registry_path: RegistryPath) -> str: + """ + Takes query string and creates output filename to save the project to. + + :param registry_path: Query string that was used to find the project. + :return: Filename uniquely identifying the project. 
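+
+    Example (hypothetical registry path): ``databio/example:default`` yields
+    ``databio_example_default``.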
+ """ + filename = "_".join(filter(bool, [registry_path.namespace, registry_path.item])) + if registry_path.tag: + filename += f"_{registry_path.tag}" + return filename + + +def _save_zip_pep(project: dict, zip_filepath: str, force: bool = False) -> None: + """ + Zip and save a project + + :param project: peppy project to zip + :param zip_filepath: path to save zip file + :param force: overwrite project if exists + """ + + content_to_zip = {} + config = project[CONFIG_KEY] + project_name = config[NAME_KEY] + + if project[SAMPLE_RAW_DICT_KEY] is not None: + config[CFG_SAMPLE_TABLE_KEY] = ["sample_table.csv"] + content_to_zip["sample_table.csv"] = pd.DataFrame( + project[SAMPLE_RAW_DICT_KEY] + ).to_csv(index=False) + + if project[SUBSAMPLE_RAW_LIST_KEY] is not None: + if not isinstance(project[SUBSAMPLE_RAW_LIST_KEY], list): + config[CFG_SUBSAMPLE_TABLE_KEY] = ["subsample_table1.csv"] + content_to_zip["subsample_table1.csv"] = pd.DataFrame( + project[SUBSAMPLE_RAW_LIST_KEY] + ).to_csv(index=False) + else: + config[CFG_SUBSAMPLE_TABLE_KEY] = [] + for number, file in enumerate(project[SUBSAMPLE_RAW_LIST_KEY]): + file_name = f"subsample_table{number + 1}.csv" + config[CFG_SUBSAMPLE_TABLE_KEY].append(file_name) + content_to_zip[file_name] = pd.DataFrame(file).to_csv(index=False) + + content_to_zip[f"{project_name}_config.yaml"] = yaml.dump(config, indent=4) + FilesManager.save_zip_file(content_to_zip, file_path=zip_filepath, force=force) + + MessageHandler.print_success(f"Project was saved successfully -> {zip_filepath}") + return None + + +def _save_unzipped_pep( + project_dict: dict, folder_path: str, force: bool = False +) -> None: + """ + Save unzipped project to specified folder + + :param project_dict: raw pep project + :param folder_path: path to save project + :param force: overwrite project if exists + :return: None + """ + + def full_path(fn: str) -> str: + return os.path.join(folder_path, fn) + + project_name = project_dict[CONFIG_KEY][NAME_KEY] + sample_table_filename = "sample_table.csv" + yaml_full_path = full_path(f"{project_name}_config.yaml") + sample_full_path = full_path(sample_table_filename) + if not force: + extant = [p for p in [yaml_full_path, sample_full_path] if os.path.isfile(p)] + if extant: + raise PEPExistsError(f"{len(extant)} file(s) exist(s): {', '.join(extant)}") + + config_dict = project_dict.get(CONFIG_KEY) + config_dict[NAME_KEY] = project_name + config_dict[DESC_KEY] = project_dict[CONFIG_KEY][DESC_KEY] + config_dict["sample_table"] = sample_table_filename + + sample_pandas = pd.DataFrame(project_dict.get(SAMPLE_RAW_DICT_KEY, {})) + + subsample_list = [ + pd.DataFrame(sub_a) for sub_a in project_dict.get(SUBSAMPLE_RAW_LIST_KEY) or [] + ] + + filenames = [] + for idx, subsample in enumerate(subsample_list): + fn = f"subsample_table{idx + 1}.csv" + filenames.append(fn) + FilesManager.save_pandas(subsample, full_path(fn), not_force=False) + config_dict["subsample_table"] = filenames + + FilesManager.save_yaml(config_dict, yaml_full_path, not_force=False) + FilesManager.save_pandas(sample_pandas, sample_full_path, not_force=False) + + if config_dict.get("subsample_table"): + for number, subsample in enumerate(subsample_list): + FilesManager.save_pandas( + subsample, + os.path.join(folder_path, config_dict["subsample_table"][number]), + not_force=False, + ) + + MessageHandler.print_success(f"Project was saved successfully -> {folder_path}") + return None + + +def save_pep( + project: Union[dict, Project], + reg_path: str = None, + force: bool = False, + 
project_path: Optional[str] = None, + zip: bool = False, +) -> None: + """ + Save project locally. + + :param dict project: PEP dictionary (raw project) + :param str reg_path: Project registry path in PEPhub (e.g. databio/base:default). If not provided, + folder will be created with just project name. + :param bool force: overwrite project if exists + :param str project_path: Path where project will be saved. By default, it will be saved in current directory. + :param bool zip: If True, save project as zip file + :return: None + """ + if isinstance(project, Project): + project = project.to_dict(extended=True, orient="records") + + project = ProjectDict(**project).model_dump(by_alias=True) + + if not project_path: + project_path = os.getcwd() + + if reg_path: + file_name = _build_filename(RegistryPath(**parse_registry_path(reg_path))) + else: + file_name = project[CONFIG_KEY][NAME_KEY] + + if zip: + _save_zip_pep( + project, + zip_filepath=f"{os.path.join(project_path, file_name)}.zip", + force=force, + ) + return None + + folder_path = FilesManager.create_project_folder( + parent_path=project_path, folder_name=file_name + ) + _save_unzipped_pep(project, folder_path, force=force) diff --git a/peppy/pephubclient/models.py b/peppy/pephubclient/models.py new file mode 100644 index 00000000..e69ef92e --- /dev/null +++ b/peppy/pephubclient/models.py @@ -0,0 +1,56 @@ +import datetime +from typing import List, Optional, Union + +from pydantic import BaseModel, ConfigDict, Field, field_validator + +from ..const import CONFIG_KEY, SAMPLE_RAW_DICT_KEY, SUBSAMPLE_RAW_LIST_KEY + + +class ProjectDict(BaseModel): + """ + Project dict (raw) model + """ + + config: dict = Field(alias=CONFIG_KEY) + subsample_list: Optional[list] = Field(alias=SUBSAMPLE_RAW_LIST_KEY) + sample_list: list = Field(alias=SAMPLE_RAW_DICT_KEY) + + model_config = ConfigDict(populate_by_name=True, extra="allow") + + +class ProjectUploadData(BaseModel): + """ + Model used in post request to upload project + """ + + pep_dict: ProjectDict + tag: Optional[str] = "default" + is_private: Optional[bool] = False + overwrite: Optional[bool] = False + + @field_validator("tag") + def tag_should_not_be_none(cls, v): + return v or "default" + + +class ProjectAnnotationModel(BaseModel): + namespace: str + name: str + tag: str + is_private: bool + number_of_samples: int + description: str + last_update_date: datetime.datetime + submission_date: datetime.datetime + digest: str + pep_schema: Union[str, int, None] = None + pop: bool = False + stars_number: Optional[int] = 0 + forked_from: Optional[Union[str, None]] = None + + +class SearchReturnModel(BaseModel): + count: int + limit: int + offset: int + results: List[ProjectAnnotationModel] diff --git a/tests/__init__.py b/peppy/pephubclient/modules/__init__.py similarity index 100% rename from tests/__init__.py rename to peppy/pephubclient/modules/__init__.py diff --git a/peppy/pephubclient/modules/sample.py b/peppy/pephubclient/modules/sample.py new file mode 100644 index 00000000..3199063a --- /dev/null +++ b/peppy/pephubclient/modules/sample.py @@ -0,0 +1,208 @@ +import logging + +from ..constants import PEPHUB_SAMPLE_URL, ResponseStatusCodes +from ..exceptions import ResponseError +from ..helpers import RequestManager + +_LOGGER = logging.getLogger("pephubclient") + + +class PEPHubSample(RequestManager): + """ + Class for managing samples in PEPhub and provides methods for + getting, creating, updating and removing samples. + This class is not related to peppy.Sample class. 
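+
+    Example (illustrative; the registry coordinates and ``token`` below are
+    placeholders, not real values)::
+
+        sample = PEPHubSample(jwt_data=token).get(
+            namespace="databio", name="example", tag="default", sample_name="s1"
+        )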
+ """ + + def __init__(self, jwt_data: str = None): + """ + :param jwt_data: jwt token for authorization + """ + + self.__jwt_data = jwt_data + + def get( + self, + namespace: str, + name: str, + tag: str, + sample_name: str = None, + ) -> dict: + """ + Get sample from project in PEPhub. + + :param namespace: namespace of project + :param name: name of project + :param tag: tag of project + :param sample_name: sample name + :return: Sample object + """ + url = self._build_sample_request_url( + namespace=namespace, name=name, sample_name=sample_name + ) + + url = url + self.parse_query_param(pep_variables={"tag": tag}) + + response = self.send_request( + method="GET", url=url, headers=self.parse_header(self.__jwt_data) + ) + if response.status_code == ResponseStatusCodes.OK: + return self.decode_response(response, output_json=True) + if response.status_code == ResponseStatusCodes.NOT_EXIST: + raise ResponseError( + f"Sample does not exist. Project: '{namespace}/{name}:{tag}'. Sample_name: '{sample_name}'" + ) + elif response.status_code == ResponseStatusCodes.INTERNAL_ERROR: + raise ResponseError("Internal server error. Unexpected return value.") + else: + raise ResponseError( + f"Unexpected return value. Error: {response.status_code}" + ) + + def create( + self, + namespace: str, + name: str, + tag: str, + sample_name: str, + sample_dict: dict, + overwrite: bool = False, + ) -> None: + """ + Create sample in project in PEPhub. + + :param namespace: namespace of project + :param name: name of project + :param tag: tag of project + :param sample_dict: sample dict + :param sample_name: sample name + :param overwrite: overwrite sample if it exists + :return: None + """ + url = self._build_sample_request_url( + namespace=namespace, + name=name, + sample_name=sample_name, + ) + + url = url + self.parse_query_param( + pep_variables={"tag": tag, "overwrite": overwrite} + ) + + # add sample name to sample_dict if it is not there + if sample_name not in sample_dict.values(): + sample_dict["sample_name"] = sample_name + + response = self.send_request( + method="POST", + url=url, + headers=self.parse_header(self.__jwt_data), + json=sample_dict, + ) + if response.status_code == ResponseStatusCodes.ACCEPTED: + _LOGGER.info( + f"Sample '{sample_name}' added to project '{namespace}/{name}:{tag}' successfully." + ) + return None + elif response.status_code == ResponseStatusCodes.NOT_EXIST: + raise ResponseError(f"Project '{namespace}/{name}:{tag}' does not exist.") + elif response.status_code == ResponseStatusCodes.CONFLICT: + raise ResponseError( + f"Sample '{sample_name}' already exists. Set overwrite to True to overwrite sample." + ) + else: + raise ResponseError( + f"Unexpected return value. Error: {response.status_code}" + ) + + def update( + self, + namespace: str, + name: str, + tag: str, + sample_name: str, + sample_dict: dict, + ): + """ + Update sample in project in PEPhub. 
+ + :param namespace: namespace of project + :param name: name of project + :param tag: tag of project + :param sample_name: sample name + :param sample_dict: sample dict, that contain elements to update, or + :return: None + """ + + url = self._build_sample_request_url( + namespace=namespace, name=name, sample_name=sample_name + ) + + url = url + self.parse_query_param(pep_variables={"tag": tag}) + + response = self.send_request( + method="PATCH", + url=url, + headers=self.parse_header(self.__jwt_data), + json=sample_dict, + ) + if response.status_code == ResponseStatusCodes.ACCEPTED: + _LOGGER.info( + f"Sample '{sample_name}' updated in project '{namespace}/{name}:{tag}' successfully." + ) + return None + elif response.status_code == ResponseStatusCodes.NOT_EXIST: + raise ResponseError( + f"Sample '{sample_name}' or project {namespace}/{name}:{tag} does not exist. Error: {response.status_code}" + ) + else: + raise ResponseError( + f"Unexpected return value. Error: {response.status_code}" + ) + + def remove(self, namespace: str, name: str, tag: str, sample_name: str): + """ + Remove sample from project in PEPhub. + + :param namespace: namespace of project + :param name: name of project + :param tag: tag of project + :param sample_name: sample name + :return: None + """ + url = self._build_sample_request_url( + namespace=namespace, name=name, sample_name=sample_name + ) + + url = url + self.parse_query_param(pep_variables={"tag": tag}) + + response = self.send_request( + method="DELETE", + url=url, + headers=self.parse_header(self.__jwt_data), + ) + if response.status_code == ResponseStatusCodes.ACCEPTED: + _LOGGER.info( + f"Sample '{sample_name}' removed from project '{namespace}/{name}:{tag}' successfully." + ) + return None + elif response.status_code == ResponseStatusCodes.NOT_EXIST: + raise ResponseError( + f"Sample '{sample_name}' or project {namespace}/{name}:{tag} does not exist. Error: {response.status_code}" + ) + else: + raise ResponseError( + f"Unexpected return value. Error: {response.status_code}" + ) + + @staticmethod + def _build_sample_request_url(namespace: str, name: str, sample_name: str) -> str: + """ + Build url for sample request. + + :param namespace: namespace where project will be uploaded + :return: url string + """ + return PEPHUB_SAMPLE_URL.format( + namespace=namespace, project=name, sample_name=sample_name + ) diff --git a/peppy/pephubclient/modules/view.py b/peppy/pephubclient/modules/view.py new file mode 100644 index 00000000..d45de3b9 --- /dev/null +++ b/peppy/pephubclient/modules/view.py @@ -0,0 +1,269 @@ +import logging +from typing import Union + +# import peppy +from ...project import Project +from ..constants import PEPHUB_VIEW_SAMPLE_URL, PEPHUB_VIEW_URL, ResponseStatusCodes +from ..exceptions import ResponseError +from ..helpers import RequestManager +from ..models import ProjectDict + +_LOGGER = logging.getLogger("pephubclient") + + +class PEPHubView(RequestManager): + """ + Class for managing views in PEPhub and provides methods for + getting, creating, updating and removing views. + + This class aims to warp the Views API for easier maintenance and + better user experience. + """ + + def __init__(self, jwt_data: str = None): + """ + :param jwt_data: jwt token for authorization + """ + + self.__jwt_data = jwt_data + + def get( + self, namespace: str, name: str, tag: str, view_name: str, raw: bool = False + ) -> Union[Project, dict]: + """ + Get view from project in PEPhub. 
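+
+        Depending on ``raw``, the view is returned either as a ``peppy.Project``
+        reconstructed from the response or as the raw project dictionary.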
+ + :param namespace: namespace of project + :param name: name of project + :param tag: tag of project + :param view_name: name of the view + :param raw: if True, return raw response + :return: peppy.Project object or dictionary of the project (view) + """ + url = self._build_view_request_url( + namespace=namespace, name=name, view_name=view_name + ) + + url = url + self.parse_query_param(pep_variables={"tag": tag}) + + response = self.send_request( + method="GET", url=url, headers=self.parse_header(self.__jwt_data) + ) + if response.status_code == ResponseStatusCodes.OK: + output = self.decode_response(response, output_json=True) + if raw: + return output + output = ProjectDict(**output).model_dump(by_alias=True) + return Project.from_dict(output) + elif response.status_code == ResponseStatusCodes.NOT_EXIST: + raise ResponseError("View does not exist, or you are unauthorized.") + else: + raise ResponseError( + f"Internal server error. Unexpected return value. Error: {response.status_code}" + ) + + def create( + self, + namespace: str, + name: str, + tag: str, + view_name: str, + description: str = None, + sample_list: list = None, + no_fail: bool = False, + ): + """ + Create view in project in PEPhub. + + :param namespace: namespace of project + :param name: name of project + :param tag: tag of project + :param description: description of the view + :param view_name: name of the view + :param sample_list: list of sample names + :param no_fail: whether to raise an error if view was not added to the project + """ + + if not sample_list or not isinstance(sample_list, list): + raise ValueError("Sample list must be a list of sample names.") + + url = self._build_view_request_url( + namespace=namespace, name=name, view_name=view_name + ) + + url = url + self.parse_query_param(pep_variables={"tag": tag}) + + response = self.send_request( + method="POST", + url=url, + headers=self.parse_header(self.__jwt_data), + params={"description": description, "no_fail": no_fail}, + json=sample_list, + ) + if response.status_code == ResponseStatusCodes.ACCEPTED: + _LOGGER.info( + f"View '{view_name}' created in project '{namespace}/{name}:{tag}' successfully." + ) + return None + elif response.status_code == ResponseStatusCodes.NOT_EXIST: + raise ResponseError( + f"Project '{namespace}/{name}:{tag}' or one of the samples does not exist." + ) + elif response.status_code == ResponseStatusCodes.CONFLICT: + raise ResponseError(f"View '{view_name}' already exists in the project.") + else: + raise ResponseError(f"Unexpected return value.{response.status_code}") + + def delete(self, namespace: str, name: str, tag: str, view_name: str) -> None: + """ + Delete view from project in PEPhub. + + :param namespace: namespace of project + :param name: name of project + :param tag: tag of project + :param view_name: name of the view + :return: None + """ + url = self._build_view_request_url( + namespace=namespace, name=name, view_name=view_name + ) + + url = url + self.parse_query_param(pep_variables={"tag": tag}) + + response = self.send_request( + method="DELETE", url=url, headers=self.parse_header(self.__jwt_data) + ) + + if response.status_code == ResponseStatusCodes.ACCEPTED: + _LOGGER.info( + f"View '{view_name}' deleted from project '{namespace}/{name}:{tag}' successfully." 
+ ) + return None + elif response.status_code == ResponseStatusCodes.NOT_EXIST: + raise ResponseError("View does not exists, or you are unauthorized.") + elif response.status_code == ResponseStatusCodes.UNAUTHORIZED: + raise ResponseError("You are unauthorized to delete this view.") + else: + raise ResponseError("Unexpected return value. ") + + def add_sample( + self, + namespace: str, + name: str, + tag: str, + view_name: str, + sample_name: str, + ): + """ + Add sample to view in project in PEPhub. + + :param namespace: namespace of project + :param name: name of project + :param tag: tag of project + :param view_name: name of the view + :param sample_name: name of the sample + """ + url = self._build_view_request_url( + namespace=namespace, + name=name, + view_name=view_name, + sample_name=sample_name, + ) + + url = url + self.parse_query_param(pep_variables={"tag": tag}) + + response = self.send_request( + method="POST", + url=url, + headers=self.parse_header(self.__jwt_data), + ) + if response.status_code == ResponseStatusCodes.ACCEPTED: + _LOGGER.info( + f"Sample '{sample_name}' added to view '{view_name}' in project '{namespace}/{name}:{tag}' successfully." + ) + return None + elif response.status_code == ResponseStatusCodes.NOT_EXIST: + raise ResponseError( + f"Sample '{sample_name}' or project {namespace}/{name}:{tag} does not exist." + ) + elif response.status_code == ResponseStatusCodes.CONFLICT: + raise ResponseError(f"Sample '{sample_name}' already exists in the view.") + else: + raise ResponseError( + f"Unexpected return value. Error: {response.status_code}" + ) + + def remove_sample( + self, + namespace: str, + name: str, + tag: str, + view_name: str, + sample_name: str, + ): + """ + Remove sample from view in project in PEPhub. + + :param namespace: namespace of project + :param name: name of project + :param tag: tag of project + :param view_name: name of the view + :param sample_name: name of the sample + :return: None + """ + url = self._build_view_request_url( + namespace=namespace, + name=name, + view_name=view_name, + sample_name=sample_name, + ) + + url = url + self.parse_query_param(pep_variables={"tag": tag}) + + response = self.send_request( + method="DELETE", + url=url, + headers=self.parse_header(self.__jwt_data), + ) + if response.status_code == ResponseStatusCodes.ACCEPTED: + _LOGGER.info( + f"Sample '{sample_name}' removed from view '{view_name}' in project '{namespace}/{name}:{tag}' successfully." + ) + return None + elif response.status_code == ResponseStatusCodes.NOT_EXIST: + raise ResponseError( + f"Sample '{sample_name}' or project {namespace}/{name}:{tag} does not exist. " + ) + elif response.status_code == ResponseStatusCodes.UNAUTHORIZED: + raise ResponseError( + "You are unauthorized to remove this sample from the view." + ) + else: + raise ResponseError( + f"Unexpected return value. Error: {response.status_code}" + ) + + @staticmethod + def _build_view_request_url( + namespace: str, name: str, view_name: str, sample_name: str = None + ): + """ + Build URL for view request. 
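+
+        If ``sample_name`` is provided, the sample-level view URL template
+        (``PEPHUB_VIEW_SAMPLE_URL``) is used; otherwise the view-level template
+        (``PEPHUB_VIEW_URL``) is used.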
+ + :param namespace: namespace of project + :param name: name of project + :param view_name: name of view + :return: URL + """ + if sample_name: + return PEPHUB_VIEW_SAMPLE_URL.format( + namespace=namespace, + project=name, + view_name=view_name, + sample_name=sample_name, + ) + return PEPHUB_VIEW_URL.format( + namespace=namespace, + project=name, + view_name=view_name, + ) diff --git a/tests/smoketests/__init__.py b/peppy/pephubclient/pephub_oauth/__init__.py similarity index 100% rename from tests/smoketests/__init__.py rename to peppy/pephubclient/pephub_oauth/__init__.py diff --git a/peppy/pephubclient/pephub_oauth/const.py b/peppy/pephubclient/pephub_oauth/const.py new file mode 100644 index 00000000..0cdfbc8e --- /dev/null +++ b/peppy/pephubclient/pephub_oauth/const.py @@ -0,0 +1,6 @@ +# constants of pephub_auth + +from ..constants import PEPHUB_BASE_URL + +PEPHUB_DEVICE_INIT_URI = f"{PEPHUB_BASE_URL}auth/device/init" +PEPHUB_DEVICE_TOKEN_URI = f"{PEPHUB_BASE_URL}auth/device/token" diff --git a/peppy/pephubclient/pephub_oauth/exceptions.py b/peppy/pephubclient/pephub_oauth/exceptions.py new file mode 100644 index 00000000..d7ef711c --- /dev/null +++ b/peppy/pephubclient/pephub_oauth/exceptions.py @@ -0,0 +1,26 @@ +"""auth exceptions""" + + +class PEPHubResponseException(Exception): + """Request response exception. Used when response != 200""" + + def __init__(self, reason: str = ""): + """ + Optionally provide explanation for exceptional condition. + :param str reason: some context or perhaps just a value that + could not be interpreted as an accession + """ + super(PEPHubResponseException, self).__init__(reason) + + +class PEPHubTokenExchangeException(Exception): + """Exception in exchanging device code on token == 400""" + + def __init__(self, reason: str = ""): + """ + Optionally provide explanation for exceptional condition. + + :param str reason: some context or perhaps just a value that + could not be interpreted as an accession + """ + super(PEPHubTokenExchangeException, self).__init__(reason) diff --git a/peppy/pephubclient/pephub_oauth/models.py b/peppy/pephubclient/pephub_oauth/models.py new file mode 100644 index 00000000..a3d64772 --- /dev/null +++ b/peppy/pephubclient/pephub_oauth/models.py @@ -0,0 +1,10 @@ +from pydantic import BaseModel + + +class InitializeDeviceCodeResponse(BaseModel): + device_code: str + auth_url: str + + +class PEPHubDeviceTokenResponse(BaseModel): + jwt_token: str diff --git a/peppy/pephubclient/pephub_oauth/pephub_oauth.py b/peppy/pephubclient/pephub_oauth/pephub_oauth.py new file mode 100644 index 00000000..d9246979 --- /dev/null +++ b/peppy/pephubclient/pephub_oauth/pephub_oauth.py @@ -0,0 +1,107 @@ +import json +import time +from typing import Type, Union + +import requests +from pydantic import BaseModel + +from ..helpers import MessageHandler, RequestManager +from ..pephub_oauth.const import PEPHUB_DEVICE_INIT_URI, PEPHUB_DEVICE_TOKEN_URI +from ..pephub_oauth.exceptions import ( + PEPHubResponseException, + PEPHubTokenExchangeException, +) +from ..pephub_oauth.models import ( + InitializeDeviceCodeResponse, + PEPHubDeviceTokenResponse, +) + + +class PEPHubAuth(RequestManager): + """ + Class responsible for authorization to PEPhub. + """ + + def login_to_pephub(self): + pephub_response = self._request_pephub_for_device_code() + print( + f"User verification code: {pephub_response.device_code}, please go to the website: " + f"{pephub_response.auth_url} to authenticate." 
+ ) + + # Sleep 2 minutes and then try 3 times exchange device code on token + time.sleep(2) + + number_of_token_exchange_attempts = 3 + for i in range(number_of_token_exchange_attempts): + try: + user_token = self._exchange_device_code_on_token( + pephub_response.device_code + ) + except PEPHubTokenExchangeException: + time.sleep(2) + else: + print("Successfully logged in!") + return user_token + + # If you didn't log in press enter to try again. + input("If you logged in, press enter to continue...") + try: + user_token = self._exchange_device_code_on_token( + pephub_response.device_code + ) + except PEPHubTokenExchangeException: + MessageHandler.print_warning("You are not logged in") + else: + MessageHandler.print_success("Successfully logged in!") + return user_token + + def _request_pephub_for_device_code(self) -> InitializeDeviceCodeResponse: + """ + Requests device code from pephub + """ + response = PEPHubAuth.send_request( + method="POST", + url=PEPHUB_DEVICE_INIT_URI, + params=None, + headers=None, + ) + return self._handle_pephub_response(response, InitializeDeviceCodeResponse) + + def _exchange_device_code_on_token(self, device_code: str) -> str: + """ + Send request with device code to pephub in order to exchange it on JWT + :param device_code: device code that was generated by pephub + """ + response = PEPHubAuth.send_request( + method="POST", + url=PEPHUB_DEVICE_TOKEN_URI, + params=None, + headers={"device-code": device_code}, + ) + pephub_token_response = self._handle_pephub_response( + response, PEPHubDeviceTokenResponse + ) + return pephub_token_response.jwt_token + + @staticmethod + def _handle_pephub_response( + response: requests.Response, model: Type[BaseModel] + ) -> Union[BaseModel, InitializeDeviceCodeResponse, PEPHubDeviceTokenResponse]: + """ + Decode the response from PEPhub and pack the returned data into appropriate model. + :param response: Response from pephub + :param model: Model that the data will be packed to. + + :return: Response data as an instance of correct model. 
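+
+        Example (illustrative): a successful device-initialization response is
+        decoded into ``InitializeDeviceCodeResponse`` with its ``device_code``
+        and ``auth_url`` fields populated.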
+ """ + if response.status_code == 401: + raise PEPHubTokenExchangeException() + if response.status_code != 200: + raise PEPHubResponseException() + try: + content = json.loads(PEPHubAuth.decode_response(response)) + except json.JSONDecodeError: + raise Exception("Something went wrong with PEPhub response") + + return model(**content) diff --git a/peppy/pephubclient/pephubclient.py b/peppy/pephubclient/pephubclient.py new file mode 100644 index 00000000..acaf6de5 --- /dev/null +++ b/peppy/pephubclient/pephubclient.py @@ -0,0 +1,357 @@ +from typing import Literal, NoReturn, Optional + +import urllib3 +from pydantic import ValidationError +from typing_extensions import deprecated +from ubiquerg import parse_registry_path + +from ..const import NAME_KEY +from ..project import Project +from .constants import ( + PATH_TO_FILE_WITH_JWT, + PEPHUB_PEP_API_BASE_URL, + PEPHUB_PEP_SEARCH_URL, + PEPHUB_PUSH_URL, + RegistryPath, + ResponseStatusCodes, +) +from .exceptions import IncorrectQueryStringError, ResponseError +from .files_manager import FilesManager +from .helpers import MessageHandler, RequestManager, save_pep +from .models import ( + ProjectAnnotationModel, + ProjectDict, + ProjectUploadData, + SearchReturnModel, +) +from .modules.sample import PEPHubSample +from .modules.view import PEPHubView +from .pephub_oauth.pephub_oauth import PEPHubAuth + +urllib3.disable_warnings() + + +class PEPHubClient(RequestManager): + def __init__(self): + self.__jwt_data = FilesManager.load_jwt_data_from_file(PATH_TO_FILE_WITH_JWT) + + self.__view = PEPHubView(self.__jwt_data) + self.__sample = PEPHubSample(self.__jwt_data) + + @property + def view(self) -> PEPHubView: + return self.__view + + @property + def sample(self) -> PEPHubSample: + return self.__sample + + def login(self) -> None: + """ + Log in to PEPhub + """ + user_token = PEPHubAuth().login_to_pephub() + + FilesManager.save_jwt_data_to_file(PATH_TO_FILE_WITH_JWT, user_token) + self.__jwt_data = FilesManager.load_jwt_data_from_file(PATH_TO_FILE_WITH_JWT) + + def logout(self) -> None: + """ + Log out from PEPhub + """ + FilesManager.delete_file_if_exists(PATH_TO_FILE_WITH_JWT) + self.__jwt_data = None + + def pull( + self, + project_registry_path: str, + force: Optional[bool] = False, + zip: Optional[bool] = False, + output: Optional[str] = None, + ) -> None: + """ + Download project locally + + :param str project_registry_path: Project registry path in PEPhub (e.g. databio/base:default) + :param bool force: if project exists, overwrite it. + :param bool zip: if True, save project as zip file + :param str output: path where project will be saved + :return: None + """ + project_dict = self.load_raw_pep( + registry_path=project_registry_path, + ) + + save_pep( + project=project_dict, + reg_path=project_registry_path, + force=force, + project_path=output, + zip=zip, + ) + + def load_project( + self, + project_registry_path: str, + query_param: Optional[dict] = None, + ) -> Project: + """ + Load peppy project from PEPhub in Project object + + :param project_registry_path: registry path of the project + :param query_param: query parameters used in get request + :return Project: peppy project. 
+ """ + raw_pep = self.load_raw_pep(project_registry_path, query_param) + peppy_project = Project().from_dict(raw_pep) + return peppy_project + + def push( + self, + cfg: str, + namespace: str, + name: Optional[str] = None, + tag: Optional[str] = None, + is_private: Optional[bool] = False, + force: Optional[bool] = False, + ) -> None: + """ + Push (upload/update) project to Pephub using config/csv path + + :param str cfg: Project config file (YAML) or sample table (CSV/TSV) + with one row per sample to constitute project + :param str namespace: namespace + :param str name: project name + :param str tag: project tag + :param bool is_private: Specifies whether project should be private [Default= False] + :param bool force: Force push to the database. Use it to update, or upload project. [Default= False] + :return: None + """ + peppy_project = Project(cfg=cfg) + self.upload( + project=peppy_project, + namespace=namespace, + name=name, + tag=tag, + is_private=is_private, + force=force, + ) + + def upload( + self, + project: Project, + namespace: str, + name: str = None, + tag: str = None, + is_private: bool = False, + force: bool = True, + ) -> None: + """ + Upload peppy project to the PEPhub. + + :param Project project: Project object that has to be uploaded to the DB + :param namespace: namespace + :param name: project name + :param tag: project tag + :param is_private: Make project private + :param force: overwrite project if it exists, use it to update, or upload project. + :return: None + """ + if name: + project[NAME_KEY] = name + + upload_data = ProjectUploadData( + pep_dict=project.to_dict( + extended=True, + orient="records", + ), + tag=tag, + is_private=is_private, + overwrite=force, + ) + pephub_response = self.send_request( + method="POST", + url=self._build_push_request_url(namespace=namespace), + headers=self.parse_header(self.__jwt_data), + json=upload_data.model_dump(), + cookies=None, + ) + if pephub_response.status_code == ResponseStatusCodes.ACCEPTED: + MessageHandler.print_success( + f"Project '{namespace}/{name}:{upload_data.tag}' was successfully uploaded" + ) + elif pephub_response.status_code == ResponseStatusCodes.CONFLICT: + raise ResponseError( + "Project already exists. Set force to overwrite project." + ) + elif pephub_response.status_code == ResponseStatusCodes.UNAUTHORIZED: + raise ResponseError("Unauthorized! Failure in uploading project.") + elif pephub_response.status_code == ResponseStatusCodes.FORBIDDEN: + raise ResponseError( + "User does not have permission to write to this namespace!" + ) + else: + raise ResponseError( + f"Unexpected Response Error. {pephub_response.status_code}" + ) + return None + + def find_project( + self, + namespace: str, + query_string: str = "", + limit: int = 100, + offset: int = 0, + filter_by: Literal["submission_date", "last_update_date"] = None, + start_date: str = None, + end_date: str = None, + ) -> SearchReturnModel: + """ + Find project in specific namespace and return list of PEP annotation + + :param namespace: Namespace where to search for projects + :param query_string: Search query + :param limit: Return limit + :param offset: Return offset + :param filter_by: Use filter date. 
Option: [submission_date, last_update_date] + :param start_date: filter beginning date + :param end_date: filter end date (if none today's date is used) + :return: + """ + + query_param = { + "q": query_string, + "limit": limit, + "offset": offset, + } + if filter_by in ["submission_date", "last_update_date"]: + query_param["filter_by"] = filter_by + query_param["filter_start_date"] = start_date + if end_date: + query_param["filter_end_date"] = end_date + + url = self._build_project_search_url( + namespace=namespace, + query_param=query_param, + ) + + pephub_response = self.send_request( + method="GET", + url=url, + headers=self.parse_header(self.__jwt_data), + json=None, + cookies=None, + ) + if pephub_response.status_code == ResponseStatusCodes.OK: + decoded_response = self.decode_response(pephub_response, output_json=True) + project_list = [] + for project_found in decoded_response["results"]: + project_list.append(ProjectAnnotationModel(**project_found)) + return SearchReturnModel(**decoded_response) + + @deprecated("This method is deprecated. Use load_raw_pep instead.") + def _load_raw_pep( + self, + registry_path: str, + jwt_data: Optional[str] = None, + query_param: Optional[dict] = None, + ) -> dict: + """ + !!! This method is deprecated. Use load_raw_pep instead. !!! + + Request PEPhub and return the requested project as Project object. + + :param registry_path: Project namespace, eg. "geo/GSE124224:tag" + :param query_param: Optional variables to be passed to PEPhub + :return: Raw project in dict. + """ + return self.load_raw_pep(registry_path, query_param) + + def load_raw_pep( + self, + registry_path: str, + query_param: Optional[dict] = None, + ) -> dict: + """ + Request PEPhub and return the requested project as Project object. + + :param registry_path: Project namespace, eg. "geo/GSE124224:tag" + :param query_param: Optional variables to be passed to PEPhub + :return: Raw project in dict. + """ + query_param = query_param or {} + query_param["raw"] = "true" + + self._set_registry_data(registry_path) + pephub_response = self.send_request( + method="GET", + url=self._build_pull_request_url(query_param=query_param), + headers=self.parse_header(self.__jwt_data), + cookies=None, + ) + if pephub_response.status_code == ResponseStatusCodes.OK: + decoded_response = self.decode_response(pephub_response, output_json=True) + correct_proj_dict = ProjectDict(**decoded_response) + + # This step is necessary because of this issue: https://github.com/pepkit/pephub/issues/124 + return correct_proj_dict.model_dump(by_alias=True) + + if pephub_response.status_code == ResponseStatusCodes.NOT_EXIST: + raise ResponseError("File does not exist, or you are unauthorized.") + if pephub_response.status_code == ResponseStatusCodes.INTERNAL_ERROR: + raise ResponseError( + f"Internal server error. Unexpected return value. Error: {pephub_response.status_code}" + ) + + def _set_registry_data(self, query_string: str) -> None: + """ + Parse provided query string to extract project name, sample name, etc. + + :param query_string: Passed by user. Contain information needed to locate the project. + :return: Parsed query string. 
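+
+        Example (illustrative): ``"databio/example:default"`` is parsed into a
+        ``RegistryPath`` with ``namespace="databio"``, ``item="example"`` and
+        ``tag="default"``, which is stored as ``self.registry_path``.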
+ """ + try: + self.registry_path = RegistryPath(**parse_registry_path(query_string)) + except (ValidationError, TypeError): + raise IncorrectQueryStringError(query_string=query_string) + + def _build_pull_request_url(self, query_param: dict = None) -> str: + """ + Build request for getting projects from pephub + + :param query_param: dict of parameters used in query string + :return: url string + """ + query_param = query_param or {} + query_param["tag"] = self.registry_path.tag + + endpoint = self.registry_path.namespace + "/" + self.registry_path.item + + variables_string = self.parse_query_param(query_param) + endpoint += variables_string + + return PEPHUB_PEP_API_BASE_URL + endpoint + + @staticmethod + def _build_project_search_url(namespace: str, query_param: dict = None) -> str: + """ + Build request for searching projects from pephub + + :param query_param: dict of parameters used in query string + :return: url string + """ + + variables_string = RequestManager.parse_query_param(query_param) + endpoint = variables_string + + return PEPHUB_PEP_SEARCH_URL.format(namespace=namespace) + endpoint + + @staticmethod + def _build_push_request_url(namespace: str) -> str: + """ + Build project upload request used in pephub + + :param namespace: namespace where project will be uploaded + :return: url string + """ + return PEPHUB_PUSH_URL.format(namespace=namespace) diff --git a/peppy/project.py b/peppy/project.py index ed7b3133..7be77e3d 100644 --- a/peppy/project.py +++ b/peppy/project.py @@ -6,8 +6,9 @@ import sys from collections.abc import Mapping, MutableMapping from contextlib import suppress +from copy import deepcopy from logging import getLogger -from typing import Iterable, List, Tuple, Union, Literal +from typing import Iterable, List, Literal, Tuple, Union import numpy as np import pandas as pd @@ -16,11 +17,11 @@ from rich.console import Console from rich.progress import track from ubiquerg import is_url -from copy import deepcopy from .const import ( ACTIVE_AMENDMENTS_KEY, AMENDMENTS_KEY, + APPEND_KEY, ATTR_KEY_PREFIX, CFG_IMPORTS_KEY, CFG_SAMPLE_TABLE_KEY, @@ -28,7 +29,6 @@ CONFIG_FILE_KEY, CONFIG_KEY, CONFIG_VERSION_KEY, - APPEND_KEY, DERIVED_ATTRS_KEY, DERIVED_KEY, DERIVED_SOURCES_KEY, @@ -41,6 +41,7 @@ MAX_PROJECT_SAMPLES_REPR, METADATA_KEY, NAME_KEY, + ORIGINAL_CONFIG_KEY, PEP_LATEST_VERSION, PKG_NAME, PROJ_MODS_KEY, @@ -60,13 +61,12 @@ SUBSAMPLE_RAW_LIST_KEY, SUBSAMPLE_TABLE_INDEX_KEY, SUBSAMPLE_TABLES_FILE_KEY, - ORIGINAL_CONFIG_KEY, ) from .exceptions import ( - InvalidSampleTableFileException, - MissingAmendmentError, IllegalStateException, InvalidConfigFileException, + InvalidSampleTableFileException, + MissingAmendmentError, ) from .parsers import select_parser from .sample import Sample @@ -76,6 +76,7 @@ load_yaml, make_abs_via_cfg, make_list, + unpopulated_env_var, ) _LOGGER = getLogger(PKG_NAME) @@ -669,7 +670,7 @@ def _assert_samples_have_names(self): if self.st_index not in sample: message = ( f"{CFG_SAMPLE_TABLE_KEY} is missing '{self.st_index}' column; " - f"you must specify {CFG_SAMPLE_TABLE_KEY}s in {self.st_index} or derive them" + f"you must specify a {self.st_index} column for your {CFG_SAMPLE_TABLE_KEY} or derive it" ) raise InvalidSampleTableFileException(message) @@ -920,6 +921,7 @@ def attr_derive(self, attrs=None): ds = self[CONFIG_KEY][SAMPLE_MODS_KEY][DERIVED_KEY][DERIVED_SOURCES_KEY] derivations = attrs or (da if isinstance(da, list) else [da]) _LOGGER.debug("Derivations to be done: {}".format(derivations)) + env_var_miss = set() for sample in 
track(
             self.samples,
             description="Deriving sample attributes",
@@ -942,6 +944,9 @@ def attr_derive(self, attrs=None):
                 derived_attr = sample.derive_attribute(ds, attr)
 
                 if derived_attr:
+                    if "$" in derived_attr:
+                        env_var_miss.add(derived_attr)
+
                     _LOGGER.debug("Setting '{}' to '{}'".format(attr, derived_attr))
                     sample[attr] = derived_attr
                 else:
@@ -949,6 +954,8 @@ def attr_derive(self, attrs=None):
                         f"Not setting null/empty value for data source '{attr}': {type(derived_attr)}"
                     )
                 sample._derived_cols_done.append(attr)
+        if len(env_var_miss) > 0:
+            unpopulated_env_var(env_var_miss)
 
     def activate_amendments(self, amendments):
         """
diff --git a/peppy/sample.py b/peppy/sample.py
index 60d14677..432cf41d 100644
--- a/peppy/sample.py
+++ b/peppy/sample.py
@@ -4,7 +4,7 @@
 from copy import copy as cp
 from logging import getLogger
 from string import Formatter
-from typing import Optional, Union
+from typing import Any, Dict, Optional, Union
 
 import pandas as pd
 import yaml
@@ -20,8 +20,8 @@
     SAMPLE_SHEET_KEY,
 )
 from .exceptions import InvalidSampleTableFileException
-from .utils import copy, grab_project_data
 from .simple_attr_map import SimpleAttMap
+from .utils import copy, grab_project_data
 
 _LOGGER = getLogger(PKG_NAME)
 
@@ -39,7 +39,9 @@ class Sample(SimpleAttMap):
     :param Mapping | pandas.core.series.Series series: Sample's data.
     """
 
-    def __init__(self, series, prj=None):
+    def __init__(
+        self, series: Union[Mapping, pd.Series], prj: Optional[Any] = None
+    ) -> None:
         super(Sample, self).__init__()
 
         data = dict(series)
@@ -75,25 +77,28 @@ def __init__(self, series, prj=None):
         self._derived_cols_done = []
         self._attributes = list(series.keys())
 
-    def get_sheet_dict(self):
-        """
-        Create a K-V pairs for items originally passed in via the sample sheet.
+    def get_sheet_dict(self) -> Dict:
+        """Create K-V pairs for items originally passed in via the sample sheet.
+
         This is useful for summarizing; it provides a representation of the
         sample that excludes things like config files and derived entries.
 
-        :return OrderedDict: mapping from name to value for data elements
-            originally provided via the sample sheet (i.e., the a map-like
-            representation of the instance, excluding derived items)
+        Returns:
+            Mapping from name to value for data elements originally provided
+            via the sample sheet (i.e., a map-like representation of the
+            instance, excluding derived items)
         """
         return dict([[k, self[k]] for k in self._attributes])
 
-    def to_dict(self, add_prj_ref=False):
-        """
-        Serializes itself as dict object.
+    def to_dict(self, add_prj_ref: bool = False) -> Dict:
+        """Serializes itself as dict object.
+
+        Args:
+            add_prj_ref: Whether the project reference bound to the Sample
+                object should be included in the dict representation
 
-        :param bool add_prj_ref: whether the project reference bound do the
-            Sample object should be included in the YAML representation
-        :return dict: dict representation of this Sample
+        Returns:
+            Dict representation of this Sample
         """
 
         def _obj2dict(obj, name=None):
@@ -136,16 +141,19 @@ def _obj2dict(obj, name=None):
         return serial
 
     def to_yaml(
-        self, path: Optional[str] = None, add_prj_ref=False
+        self, path: Optional[str] = None, add_prj_ref: bool = False
     ) -> Union[str, None]:
-        """
-        Serializes itself in YAML format. Writes to file if path is provided, else returns string representation.
+        """Serializes itself in YAML format.
+
+        Writes to file if path is provided, else returns string representation.
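+
+        Example (illustrative):
+            sample.to_yaml("sample1.yaml")  # writes the file and returns None
+            yaml_text = sample.to_yaml()    # no path given, returns the YAML string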
+
+        Args:
+            path: A file path to write YAML to; defaults to None
+            add_prj_ref: Whether the project reference bound to the Sample
+                object should be included in the YAML representation
-        :param str path: A file path to write yaml to; provide this or
-            the subs_folder_path, defaults to None
-        :param bool add_prj_ref: whether the project reference bound do the
-            Sample object should be included in the YAML representation
-        :return str | None: returns string representation of sample yaml or None
+
+        Returns:
+            String representation of sample YAML or None if written to file
         """
         serial = self.to_dict(add_prj_ref=add_prj_ref)
         if path:
@@ -199,11 +207,6 @@ def _format_regex(regex, items):
     keys = [i[1] for i in Formatter().parse(regex) if i[1] is not None]
     if not keys:
         return [regex]
-    if "$" in regex:
-        _LOGGER.warning(
-            "Not all environment variables were populated "
-            "in derived attribute source: {}".format(regex)
-        )
     attr_lens = [
         len(v) for k, v in items.items() if (isinstance(v, list) and k in keys)
     ]
diff --git a/peppy/utils.py b/peppy/utils.py
index 45d9e557..7f8d5f28 100644
--- a/peppy/utils.py
+++ b/peppy/utils.py
@@ -2,8 +2,12 @@
 import logging
 import os
-from typing import Dict, Mapping, Type, Union
+import posixpath as psp
+import re
+from collections import defaultdict
+from typing import Any, Dict, Mapping, Optional, Set, Type, Union
 from urllib.request import urlopen
 
 import yaml
 from ubiquerg import expandpath, is_url
@@ -14,7 +18,7 @@
 _LOGGER = logging.getLogger(__name__)
 
 
-def copy(obj):
+def copy(obj: Any) -> Any:
     def copy(self):
         """
         Copy self to a new object.
@@ -27,8 +31,23 @@ def copy(self):
     return obj
 
 
-def make_abs_via_cfg(maybe_relpath, cfg_path, check_exists=False):
-    """Ensure that a possibly relative path is absolute."""
+def make_abs_via_cfg(
+    maybe_relpath: str, cfg_path: str, check_exists: bool = False
+) -> str:
+    """Ensure that a possibly relative path is absolute.
+
+    Args:
+        maybe_relpath: Path that may be relative
+        cfg_path: Path to configuration file
+        check_exists: Whether to verify the resulting path exists
+
+    Returns:
+        Absolute path
+
+    Raises:
+        TypeError: If maybe_relpath is not a string
+        OSError: If check_exists is True and path doesn't exist
+    """
     if not isinstance(maybe_relpath, str):
         raise TypeError(
             "Attempting to ensure non-text value is absolute path: {} ({})".format(
@@ -53,19 +72,24 @@
     return abs_path
 
 
-def grab_project_data(prj):
-    """
-    From the given Project, grab Sample-independent data.
+def grab_project_data(prj: Any) -> Mapping:
+    """From the given Project, grab Sample-independent data.
 
     There are some aspects of a Project of which it's beneficial for a
     Sample to be aware, particularly for post-hoc analysis. Since Sample
     objects within a Project are mutually independent, though, each doesn't need to
-    know about any of the others. A Project manages its, Sample instances,
+    know about any of the others. A Project manages its Sample instances,
     so for each Sample knowledge of Project data is limited. This method
     facilitates adoption of that conceptual model.
 
-    :param Project prj: Project from which to grab data
-    :return Mapping: Sample-independent data sections from given Project
+    Args:
+        prj: Project from which to grab data
+
+    Returns:
+        Sample-independent data sections from given Project
+
+    Raises:
+        KeyError: If project lacks required config section
     """
     if not prj:
         return {}
@@ -77,16 +101,17 @@
 
 
 def make_list(arg: Union[list, str], obj_class: Type) -> list:
-    """
-    Convert an object of predefined class to a list of objects of that class or
-    ensure a list is a list of objects of that class
+    """Convert an object of predefined class to a list or ensure list contains correct type.
 
-    :param list[obj] | obj arg: string or a list of strings to listify
-    :param str obj_class: name of the class of intrest
+    Args:
+        arg: Object or list of objects to listify
+        obj_class: Class that objects should be instances of
 
-    :return list: list of objects of the predefined class
+    Returns:
+        List of objects of the predefined class
 
-    :raise TypeError: if a faulty argument was provided
+    Raises:
+        TypeError: If a faulty argument was provided
     """
 
     def _raise_faulty_arg():
@@ -106,22 +131,26 @@ def _raise_faulty_arg():
         _raise_faulty_arg()
 
 
-def _expandpath(path: str):
-    """
-    Expand a filesystem path that may or may not contain user/env vars.
+def _expandpath(path: str) -> str:
+    """Expand a filesystem path that may or may not contain user/env vars.
 
-    :param str path: path to expand
-    :return str: expanded version of input path
+    Args:
+        path: Path to expand
+
+    Returns:
+        Expanded version of input path
     """
     return os.path.expandvars(os.path.expanduser(path))
 
 
 def expand_paths(x: dict) -> dict:
-    """
-    Recursively expand paths in a dict.
+    """Recursively expand paths in a dict.
+
+    Args:
+        x: Dict to expand
 
-    :param dict x: dict to expand
-    :return dict: dict with expanded paths
+    Returns:
+        Dict with expanded paths
     """
     if isinstance(x, str):
         return expandpath(x)
@@ -130,13 +159,17 @@
     return x
 
 
-def load_yaml(filepath):
-    """
-    Load a local or remote YAML file into a Python dict
+def load_yaml(filepath: str) -> dict:
+    """Load a local or remote YAML file into a Python dict.
+
+    Args:
+        filepath: Path to the file to read
 
-    :param str filepath: path to the file to read
-    :raises RemoteYAMLError: if the remote YAML file reading fails
-    :return dict: read data
+    Returns:
+        Read data
+
+    Raises:
+        RemoteYAMLError: If the remote YAML file reading fails
     """
     if is_url(filepath):
         _LOGGER.debug(f"Got URL: {filepath}")
@@ -153,16 +186,27 @@
     else:
         with open(os.path.abspath(filepath), "r") as f:
             data = yaml.safe_load(f)
     return expand_paths(data)
 
 
-def is_cfg_or_anno(file_path, formats=None):
-    """
-    Determine if the input file seems to be a project config file (based on the file extension).
-    :param str file_path: file path to examine
-    :param dict formats: formats dict to use. Must include 'config' and 'annotation' keys.
-    :raise ValueError: if the file seems to be neither a config nor an annotation
-    :return bool: True if the file is a config, False if the file is an annotation
+def is_cfg_or_anno(
+    file_path: Optional[str], formats: Optional[dict] = None
+) -> Optional[bool]:
+    """Determine if the input file seems to be a project config file (based on extension).
+
+    Args:
+        file_path: File path to examine
+        formats: Formats dict to use.
Must include 'config' and 'annotation' keys + + Returns: + True if the file is a config, False if the file is an annotation, + None if file_path is None + + Raises: + ValueError: If the file seems to be neither a config nor an annotation """ formats_dict = formats or { "config": (".yaml", ".yml"), @@ -182,8 +226,15 @@ def is_cfg_or_anno(file_path, formats=None): ) -def extract_custom_index_for_sample_table(pep_dictionary: Dict): - """Extracts a custom index for the sample table if it exists""" +def extract_custom_index_for_sample_table(pep_dictionary: Dict) -> Optional[str]: + """Extracts a custom index for the sample table if it exists. + + Args: + pep_dictionary: PEP configuration dictionary + + Returns: + Custom index name or None if not specified + """ return ( pep_dictionary[SAMPLE_TABLE_INDEX_KEY] if SAMPLE_TABLE_INDEX_KEY in pep_dictionary @@ -191,10 +242,68 @@ def extract_custom_index_for_sample_table(pep_dictionary: Dict): ) -def extract_custom_index_for_subsample_table(pep_dictionary: Dict): - """Extracts a custom index for the subsample table if it exists""" +def extract_custom_index_for_subsample_table(pep_dictionary: Dict) -> Optional[str]: + """Extracts a custom index for the subsample table if it exists. + + Args: + pep_dictionary: PEP configuration dictionary + + Returns: + Custom index name or None if not specified + """ return ( pep_dictionary[SUBSAMPLE_TABLE_INDEX_KEY] if SUBSAMPLE_TABLE_INDEX_KEY in pep_dictionary else None ) + + +def unpopulated_env_var(paths: Set[str]) -> None: + """Print warnings for unpopulated environment variables in paths. + + Given a set of paths that may contain env vars, group by env var and + print a warning for each group with the deepest common directory and + the paths relative to that directory. + + Args: + paths: Set of paths that may contain environment variables + """ + _VAR_RE = re.compile(r"^\$(\w+)/(.*)$") + groups: dict[str, list[str]] = defaultdict(list) + + # 1) Group by env var + for s in paths: + m = _VAR_RE.match(s.strip()) + if not m: + # Not in "$VAR/..." form — skip or collect under a special key if you prefer + continue + var, tail = m.group(1), m.group(2) + # normalize to POSIX-ish, no leading "./" + tail = tail.lstrip("/") + groups[var].append(tail) + + # 2) For each var, compute deepest common directory and print + for var, tails in groups.items(): + if not tails: + continue + + if len(tails) == 1: + # With a single path, use its directory as the common dir + common_dir = psp.dirname(tails[0]) or "." + else: + common_dir = psp.commonpath(tails) or "." + # Ensure it's a directory; commonpath is component-wise, so it's fine. 
+ + warning_message = "Not all environment variables were populated in derived attribute source: $%s/{" + + in_env = [] + for t in tails: + rel = psp.relpath(t, start=common_dir or ".") + in_env.append(rel) + + warning_message += ", ".join(in_env) + warning_message += "}" + _LOGGER.warning( + warning_message, + var, + ) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index ace32606..bf0dbb5c 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -1,6 +1,12 @@ -pandas>=0.24.2 -pyyaml +pandas>=2.2.0 +pyyaml>=6.0.0 rich>=10.3.0 ubiquerg>=0.6.2 numpy -pephubclient>=0.4.2 +logmuse>=0.2.8 +importlib-metadata; python_version < '3.10' +jsonschema>=3.0.1 +typer>=0.20.0 +requests>=2.28.2 +pydantic>2.5.0 +coloredlogs>=15.0.1 \ No newline at end of file diff --git a/requirements/requirements-test.txt b/requirements/requirements-test.txt index 2461d2eb..56ac11e9 100644 --- a/requirements/requirements-test.txt +++ b/requirements/requirements-test.txt @@ -2,3 +2,14 @@ mock pytest pytest-cov pytest-remotedata +# eido +coveralls +pytest-mock==3.6.1 +# pephubclient +black +ruff +python-dotenv +flake8 +pre-commit +coverage +smokeshow \ No newline at end of file diff --git a/scripts/update_usage_docs.sh b/scripts/update_usage_docs.sh new file mode 100755 index 00000000..95d150ee --- /dev/null +++ b/scripts/update_usage_docs.sh @@ -0,0 +1,21 @@ +#!/bin/bash +cp ../docs/templates/usage.template usage.template + +for cmd in "--help" "pull --help" "push --help"; do + echo $cmd + echo -e "## \`phc $cmd\`" > USAGE_header.temp + phc $cmd --help > USAGE.temp 2>&1 + # sed -i 's/^/\t/' USAGE.temp + sed -i.bak '1s;^;\`\`\`console\ +;' USAGE.temp +# sed -i '1s/^/\n\`\`\`console\n/' USAGE.temp + echo -e "\`\`\`\n" >> USAGE.temp + #sed -i -e "/\`looper $cmd\`/r USAGE.temp" -e '$G' usage.template # for -in place inserts + cat USAGE_header.temp USAGE.temp >> usage.template # to append to the end +done +rm USAGE.temp +rm USAGE_header.temp +rm USAGE.temp.bak +mv usage.template ../docs/usage.md +#cat usage.template +# rm USAGE.temp \ No newline at end of file diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index ec734d81..00000000 --- a/setup.cfg +++ /dev/null @@ -1,6 +0,0 @@ -[aliases] -test = pytest - -[pytest] -# Only request extra info from failures and errors. 
-addopts = -rfE diff --git a/setup.py b/setup.py index 3d0619c9..79cb7b52 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,13 @@ def get_static(name, condition=None): setup( name=PACKAGE_NAME, - packages=[PACKAGE_NAME], + packages=[ + PACKAGE_NAME, + "peppy.eido", + "peppy.pephubclient", + "peppy.pephubclient.pephub_oauth", + "peppy.pephubclient.modules", + ], version=version, description="A python-based project metadata manager for portable encapsulated projects", long_description=long_description, @@ -59,6 +65,15 @@ def get_static(name, condition=None): url="https://github.com/pepkit/peppy/", author="Michal Stolarczyk, Nathan Sheffield, Vince Reuter, Andre Rendeiro, Oleksandr Khoroshevskyi", license="BSD2", + entry_points={ + "console_scripts": ["peppy = peppy.cli:main"], + "pep.filters": [ + "basic=peppy.eido.conversion_plugins:basic_pep_filter", + "yaml=peppy.eido.conversion_plugins:yaml_pep_filter", + "csv=peppy.eido.conversion_plugins:csv_pep_filter", + "yaml-samples=peppy.eido.conversion_plugins:yaml_samples_pep_filter", + ], + }, include_package_data=True, tests_require=(["pytest"]), setup_requires=( diff --git a/tests/data/eidodata/common/schemas/common_pep_validation.yaml b/tests/data/eidodata/common/schemas/common_pep_validation.yaml new file mode 100644 index 00000000..78ff9a3c --- /dev/null +++ b/tests/data/eidodata/common/schemas/common_pep_validation.yaml @@ -0,0 +1,69 @@ +description: "Schema for a minimal PEP" +version: "2.0.0" +properties: + config: + properties: + name: + type: string + pattern: "^\\S*$" + description: "Project name with no whitespace" + pep_version: + description: "Version of the PEP Schema this PEP follows" + type: string + sample_table: + type: string + description: "Path to the sample annotation table with one row per sample" + subsample_table: + type: string + description: "Path to the subsample annotation table with one row per subsample and sample_name attribute matching an entry in the sample table" + sample_modifiers: + type: object + properties: + append: + type: object + duplicate: + type: object + imply: + type: array + items: + type: object + properties: + if: + type: object + then: + type: object + derive: + type: object + properties: + attributes: + type: array + items: + type: string + sources: + type: object + project_modifiers: + type: object + properties: + amend: + description: "Object overwriting original project attributes" + type: object + import: + description: "List of external PEP project config files to import" + type: array + items: + type: string + required: + - pep_version + samples: + type: array + items: + type: object + properties: + sample_name: + type: string + pattern: "^\\S*$" + description: "Unique name of the sample with no whitespace" + required: + - sample_name +required: + - samples \ No newline at end of file diff --git a/tests/data/eidodata/peps/multiline_output/config.yaml b/tests/data/eidodata/peps/multiline_output/config.yaml new file mode 100644 index 00000000..4196ee81 --- /dev/null +++ b/tests/data/eidodata/peps/multiline_output/config.yaml @@ -0,0 +1,5 @@ +pep_version: "2.0.0" +sample_table: "samplesheet.csv" +subsample_table: "subsamplesheet.csv" +sample_table_index: "sample" +subsample_table_index: "sample" \ No newline at end of file diff --git a/tests/data/eidodata/peps/multiline_output/multiline_output.csv b/tests/data/eidodata/peps/multiline_output/multiline_output.csv new file mode 100644 index 00000000..5e889262 --- /dev/null +++ b/tests/data/eidodata/peps/multiline_output/multiline_output.csv 
@@ -0,0 +1,8 @@ +sample,strandedness,instrument_platform,run_accession,fastq_1,fastq_2 +WT_REP1,reverse,ABI_SOLID,runaccession1,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357070_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357070_2.fastq.gz +WT_REP1,reverse,BGISEQ,runaccession2,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_2.fastq.gz +WT_REP2,reverse,CAPILLARY,123123123,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_2.fastq.gz +RAP1_UNINDUCED_REP1,reverse,COMPLETE_GENOMICS,somerunaccesion,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357073_1.fastq.gz, +RAP1_UNINDUCED_REP2,reverse,DNBSEQ,ERR2412421,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357074_1.fastq.gz, +RAP1_UNINDUCED_REP2,reverse,HELICOS,xxxxxxxxxx,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357075_1.fastq.gz, +RAP1_IAA_30M_REP1,reverse,ILLUMINA,None,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_2.fastq.gz diff --git a/tests/data/example_peps-master/example_nextflow_subsamples/samplesheet.csv b/tests/data/eidodata/peps/multiline_output/samplesheet.csv similarity index 100% rename from tests/data/example_peps-master/example_nextflow_subsamples/samplesheet.csv rename to tests/data/eidodata/peps/multiline_output/samplesheet.csv diff --git a/tests/data/example_peps-master/example_nextflow_subsamples/subsamplesheet.csv b/tests/data/eidodata/peps/multiline_output/subsamplesheet.csv similarity index 100% rename from tests/data/example_peps-master/example_nextflow_subsamples/subsamplesheet.csv rename to tests/data/eidodata/peps/multiline_output/subsamplesheet.csv diff --git a/tests/data/eidodata/peps/multiple_subsamples/project_config.yaml b/tests/data/eidodata/peps/multiple_subsamples/project_config.yaml new file mode 100644 index 00000000..e0e580b7 --- /dev/null +++ b/tests/data/eidodata/peps/multiple_subsamples/project_config.yaml @@ -0,0 +1,19 @@ +pep_version: "2.1.0" +sample_table: sample_table.csv +subsample_table: + - subsample_table1.csv + - subsample_table2.csv + +sample_modifiers: + append: + local_files: LOCAL + genome: "fg" + derive: + attributes: [local_files] + sources: + LOCAL: "../data/{file_path}" + imply: + - if: + identifier: "frog1" + then: + genome: "frog_frog" diff --git a/tests/data/example_peps-master/example_multiple_subsamples/sample_table.csv b/tests/data/eidodata/peps/multiple_subsamples/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_multiple_subsamples/sample_table.csv rename to tests/data/eidodata/peps/multiple_subsamples/sample_table.csv diff --git a/tests/data/example_peps-master/example_multiple_subsamples/subsample_table1.csv b/tests/data/eidodata/peps/multiple_subsamples/subsample_table1.csv similarity index 100% rename from tests/data/example_peps-master/example_multiple_subsamples/subsample_table1.csv rename to tests/data/eidodata/peps/multiple_subsamples/subsample_table1.csv diff --git 
a/tests/data/example_peps-master/example_multiple_subsamples/subsample_table2.csv b/tests/data/eidodata/peps/multiple_subsamples/subsample_table2.csv similarity index 100% rename from tests/data/example_peps-master/example_multiple_subsamples/subsample_table2.csv rename to tests/data/eidodata/peps/multiple_subsamples/subsample_table2.csv diff --git a/tests/data/example_peps-master/example_nextflow_config/project_config.yaml b/tests/data/eidodata/peps/pep_nextflow_taxprofiler/config.yaml similarity index 100% rename from tests/data/example_peps-master/example_nextflow_config/project_config.yaml rename to tests/data/eidodata/peps/pep_nextflow_taxprofiler/config.yaml diff --git a/tests/data/eidodata/peps/pep_nextflow_taxprofiler/output.csv b/tests/data/eidodata/peps/pep_nextflow_taxprofiler/output.csv new file mode 100644 index 00000000..d70a0e77 --- /dev/null +++ b/tests/data/eidodata/peps/pep_nextflow_taxprofiler/output.csv @@ -0,0 +1,7 @@ +sample,instrument_platform,run_accession,fastq_1,fastq_2,fasta +2611,ILLUMINA,ERR5766174,,,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fasta/ERX5474930_ERR5766174_1.fa.gz +2613,ILLUMINA,ERR5766181,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474937_ERR5766181_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474937_ERR5766181_2.fastq.gz, +ERR3201952,OXFORD_NANOPORE,ERR3201952,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERR3201952.fastq.gz,, +2612,ILLUMINA,ERR5766176,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_2.fastq.gz, +2612,ILLUMINA,ERR5766176_B,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_B_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_B_2.fastq.gz, +2612,ILLUMINA,ERR5766180,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474936_ERR5766180_1.fastq.gz,, diff --git a/tests/data/example_peps-master/example_nextflow_config/samplesheet.csv b/tests/data/eidodata/peps/pep_nextflow_taxprofiler/samplesheet.csv similarity index 100% rename from tests/data/example_peps-master/example_nextflow_config/samplesheet.csv rename to tests/data/eidodata/peps/pep_nextflow_taxprofiler/samplesheet.csv diff --git a/tests/data/eidodata/peps/pep_schema_rel_path/config.yaml b/tests/data/eidodata/peps/pep_schema_rel_path/config.yaml new file mode 100644 index 00000000..2783c73d --- /dev/null +++ b/tests/data/eidodata/peps/pep_schema_rel_path/config.yaml @@ -0,0 +1,3 @@ +description: "Example PEP for this particular pipeline." 
+pep_version: 2.0.0 +sample_table: sample_sheet.csv diff --git a/tests/data/eidodata/peps/pep_schema_rel_path/sample_sheet.csv b/tests/data/eidodata/peps/pep_schema_rel_path/sample_sheet.csv new file mode 100644 index 00000000..18840dd6 --- /dev/null +++ b/tests/data/eidodata/peps/pep_schema_rel_path/sample_sheet.csv @@ -0,0 +1,3 @@ +"sample_name","patient" +"a","Test" +"b", "Also Test" diff --git a/tests/data/eidodata/peps/pep_with_fasta_column/config.yaml b/tests/data/eidodata/peps/pep_with_fasta_column/config.yaml new file mode 100644 index 00000000..a56d90b3 --- /dev/null +++ b/tests/data/eidodata/peps/pep_with_fasta_column/config.yaml @@ -0,0 +1,3 @@ +pep_version: "2.0.0" +sample_table: "samplesheet.csv" +subsample_table: "subsamplesheet.csv" diff --git a/tests/data/eidodata/peps/pep_with_fasta_column/output.csv b/tests/data/eidodata/peps/pep_with_fasta_column/output.csv new file mode 100644 index 00000000..ac7dc5b2 --- /dev/null +++ b/tests/data/eidodata/peps/pep_with_fasta_column/output.csv @@ -0,0 +1,8 @@ +sample,strandedness,instrument_platform,run_accession,fastq_1,fastq_2,fasta +WT_REP1,reverse,ABI_SOLID,runaccession1,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_2.fastq.gz, +WT_REP1,reverse,ABI_SOLID,runaccession2,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_2.fastq.gz, +WT_REP2,reverse,BGISEQ,123123123,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_2.fastq.gz, +RAP1_UNINDUCED_REP1,reverse,CAPILLARY,somerunaccesion,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357073_1.fastq.gz,, +RAP1_UNINDUCED_REP2,reverse,COMPLETE_GENOMICS,ERR2412421,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357074_1.fastq.gz,, +RAP1_UNINDUCED_REP2,reverse,COMPLETE_GENOMICS,xxxxxxxxxx,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357075_1.fastq.gz,, +RAP1_IAA_30M_REP1,reverse,DNBSEQ,None,,,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fasta/ERX5474930_ERR5766174_1.fa.gz diff --git a/tests/data/eidodata/peps/pep_with_fasta_column/samplesheet.csv b/tests/data/eidodata/peps/pep_with_fasta_column/samplesheet.csv new file mode 100644 index 00000000..6d9956d8 --- /dev/null +++ b/tests/data/eidodata/peps/pep_with_fasta_column/samplesheet.csv @@ -0,0 +1,6 @@ +sample,strandedness,instrument_platform +WT_REP1,reverse,ABI_SOLID +WT_REP2,reverse,BGISEQ +RAP1_UNINDUCED_REP1,reverse,CAPILLARY +RAP1_UNINDUCED_REP2,reverse,COMPLETE_GENOMICS +RAP1_IAA_30M_REP1,reverse,DNBSEQ diff --git a/tests/data/eidodata/peps/pep_with_fasta_column/subsamplesheet.csv b/tests/data/eidodata/peps/pep_with_fasta_column/subsamplesheet.csv new file mode 100644 index 00000000..446f9a91 --- /dev/null +++ b/tests/data/eidodata/peps/pep_with_fasta_column/subsamplesheet.csv @@ -0,0 +1,8 @@ +sample,run_accession,fastq_1,fastq_2,fasta,strandedness 
+WT_REP1,runaccession1,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_2.fastq.gz,,reverse +WT_REP1,runaccession2,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_2.fastq.gz,,reverse +WT_REP2,123123123,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_2.fastq.gz,,reverse +RAP1_UNINDUCED_REP1,somerunaccesion,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357073_1.fastq.gz,,,reverse +RAP1_UNINDUCED_REP2,ERR2412421,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357074_1.fastq.gz,,,reverse +RAP1_UNINDUCED_REP2,xxxxxxxxxx,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357075_1.fastq.gz,,,reverse +RAP1_IAA_30M_REP1,None,,,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fasta/ERX5474930_ERR5766174_1.fa.gz,reverse diff --git a/tests/data/eidodata/peps/test_file_existing/project_config.yaml b/tests/data/eidodata/peps/test_file_existing/project_config.yaml new file mode 100644 index 00000000..23ebfee5 --- /dev/null +++ b/tests/data/eidodata/peps/test_file_existing/project_config.yaml @@ -0,0 +1,12 @@ +pep_version: "2.1.0" +sample_table: sample_table.csv +subsample_table: subsample_table.csv + + +sample_modifiers: + append: + local_files: LOCAL + derive: + attributes: [local_files] + sources: + LOCAL: "../data/{file_path}" diff --git a/tests/data/eidodata/peps/test_file_existing/sample_table.csv b/tests/data/eidodata/peps/test_file_existing/sample_table.csv new file mode 100644 index 00000000..1137443a --- /dev/null +++ b/tests/data/eidodata/peps/test_file_existing/sample_table.csv @@ -0,0 +1,5 @@ +sample_name,protocol,identifier +frog_1,anySampleType,frog1 +frog_2,anySampleType,frog2 +frog_3,anySampleType,frog3 +frog_4,anySampleType,frog4 diff --git a/tests/data/eidodata/peps/test_file_existing/subsample_table.csv b/tests/data/eidodata/peps/test_file_existing/subsample_table.csv new file mode 100644 index 00000000..1d4f9553 --- /dev/null +++ b/tests/data/eidodata/peps/test_file_existing/subsample_table.csv @@ -0,0 +1,6 @@ +sample_name,file_path,subsample_name +frog_1,file/a.txt,a +frog_1,file/b.txt,b +frog_1,file/c.txt,c +frog_2,file/a.txt,a +frog_2,file/b.txt,b diff --git a/tests/data/eidodata/peps/test_pep/test_cfg.yaml b/tests/data/eidodata/peps/test_pep/test_cfg.yaml new file mode 100644 index 00000000..32f028d7 --- /dev/null +++ b/tests/data/eidodata/peps/test_pep/test_cfg.yaml @@ -0,0 +1,10 @@ +name: test +pep_version: 2.0.0 +sample_table: test_sample_table.csv + +sample_modifiers: + imply: + - if: + organism: "Homo sapiens" + then: + genome: hg38 diff --git a/tests/data/eidodata/peps/test_pep/test_sample_table.csv b/tests/data/eidodata/peps/test_pep/test_sample_table.csv new file mode 100644 index 00000000..d2881012 --- /dev/null +++ b/tests/data/eidodata/peps/test_pep/test_sample_table.csv @@ -0,0 +1,3 @@ +sample_name,protocol,genome +GSM1558746,GRO,hg38 +GSM1480327,PRO,hg38 diff --git a/tests/data/eidodata/peps/value_check_pep/project_config.yaml b/tests/data/eidodata/peps/value_check_pep/project_config.yaml new 
file mode 100644 index 00000000..66c4380c --- /dev/null +++ b/tests/data/eidodata/peps/value_check_pep/project_config.yaml @@ -0,0 +1,6 @@ +description: None +name: encode_prj +pep_version: 2.0.0 +project_name: value_check_pep +sample_table: sample_table.csv +subsample_table: [] diff --git a/tests/data/eidodata/peps/value_check_pep/sample_table.csv b/tests/data/eidodata/peps/value_check_pep/sample_table.csv new file mode 100644 index 00000000..cefc2aa3 --- /dev/null +++ b/tests/data/eidodata/peps/value_check_pep/sample_table.csv @@ -0,0 +1,7 @@ +sample_name,file_name,genome,assay,cell_line,target,format_type +encode_4,ENCFF452DAM.bed.gz,hg38,Histone ChIP-seq,skeletal muscle myoblast,H3K36me3,narrowPeak +encode_20,ENCFF121AXG.bed.gz,hg38,DNase-seq,RPMI7951,,tssPeak +encode_21,ENCFF710ECJ.bed.gz,hg38,DNase-seq,RPMI7951,,broadPeak +encode_22,ENCFF945FZN.bed.gz,hg38,DNase-seq,RPMI7951,,narrowPeak +encode_23,ENCFF322PQO.bed.gz,hg38,DNase-seq,RPMI7951,,tssPeak +encode_24,ENCFF322PQO.bed.gz,hg38,DNase-seq,RPMI7951,,tssPeak1 diff --git a/tests/data/eidodata/schemas/schema_test_file_exist.yaml b/tests/data/eidodata/schemas/schema_test_file_exist.yaml new file mode 100644 index 00000000..e1814b8d --- /dev/null +++ b/tests/data/eidodata/schemas/schema_test_file_exist.yaml @@ -0,0 +1,35 @@ +description: test existing files in subsamples + +properties: + dcc: + type: object + properties: + compute_packages: + type: object + samples: + type: array + items: + type: object + properties: + sample_name: + type: string + protocol: + type: string + local_files: + anyOf: + - type: string + - type: array + items: + type: string + sizing: + - local_files + + tangible: + - local_files + + required: + - sample_name + - local_files + +required: + - samples diff --git a/tests/data/eidodata/schemas/test_schema.yaml b/tests/data/eidodata/schemas/test_schema.yaml new file mode 100644 index 00000000..13cdd1d6 --- /dev/null +++ b/tests/data/eidodata/schemas/test_schema.yaml @@ -0,0 +1,22 @@ +description: test PEP schema + +properties: + dcc: + type: object + properties: + compute_packages: + type: object + samples: + type: array + items: + type: object + properties: + sample_name: + type: string + protocol: + type: string + genome: + type: string + +required: + - samples diff --git a/tests/data/eidodata/schemas/test_schema_imports.yaml b/tests/data/eidodata/schemas/test_schema_imports.yaml new file mode 100644 index 00000000..e48db5df --- /dev/null +++ b/tests/data/eidodata/schemas/test_schema_imports.yaml @@ -0,0 +1,17 @@ +imports: + - http://schema.databio.org/pep/2.0.0.yaml +description: Schema for a more restrictive PEP +properties: + samples: + type: array + items: + type: object + properties: + my_numeric_attribute: + type: integer + minimum: 0 + maximum: 1 + required: + - my_numeric_attribute +required: + - samples diff --git a/tests/data/eidodata/schemas/test_schema_imports_rel_path.yaml b/tests/data/eidodata/schemas/test_schema_imports_rel_path.yaml new file mode 100644 index 00000000..527fb341 --- /dev/null +++ b/tests/data/eidodata/schemas/test_schema_imports_rel_path.yaml @@ -0,0 +1,13 @@ +description: "PEP validation schema for this particular pipeline." +version: "2.0.0" +imports: + - "../common/schemas/common_pep_validation.yaml" +properties: + samples: + items: + properties: + patient: + type: string + pattern: "\\S+" + description: >- + Unique identifier of the patient a sample has been taken from. 
\ No newline at end of file diff --git a/tests/data/eidodata/schemas/test_schema_invalid.yaml b/tests/data/eidodata/schemas/test_schema_invalid.yaml new file mode 100644 index 00000000..715e8243 --- /dev/null +++ b/tests/data/eidodata/schemas/test_schema_invalid.yaml @@ -0,0 +1,25 @@ +description: test PEP schema + +properties: + dcc: + type: object + properties: + compute_packages: + type: object + samples: + type: array + items: + type: object + properties: + sample_name: + type: string + protocol: + type: string + genome: + type: string + invalid: + type: string + +required: + - samples + - invalid diff --git a/tests/data/eidodata/schemas/test_schema_invalid_with_type.yaml b/tests/data/eidodata/schemas/test_schema_invalid_with_type.yaml new file mode 100644 index 00000000..9815bdae --- /dev/null +++ b/tests/data/eidodata/schemas/test_schema_invalid_with_type.yaml @@ -0,0 +1,25 @@ +description: test PEP schema +type: object +properties: + dcc: + type: object + properties: + compute_packages: + type: object + samples: + type: array + items: + type: object + properties: + sample_name: + type: string + protocol: + type: string + genome: + type: string + invalid: + type: string + +required: + - samples + - invalid diff --git a/tests/data/eidodata/schemas/test_schema_sample_invalid.yaml b/tests/data/eidodata/schemas/test_schema_sample_invalid.yaml new file mode 100644 index 00000000..7b429c1e --- /dev/null +++ b/tests/data/eidodata/schemas/test_schema_sample_invalid.yaml @@ -0,0 +1,26 @@ +description: test PEP schema + +properties: + dcc: + type: object + properties: + compute_packages: + type: object + samples: + type: array + items: + type: object + properties: + sample_name: + type: string + protocol: + type: string + genome: + type: string + newattr: + type: string + required: + - newattr + +required: + - samples diff --git a/tests/data/eidodata/schemas/test_schema_samples.yaml b/tests/data/eidodata/schemas/test_schema_samples.yaml new file mode 100644 index 00000000..13cdd1d6 --- /dev/null +++ b/tests/data/eidodata/schemas/test_schema_samples.yaml @@ -0,0 +1,22 @@ +description: test PEP schema + +properties: + dcc: + type: object + properties: + compute_packages: + type: object + samples: + type: array + items: + type: object + properties: + sample_name: + type: string + protocol: + type: string + genome: + type: string + +required: + - samples diff --git a/tests/data/eidodata/schemas/value_check_schema.yaml b/tests/data/eidodata/schemas/value_check_schema.yaml new file mode 100644 index 00000000..fb2352dc --- /dev/null +++ b/tests/data/eidodata/schemas/value_check_schema.yaml @@ -0,0 +1,16 @@ +description: bedboss run-all pep schema +properties: + samples: + items: + properties: + format_type: + description: whether the regions are narrow (transcription factor implies + narrow, histone mark implies broad peaks) + enum: + - narrowPeak + - broadPeak + type: string + type: object + type: array +required: +- samples diff --git a/tests/data/example_peps-master/.gitignore b/tests/data/peppydata/example_peps-master/.gitignore similarity index 100% rename from tests/data/example_peps-master/.gitignore rename to tests/data/peppydata/example_peps-master/.gitignore diff --git a/tests/data/example_peps-master/.pre-commit-config.yaml b/tests/data/peppydata/example_peps-master/.pre-commit-config.yaml similarity index 100% rename from tests/data/example_peps-master/.pre-commit-config.yaml rename to tests/data/peppydata/example_peps-master/.pre-commit-config.yaml diff --git 
a/tests/data/example_peps-master/README.md b/tests/data/peppydata/example_peps-master/README.md similarity index 100% rename from tests/data/example_peps-master/README.md rename to tests/data/peppydata/example_peps-master/README.md diff --git a/tests/data/example_peps-master/data/frog1_data.txt b/tests/data/peppydata/example_peps-master/data/frog1_data.txt similarity index 100% rename from tests/data/example_peps-master/data/frog1_data.txt rename to tests/data/peppydata/example_peps-master/data/frog1_data.txt diff --git a/tests/data/example_peps-master/data/frog1a_data.txt b/tests/data/peppydata/example_peps-master/data/frog1a_data.txt similarity index 100% rename from tests/data/example_peps-master/data/frog1a_data.txt rename to tests/data/peppydata/example_peps-master/data/frog1a_data.txt diff --git a/tests/data/example_peps-master/data/frog1a_data2.txt b/tests/data/peppydata/example_peps-master/data/frog1a_data2.txt similarity index 100% rename from tests/data/example_peps-master/data/frog1a_data2.txt rename to tests/data/peppydata/example_peps-master/data/frog1a_data2.txt diff --git a/tests/data/example_peps-master/data/frog1b_data.txt b/tests/data/peppydata/example_peps-master/data/frog1b_data.txt similarity index 100% rename from tests/data/example_peps-master/data/frog1b_data.txt rename to tests/data/peppydata/example_peps-master/data/frog1b_data.txt diff --git a/tests/data/example_peps-master/data/frog1b_data2.txt b/tests/data/peppydata/example_peps-master/data/frog1b_data2.txt similarity index 100% rename from tests/data/example_peps-master/data/frog1b_data2.txt rename to tests/data/peppydata/example_peps-master/data/frog1b_data2.txt diff --git a/tests/data/example_peps-master/data/frog1c_data.txt b/tests/data/peppydata/example_peps-master/data/frog1c_data.txt similarity index 100% rename from tests/data/example_peps-master/data/frog1c_data.txt rename to tests/data/peppydata/example_peps-master/data/frog1c_data.txt diff --git a/tests/data/example_peps-master/data/frog1c_data2.txt b/tests/data/peppydata/example_peps-master/data/frog1c_data2.txt similarity index 100% rename from tests/data/example_peps-master/data/frog1c_data2.txt rename to tests/data/peppydata/example_peps-master/data/frog1c_data2.txt diff --git a/tests/data/example_peps-master/data/frog2_data.txt b/tests/data/peppydata/example_peps-master/data/frog2_data.txt similarity index 100% rename from tests/data/example_peps-master/data/frog2_data.txt rename to tests/data/peppydata/example_peps-master/data/frog2_data.txt diff --git a/tests/data/example_peps-master/data/frog2a_data.txt b/tests/data/peppydata/example_peps-master/data/frog2a_data.txt similarity index 100% rename from tests/data/example_peps-master/data/frog2a_data.txt rename to tests/data/peppydata/example_peps-master/data/frog2a_data.txt diff --git a/tests/data/example_peps-master/data/frog2b_data.txt b/tests/data/peppydata/example_peps-master/data/frog2b_data.txt similarity index 100% rename from tests/data/example_peps-master/data/frog2b_data.txt rename to tests/data/peppydata/example_peps-master/data/frog2b_data.txt diff --git a/tests/data/example_peps-master/data/frog3_data.txt b/tests/data/peppydata/example_peps-master/data/frog3_data.txt similarity index 100% rename from tests/data/example_peps-master/data/frog3_data.txt rename to tests/data/peppydata/example_peps-master/data/frog3_data.txt diff --git a/tests/data/example_peps-master/data/frog4_data.txt b/tests/data/peppydata/example_peps-master/data/frog4_data.txt similarity index 100% rename from 
tests/data/example_peps-master/data/frog4_data.txt rename to tests/data/peppydata/example_peps-master/data/frog4_data.txt diff --git a/tests/data/example_peps-master/example_BiocProject/data/laminB1Lads.bed b/tests/data/peppydata/example_peps-master/example_BiocProject/data/laminB1Lads.bed similarity index 100% rename from tests/data/example_peps-master/example_BiocProject/data/laminB1Lads.bed rename to tests/data/peppydata/example_peps-master/example_BiocProject/data/laminB1Lads.bed diff --git a/tests/data/example_peps-master/example_BiocProject/data/vistaEnhancers.bed b/tests/data/peppydata/example_peps-master/example_BiocProject/data/vistaEnhancers.bed similarity index 100% rename from tests/data/example_peps-master/example_BiocProject/data/vistaEnhancers.bed rename to tests/data/peppydata/example_peps-master/example_BiocProject/data/vistaEnhancers.bed diff --git a/tests/data/example_peps-master/example_BiocProject/project_config.yaml b/tests/data/peppydata/example_peps-master/example_BiocProject/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_BiocProject/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_BiocProject/project_config.yaml diff --git a/tests/data/example_peps-master/example_BiocProject/project_config_resize.yaml b/tests/data/peppydata/example_peps-master/example_BiocProject/project_config_resize.yaml similarity index 100% rename from tests/data/example_peps-master/example_BiocProject/project_config_resize.yaml rename to tests/data/peppydata/example_peps-master/example_BiocProject/project_config_resize.yaml diff --git a/tests/data/example_peps-master/example_BiocProject/readBedFiles.R b/tests/data/peppydata/example_peps-master/example_BiocProject/readBedFiles.R similarity index 100% rename from tests/data/example_peps-master/example_BiocProject/readBedFiles.R rename to tests/data/peppydata/example_peps-master/example_BiocProject/readBedFiles.R diff --git a/tests/data/example_peps-master/example_BiocProject/readBedFiles_resize.R b/tests/data/peppydata/example_peps-master/example_BiocProject/readBedFiles_resize.R similarity index 100% rename from tests/data/example_peps-master/example_BiocProject/readBedFiles_resize.R rename to tests/data/peppydata/example_peps-master/example_BiocProject/readBedFiles_resize.R diff --git a/tests/data/example_peps-master/example_BiocProject/sample_table.csv b/tests/data/peppydata/example_peps-master/example_BiocProject/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_BiocProject/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_BiocProject/sample_table.csv diff --git a/tests/data/example_peps-master/example_BiocProject_exceptions/project_config.yaml b/tests/data/peppydata/example_peps-master/example_BiocProject_exceptions/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_BiocProject_exceptions/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_BiocProject_exceptions/project_config.yaml diff --git a/tests/data/example_peps-master/example_BiocProject_exceptions/readBedFilesExceptions.R b/tests/data/peppydata/example_peps-master/example_BiocProject_exceptions/readBedFilesExceptions.R similarity index 100% rename from tests/data/example_peps-master/example_BiocProject_exceptions/readBedFilesExceptions.R rename to tests/data/peppydata/example_peps-master/example_BiocProject_exceptions/readBedFilesExceptions.R diff --git 
a/tests/data/example_peps-master/example_BiocProject_exceptions/sample_table.csv b/tests/data/peppydata/example_peps-master/example_BiocProject_exceptions/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_BiocProject_exceptions/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_BiocProject_exceptions/sample_table.csv diff --git a/tests/data/example_peps-master/example_BiocProject_remote/project_config.yaml b/tests/data/peppydata/example_peps-master/example_BiocProject_remote/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_BiocProject_remote/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_BiocProject_remote/project_config.yaml diff --git a/tests/data/example_peps-master/example_BiocProject_remote/project_config_resize.yaml b/tests/data/peppydata/example_peps-master/example_BiocProject_remote/project_config_resize.yaml similarity index 100% rename from tests/data/example_peps-master/example_BiocProject_remote/project_config_resize.yaml rename to tests/data/peppydata/example_peps-master/example_BiocProject_remote/project_config_resize.yaml diff --git a/tests/data/example_peps-master/example_BiocProject_remote/readRemoteData.R b/tests/data/peppydata/example_peps-master/example_BiocProject_remote/readRemoteData.R similarity index 100% rename from tests/data/example_peps-master/example_BiocProject_remote/readRemoteData.R rename to tests/data/peppydata/example_peps-master/example_BiocProject_remote/readRemoteData.R diff --git a/tests/data/example_peps-master/example_BiocProject_remote/readRemoteData_resize.R b/tests/data/peppydata/example_peps-master/example_BiocProject_remote/readRemoteData_resize.R similarity index 100% rename from tests/data/example_peps-master/example_BiocProject_remote/readRemoteData_resize.R rename to tests/data/peppydata/example_peps-master/example_BiocProject_remote/readRemoteData_resize.R diff --git a/tests/data/example_peps-master/example_BiocProject_remote/sample_table.csv b/tests/data/peppydata/example_peps-master/example_BiocProject_remote/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_BiocProject_remote/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_BiocProject_remote/sample_table.csv diff --git a/tests/data/example_peps-master/example_amendments1/project_config.yaml b/tests/data/peppydata/example_peps-master/example_amendments1/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_amendments1/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_amendments1/project_config.yaml diff --git a/tests/data/example_peps-master/example_amendments1/sample_table.csv b/tests/data/peppydata/example_peps-master/example_amendments1/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_amendments1/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_amendments1/sample_table.csv diff --git a/tests/data/example_peps-master/example_amendments1/sample_table_newLib.csv b/tests/data/peppydata/example_peps-master/example_amendments1/sample_table_newLib.csv similarity index 100% rename from tests/data/example_peps-master/example_amendments1/sample_table_newLib.csv rename to tests/data/peppydata/example_peps-master/example_amendments1/sample_table_newLib.csv diff --git a/tests/data/example_peps-master/example_amendments1/sample_table_newLib2.csv 
b/tests/data/peppydata/example_peps-master/example_amendments1/sample_table_newLib2.csv similarity index 100% rename from tests/data/example_peps-master/example_amendments1/sample_table_newLib2.csv rename to tests/data/peppydata/example_peps-master/example_amendments1/sample_table_newLib2.csv diff --git a/tests/data/example_peps-master/example_amendments1/sample_table_pre.csv b/tests/data/peppydata/example_peps-master/example_amendments1/sample_table_pre.csv similarity index 100% rename from tests/data/example_peps-master/example_amendments1/sample_table_pre.csv rename to tests/data/peppydata/example_peps-master/example_amendments1/sample_table_pre.csv diff --git a/tests/data/peppydata/example_peps-master/example_amendments1/win_project_config.yaml b/tests/data/peppydata/example_peps-master/example_amendments1/win_project_config.yaml new file mode 100644 index 00000000..03347a84 --- /dev/null +++ b/tests/data/peppydata/example_peps-master/example_amendments1/win_project_config.yaml @@ -0,0 +1,16 @@ +pep_version: "2.0.0" +sample_table: sample_table.csv +output_dir: "%USERPROFILE%/hello_looper_results" + +sample_modifiers: + derive: + attributes: [file_path] + sources: + source1: /data/lab/project/{organism}_{time}h.fastq + source2: /path/from/collaborator/weirdNamingScheme_{external_id}.fastq +project_modifiers: + amend: + newLib: + sample_table: sample_table_newLib.csv + newLib2: + sample_table: sample_table_newLib2.csv diff --git a/tests/data/example_peps-master/example_amendments2/project_config.yaml b/tests/data/peppydata/example_peps-master/example_amendments2/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_amendments2/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_amendments2/project_config.yaml diff --git a/tests/data/example_peps-master/example_amendments2/sample_table.csv b/tests/data/peppydata/example_peps-master/example_amendments2/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_amendments2/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_amendments2/sample_table.csv diff --git a/tests/data/example_peps-master/example_amendments2/sample_table_noFrog.csv b/tests/data/peppydata/example_peps-master/example_amendments2/sample_table_noFrog.csv similarity index 100% rename from tests/data/example_peps-master/example_amendments2/sample_table_noFrog.csv rename to tests/data/peppydata/example_peps-master/example_amendments2/sample_table_noFrog.csv diff --git a/tests/data/example_peps-master/example_amendments2/sample_table_pre.csv b/tests/data/peppydata/example_peps-master/example_amendments2/sample_table_pre.csv similarity index 100% rename from tests/data/example_peps-master/example_amendments2/sample_table_pre.csv rename to tests/data/peppydata/example_peps-master/example_amendments2/sample_table_pre.csv diff --git a/tests/data/example_peps-master/example_append/project_config.yaml b/tests/data/peppydata/example_peps-master/example_append/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_append/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_append/project_config.yaml diff --git a/tests/data/example_peps-master/example_append/sample_table.csv b/tests/data/peppydata/example_peps-master/example_append/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_append/sample_table.csv rename to 
tests/data/peppydata/example_peps-master/example_append/sample_table.csv diff --git a/tests/data/example_peps-master/example_append/sample_table_pre.csv b/tests/data/peppydata/example_peps-master/example_append/sample_table_pre.csv similarity index 100% rename from tests/data/example_peps-master/example_append/sample_table_pre.csv rename to tests/data/peppydata/example_peps-master/example_append/sample_table_pre.csv diff --git a/tests/data/example_peps-master/example_automerge/project_config.yaml b/tests/data/peppydata/example_peps-master/example_automerge/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_automerge/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_automerge/project_config.yaml diff --git a/tests/data/example_peps-master/example_automerge/sample_table.csv b/tests/data/peppydata/example_peps-master/example_automerge/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_automerge/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_automerge/sample_table.csv diff --git a/tests/data/example_peps-master/example_basic/project_config.yaml b/tests/data/peppydata/example_peps-master/example_basic/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_basic/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_basic/project_config.yaml diff --git a/tests/data/example_peps-master/example_basic/sample_table.csv b/tests/data/peppydata/example_peps-master/example_basic/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_basic/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_basic/sample_table.csv diff --git a/tests/data/example_peps-master/example_basic_sample_yaml/sample.yaml b/tests/data/peppydata/example_peps-master/example_basic_sample_yaml/sample.yaml similarity index 100% rename from tests/data/example_peps-master/example_basic_sample_yaml/sample.yaml rename to tests/data/peppydata/example_peps-master/example_basic_sample_yaml/sample.yaml diff --git a/tests/data/example_peps-master/example_custom_index/project_config.yaml b/tests/data/peppydata/example_peps-master/example_custom_index/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_custom_index/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_custom_index/project_config.yaml diff --git a/tests/data/example_peps-master/example_custom_index/sample_table.csv b/tests/data/peppydata/example_peps-master/example_custom_index/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_custom_index/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_custom_index/sample_table.csv diff --git a/tests/data/example_peps-master/example_derive/project_config.yaml b/tests/data/peppydata/example_peps-master/example_derive/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_derive/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_derive/project_config.yaml diff --git a/tests/data/example_peps-master/example_derive/sample_table.csv b/tests/data/peppydata/example_peps-master/example_derive/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_derive/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_derive/sample_table.csv diff --git 
a/tests/data/example_peps-master/example_derive/sample_table_pre.csv b/tests/data/peppydata/example_peps-master/example_derive/sample_table_pre.csv similarity index 100% rename from tests/data/example_peps-master/example_derive/sample_table_pre.csv rename to tests/data/peppydata/example_peps-master/example_derive/sample_table_pre.csv diff --git a/tests/data/example_peps-master/example_derive_imply/project_config.yaml b/tests/data/peppydata/example_peps-master/example_derive_imply/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_derive_imply/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_derive_imply/project_config.yaml diff --git a/tests/data/example_peps-master/example_derive_imply/sample_table.csv b/tests/data/peppydata/example_peps-master/example_derive_imply/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_derive_imply/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_derive_imply/sample_table.csv diff --git a/tests/data/example_peps-master/example_derive_imply/sample_table_pre.csv b/tests/data/peppydata/example_peps-master/example_derive_imply/sample_table_pre.csv similarity index 100% rename from tests/data/example_peps-master/example_derive_imply/sample_table_pre.csv rename to tests/data/peppydata/example_peps-master/example_derive_imply/sample_table_pre.csv diff --git a/tests/data/example_peps-master/example_duplicate/project_config.yaml b/tests/data/peppydata/example_peps-master/example_duplicate/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_duplicate/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_duplicate/project_config.yaml diff --git a/tests/data/example_peps-master/example_duplicate/sample_table.csv b/tests/data/peppydata/example_peps-master/example_duplicate/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_duplicate/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_duplicate/sample_table.csv diff --git a/tests/data/example_peps-master/example_imply/project_config.yaml b/tests/data/peppydata/example_peps-master/example_imply/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_imply/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_imply/project_config.yaml diff --git a/tests/data/example_peps-master/example_imply/sample_table.csv b/tests/data/peppydata/example_peps-master/example_imply/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_imply/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_imply/sample_table.csv diff --git a/tests/data/example_peps-master/example_imply/sample_table_pre.csv b/tests/data/peppydata/example_peps-master/example_imply/sample_table_pre.csv similarity index 100% rename from tests/data/example_peps-master/example_imply/sample_table_pre.csv rename to tests/data/peppydata/example_peps-master/example_imply/sample_table_pre.csv diff --git a/tests/data/example_peps-master/example_imports/project_config.yaml b/tests/data/peppydata/example_peps-master/example_imports/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_imports/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_imports/project_config.yaml diff --git 
a/tests/data/example_peps-master/example_imports/project_config1.yaml b/tests/data/peppydata/example_peps-master/example_imports/project_config1.yaml similarity index 100% rename from tests/data/example_peps-master/example_imports/project_config1.yaml rename to tests/data/peppydata/example_peps-master/example_imports/project_config1.yaml diff --git a/tests/data/example_peps-master/example_imports/sample_table.csv b/tests/data/peppydata/example_peps-master/example_imports/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_imports/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_imports/sample_table.csv diff --git a/tests/data/example_peps-master/example_incorrect_index/project_config.yaml b/tests/data/peppydata/example_peps-master/example_incorrect_index/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_incorrect_index/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_incorrect_index/project_config.yaml diff --git a/tests/data/example_peps-master/example_incorrect_index/sample_table.csv b/tests/data/peppydata/example_peps-master/example_incorrect_index/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_incorrect_index/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_incorrect_index/sample_table.csv diff --git a/tests/data/peppydata/example_peps-master/example_issue499/project_config.yaml b/tests/data/peppydata/example_peps-master/example_issue499/project_config.yaml new file mode 100644 index 00000000..ee846cb7 --- /dev/null +++ b/tests/data/peppydata/example_peps-master/example_issue499/project_config.yaml @@ -0,0 +1,10 @@ +pep_version: "2.0.0" +sample_table: sample_table.csv +output_dir: "$HOME/hello_looper_results" + +sample_modifiers: + derive: + attributes: [file_path] + sources: + source1: $PROJECT/{organism}_{time}h.fastq + source2: $COLLABORATOR/weirdNamingScheme_{external_id}.fastq diff --git a/tests/data/example_peps-master/example_remove/sample_table.csv b/tests/data/peppydata/example_peps-master/example_issue499/sample_table.csv old mode 100644 new mode 100755 similarity index 100% rename from tests/data/example_peps-master/example_remove/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_issue499/sample_table.csv diff --git a/tests/data/peppydata/example_peps-master/example_issue499/sample_table_pre.csv b/tests/data/peppydata/example_peps-master/example_issue499/sample_table_pre.csv new file mode 100755 index 00000000..159fc341 --- /dev/null +++ b/tests/data/peppydata/example_peps-master/example_issue499/sample_table_pre.csv @@ -0,0 +1,5 @@ +sample_name,protocol,organism,time,file_path +pig_0h,RRBS,pig,0,data/lab/project/pig_0h.fastq +pig_1h,RRBS,pig,1,data/lab/project/pig_1h.fastq +frog_0h,RRBS,frog,0,data/lab/project/frog_0h.fastq +frog_1h,RRBS,frog,1,data/lab/project/frog_1h.fastq diff --git a/tests/data/example_peps-master/example_missing_version/project_config.yaml b/tests/data/peppydata/example_peps-master/example_missing_version/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_missing_version/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_missing_version/project_config.yaml diff --git a/tests/data/example_peps-master/example_missing_version/sample_table.csv b/tests/data/peppydata/example_peps-master/example_missing_version/sample_table.csv similarity index 100% rename 
from tests/data/example_peps-master/example_missing_version/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_missing_version/sample_table.csv diff --git a/tests/data/example_peps-master/example_multiple_subsamples/project_config.yaml b/tests/data/peppydata/example_peps-master/example_multiple_subsamples/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_multiple_subsamples/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_multiple_subsamples/project_config.yaml diff --git a/tests/data/peppydata/example_peps-master/example_multiple_subsamples/sample_table.csv b/tests/data/peppydata/example_peps-master/example_multiple_subsamples/sample_table.csv new file mode 100644 index 00000000..7c06204c --- /dev/null +++ b/tests/data/peppydata/example_peps-master/example_multiple_subsamples/sample_table.csv @@ -0,0 +1,5 @@ +sample_id,protocol,identifier +frog_1,anySampleType,frog1 +frog_2,anySampleType,frog2 +frog_3,anySampleType,frog3 +frog_4,anySampleType,frog4 diff --git a/tests/data/peppydata/example_peps-master/example_multiple_subsamples/subsample_table1.csv b/tests/data/peppydata/example_peps-master/example_multiple_subsamples/subsample_table1.csv new file mode 100644 index 00000000..f1b3c2f1 --- /dev/null +++ b/tests/data/peppydata/example_peps-master/example_multiple_subsamples/subsample_table1.csv @@ -0,0 +1,6 @@ +sample_id,file_path,subsample_name +frog_1,file/a.txt,a +frog_1,file/b.txt,b +frog_1,file/c.txt,c +frog_2,file/a.txt,a +frog_2,file/b.txt,b diff --git a/tests/data/peppydata/example_peps-master/example_multiple_subsamples/subsample_table2.csv b/tests/data/peppydata/example_peps-master/example_multiple_subsamples/subsample_table2.csv new file mode 100644 index 00000000..5e6d2981 --- /dev/null +++ b/tests/data/peppydata/example_peps-master/example_multiple_subsamples/subsample_table2.csv @@ -0,0 +1,6 @@ +sample_id,random_string,subsample_name +frog_1,x_x,x +frog_1,y_y,y +frog_1,z_z,z +frog_2,xy_yx,xy +frog_2,xx_xx,xx diff --git a/tests/data/peppydata/example_peps-master/example_nextflow_config/project_config.yaml b/tests/data/peppydata/example_peps-master/example_nextflow_config/project_config.yaml new file mode 100644 index 00000000..51cc3784 --- /dev/null +++ b/tests/data/peppydata/example_peps-master/example_nextflow_config/project_config.yaml @@ -0,0 +1,3 @@ +pep_version: "2.1.0" +sample_table: "samplesheet.csv" + diff --git a/tests/data/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet.csv b/tests/data/peppydata/example_peps-master/example_nextflow_config/samplesheet.csv similarity index 100% rename from tests/data/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet.csv rename to tests/data/peppydata/example_peps-master/example_nextflow_config/samplesheet.csv diff --git a/tests/data/example_peps-master/example_nextflow_samplesheet/sample_table.csv b/tests/data/peppydata/example_peps-master/example_nextflow_samplesheet/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_nextflow_samplesheet/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_nextflow_samplesheet/sample_table.csv diff --git a/tests/data/example_peps-master/example_nextflow_subsamples/project_config.yaml b/tests/data/peppydata/example_peps-master/example_nextflow_subsamples/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_nextflow_subsamples/project_config.yaml rename to 
tests/data/peppydata/example_peps-master/example_nextflow_subsamples/project_config.yaml diff --git a/tests/data/peppydata/example_peps-master/example_nextflow_subsamples/samplesheet.csv b/tests/data/peppydata/example_peps-master/example_nextflow_subsamples/samplesheet.csv new file mode 100644 index 00000000..a26933ef --- /dev/null +++ b/tests/data/peppydata/example_peps-master/example_nextflow_subsamples/samplesheet.csv @@ -0,0 +1,6 @@ +sample,strandedness +WT_REP1,reverse +WT_REP2,reverse +RAP1_UNINDUCED_REP1,reverse +RAP1_UNINDUCED_REP2,reverse +RAP1_IAA_30M_REP1,reverse diff --git a/tests/data/peppydata/example_peps-master/example_nextflow_subsamples/subsamplesheet.csv b/tests/data/peppydata/example_peps-master/example_nextflow_subsamples/subsamplesheet.csv new file mode 100644 index 00000000..1e56c363 --- /dev/null +++ b/tests/data/peppydata/example_peps-master/example_nextflow_subsamples/subsamplesheet.csv @@ -0,0 +1,8 @@ +sample,instrument_platform,run_accession,fastq_1,fastq_2 +WT_REP1,ABI_SOLID,runaccession1,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357070_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357070_2.fastq.gz +WT_REP1,BGISEQ,runaccession2,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_2.fastq.gz +WT_REP2,CAPILLARY,123123123,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_2.fastq.gz +RAP1_UNINDUCED_REP1,COMPLETE_GENOMICS,somerunaccesion,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357073_1.fastq.gz, +RAP1_UNINDUCED_REP2,DNBSEQ,ERR2412421,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357074_1.fastq.gz, +RAP1_UNINDUCED_REP2,HELICOS,xxxxxxxxxx,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357075_1.fastq.gz, +RAP1_IAA_30M_REP1,ILLUMINA,None,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_2.fastq.gz diff --git a/tests/data/example_peps-master/example_nextflow_taxprofiler_pep/config.yaml b/tests/data/peppydata/example_peps-master/example_nextflow_taxprofiler_pep/config.yaml similarity index 100% rename from tests/data/example_peps-master/example_nextflow_taxprofiler_pep/config.yaml rename to tests/data/peppydata/example_peps-master/example_nextflow_taxprofiler_pep/config.yaml diff --git a/tests/data/peppydata/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet.csv b/tests/data/peppydata/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet.csv new file mode 100644 index 00000000..1b17b767 --- /dev/null +++ b/tests/data/peppydata/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet.csv @@ -0,0 +1,7 @@ +sample,instrument_platform,run_accession,fastq_1,fastq_2,fasta +2611,ILLUMINA,ERR5766174,,,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fasta/ERX5474930_ERR5766174_1.fa.gz 
+2612,ILLUMINA,ERR5766176,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_2.fastq.gz, +2612,ILLUMINA,ERR5766176_B,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_B_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_B_2.fastq.gz, +2612,ILLUMINA,ERR5766180,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474936_ERR5766180_1.fastq.gz,, +2613,ILLUMINA,ERR5766181,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474937_ERR5766181_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474937_ERR5766181_2.fastq.gz, +ERR3201952,OXFORD_NANOPORE,ERR3201952,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERR3201952.fastq.gz,, diff --git a/tests/data/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet_schema.yaml b/tests/data/peppydata/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet_schema.yaml similarity index 100% rename from tests/data/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet_schema.yaml rename to tests/data/peppydata/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet_schema.yaml diff --git a/tests/data/example_peps-master/example_nextflow_taxprofiler_pep/test_nextflow_original_samplesheet.csv b/tests/data/peppydata/example_peps-master/example_nextflow_taxprofiler_pep/test_nextflow_original_samplesheet.csv similarity index 100% rename from tests/data/example_peps-master/example_nextflow_taxprofiler_pep/test_nextflow_original_samplesheet.csv rename to tests/data/peppydata/example_peps-master/example_nextflow_taxprofiler_pep/test_nextflow_original_samplesheet.csv diff --git a/tests/data/example_peps-master/example_node_alias/README.md b/tests/data/peppydata/example_peps-master/example_node_alias/README.md similarity index 100% rename from tests/data/example_peps-master/example_node_alias/README.md rename to tests/data/peppydata/example_peps-master/example_node_alias/README.md diff --git a/tests/data/example_peps-master/example_node_alias/project_config.yaml b/tests/data/peppydata/example_peps-master/example_node_alias/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_node_alias/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_node_alias/project_config.yaml diff --git a/tests/data/example_peps-master/example_node_alias/project_config1.yaml b/tests/data/peppydata/example_peps-master/example_node_alias/project_config1.yaml similarity index 100% rename from tests/data/example_peps-master/example_node_alias/project_config1.yaml rename to tests/data/peppydata/example_peps-master/example_node_alias/project_config1.yaml diff --git a/tests/data/example_peps-master/example_node_alias/sample_table.csv b/tests/data/peppydata/example_peps-master/example_node_alias/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_node_alias/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_node_alias/sample_table.csv diff --git a/tests/data/example_peps-master/example_noname/project_config.yaml b/tests/data/peppydata/example_peps-master/example_noname/project_config.yaml similarity index 100% rename from 
tests/data/example_peps-master/example_noname/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_noname/project_config.yaml diff --git a/tests/data/example_peps-master/example_noname/project_config_noname.yaml b/tests/data/peppydata/example_peps-master/example_noname/project_config_noname.yaml similarity index 100% rename from tests/data/example_peps-master/example_noname/project_config_noname.yaml rename to tests/data/peppydata/example_peps-master/example_noname/project_config_noname.yaml diff --git a/tests/data/example_peps-master/example_noname/sample_table.csv b/tests/data/peppydata/example_peps-master/example_noname/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_noname/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_noname/sample_table.csv diff --git a/tests/data/example_peps-master/example_old/project_config.yaml b/tests/data/peppydata/example_peps-master/example_old/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_old/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_old/project_config.yaml diff --git a/tests/data/example_peps-master/example_old/sample_table.csv b/tests/data/peppydata/example_peps-master/example_old/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_old/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_old/sample_table.csv diff --git a/tests/data/example_peps-master/example_piface/annotation_sheet.csv b/tests/data/peppydata/example_peps-master/example_piface/annotation_sheet.csv similarity index 100% rename from tests/data/example_peps-master/example_piface/annotation_sheet.csv rename to tests/data/peppydata/example_peps-master/example_piface/annotation_sheet.csv diff --git a/tests/data/example_peps-master/example_piface/output_schema.yaml b/tests/data/peppydata/example_peps-master/example_piface/output_schema.yaml similarity index 100% rename from tests/data/example_peps-master/example_piface/output_schema.yaml rename to tests/data/peppydata/example_peps-master/example_piface/output_schema.yaml diff --git a/tests/data/example_peps-master/example_piface/output_schema_project.yaml b/tests/data/peppydata/example_peps-master/example_piface/output_schema_project.yaml similarity index 100% rename from tests/data/example_peps-master/example_piface/output_schema_project.yaml rename to tests/data/peppydata/example_peps-master/example_piface/output_schema_project.yaml diff --git a/tests/data/example_peps-master/example_piface/output_schema_sample.yaml b/tests/data/peppydata/example_peps-master/example_piface/output_schema_sample.yaml similarity index 100% rename from tests/data/example_peps-master/example_piface/output_schema_sample.yaml rename to tests/data/peppydata/example_peps-master/example_piface/output_schema_sample.yaml diff --git a/tests/data/example_peps-master/example_piface/pipeline_interface1_project.yaml b/tests/data/peppydata/example_peps-master/example_piface/pipeline_interface1_project.yaml similarity index 100% rename from tests/data/example_peps-master/example_piface/pipeline_interface1_project.yaml rename to tests/data/peppydata/example_peps-master/example_piface/pipeline_interface1_project.yaml diff --git a/tests/data/example_peps-master/example_piface/pipeline_interface1_sample.yaml b/tests/data/peppydata/example_peps-master/example_piface/pipeline_interface1_sample.yaml similarity index 100% rename from 
tests/data/example_peps-master/example_piface/pipeline_interface1_sample.yaml rename to tests/data/peppydata/example_peps-master/example_piface/pipeline_interface1_sample.yaml diff --git a/tests/data/example_peps-master/example_piface/pipeline_interface2_project.yaml b/tests/data/peppydata/example_peps-master/example_piface/pipeline_interface2_project.yaml similarity index 100% rename from tests/data/example_peps-master/example_piface/pipeline_interface2_project.yaml rename to tests/data/peppydata/example_peps-master/example_piface/pipeline_interface2_project.yaml diff --git a/tests/data/example_peps-master/example_piface/pipeline_interface2_sample.yaml b/tests/data/peppydata/example_peps-master/example_piface/pipeline_interface2_sample.yaml similarity index 100% rename from tests/data/example_peps-master/example_piface/pipeline_interface2_sample.yaml rename to tests/data/peppydata/example_peps-master/example_piface/pipeline_interface2_sample.yaml diff --git a/tests/data/example_peps-master/example_piface/project_config.yaml b/tests/data/peppydata/example_peps-master/example_piface/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_piface/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_piface/project_config.yaml diff --git a/tests/data/example_peps-master/example_piface/readData.R b/tests/data/peppydata/example_peps-master/example_piface/readData.R similarity index 100% rename from tests/data/example_peps-master/example_piface/readData.R rename to tests/data/peppydata/example_peps-master/example_piface/readData.R diff --git a/tests/data/example_peps-master/example_project_as_dictionary/project.json b/tests/data/peppydata/example_peps-master/example_project_as_dictionary/project.json similarity index 100% rename from tests/data/example_peps-master/example_project_as_dictionary/project.json rename to tests/data/peppydata/example_peps-master/example_project_as_dictionary/project.json diff --git a/tests/data/example_peps-master/example_remove/project_config.yaml b/tests/data/peppydata/example_peps-master/example_remove/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_remove/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_remove/project_config.yaml diff --git a/tests/data/peppydata/example_peps-master/example_remove/sample_table.csv b/tests/data/peppydata/example_peps-master/example_remove/sample_table.csv new file mode 100644 index 00000000..bcfd9bde --- /dev/null +++ b/tests/data/peppydata/example_peps-master/example_remove/sample_table.csv @@ -0,0 +1,5 @@ +sample_name,protocol,organism,time,file_path +pig_0h,RRBS,pig,0,source1 +pig_1h,RRBS,pig,1,source1 +frog_0h,RRBS,frog,0,source1 +frog_1h,RRBS,frog,1,source1 diff --git a/tests/data/example_peps-master/example_subsamples_none/project_config.yaml b/tests/data/peppydata/example_peps-master/example_subsamples_none/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_subsamples_none/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_subsamples_none/project_config.yaml diff --git a/tests/data/example_peps-master/example_subsamples_none/sample_table.csv b/tests/data/peppydata/example_peps-master/example_subsamples_none/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subsamples_none/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_subsamples_none/sample_table.csv diff 
--git a/tests/data/example_peps-master/example_subtable1/project_config.yaml b/tests/data/peppydata/example_peps-master/example_subtable1/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_subtable1/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_subtable1/project_config.yaml diff --git a/tests/data/example_peps-master/example_subtable1/sample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable1/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable1/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable1/sample_table.csv diff --git a/tests/data/example_peps-master/example_subtable1/subsample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable1/subsample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable1/subsample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable1/subsample_table.csv diff --git a/tests/data/example_peps-master/example_subtable2/project_config.yaml b/tests/data/peppydata/example_peps-master/example_subtable2/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_subtable2/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_subtable2/project_config.yaml diff --git a/tests/data/example_peps-master/example_subtable2/sample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable2/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable2/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable2/sample_table.csv diff --git a/tests/data/example_peps-master/example_subtable2/subsample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable2/subsample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable2/subsample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable2/subsample_table.csv diff --git a/tests/data/example_peps-master/example_subtable3/project_config.yaml b/tests/data/peppydata/example_peps-master/example_subtable3/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_subtable3/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_subtable3/project_config.yaml diff --git a/tests/data/example_peps-master/example_subtable3/sample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable3/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable3/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable3/sample_table.csv diff --git a/tests/data/example_peps-master/example_subtable3/subsample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable3/subsample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable3/subsample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable3/subsample_table.csv diff --git a/tests/data/example_peps-master/example_subtable4/project_config.yaml b/tests/data/peppydata/example_peps-master/example_subtable4/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_subtable4/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_subtable4/project_config.yaml diff --git 
a/tests/data/example_peps-master/example_subtable4/sample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable4/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable4/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable4/sample_table.csv diff --git a/tests/data/example_peps-master/example_subtable4/subsample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable4/subsample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable4/subsample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable4/subsample_table.csv diff --git a/tests/data/example_peps-master/example_subtable5/project_config.yaml b/tests/data/peppydata/example_peps-master/example_subtable5/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_subtable5/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_subtable5/project_config.yaml diff --git a/tests/data/example_peps-master/example_subtable5/sample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable5/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable5/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable5/sample_table.csv diff --git a/tests/data/example_peps-master/example_subtable5/subsample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable5/subsample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable5/subsample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable5/subsample_table.csv diff --git a/tests/data/example_peps-master/example_subtable_automerge/project_config.yaml b/tests/data/peppydata/example_peps-master/example_subtable_automerge/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_subtable_automerge/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_subtable_automerge/project_config.yaml diff --git a/tests/data/example_peps-master/example_subtable_automerge/sample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable_automerge/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable_automerge/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable_automerge/sample_table.csv diff --git a/tests/data/example_peps-master/example_subtable_automerge/subsample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable_automerge/subsample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable_automerge/subsample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable_automerge/subsample_table.csv diff --git a/tests/data/example_peps-master/example_subtables/project_config.yaml b/tests/data/peppydata/example_peps-master/example_subtables/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_subtables/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_subtables/project_config.yaml diff --git a/tests/data/example_peps-master/example_subtables/sample_table.csv b/tests/data/peppydata/example_peps-master/example_subtables/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtables/sample_table.csv rename to 
tests/data/peppydata/example_peps-master/example_subtables/sample_table.csv diff --git a/tests/data/example_peps-master/example_subtables/subsample_table.csv b/tests/data/peppydata/example_peps-master/example_subtables/subsample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtables/subsample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtables/subsample_table.csv diff --git a/tests/data/example_peps-master/example_subtables/subsample_table1.csv b/tests/data/peppydata/example_peps-master/example_subtables/subsample_table1.csv similarity index 100% rename from tests/data/example_peps-master/example_subtables/subsample_table1.csv rename to tests/data/peppydata/example_peps-master/example_subtables/subsample_table1.csv diff --git a/tests/data/example_peps-master/output/results_pipeline/sample1/other_pipeline1/sample1_GSM2471255_1.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample1/other_pipeline1/sample1_GSM2471255_1.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample1/other_pipeline1/sample1_GSM2471255_1.bw rename to tests/data/peppydata/example_peps-master/output/results_pipeline/sample1/other_pipeline1/sample1_GSM2471255_1.bw diff --git a/tests/data/example_peps-master/output/results_pipeline/sample1/other_pipeline1/sample1_GSM2471255_2.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample1/other_pipeline1/sample1_GSM2471255_2.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample1/other_pipeline1/sample1_GSM2471255_2.bw rename to tests/data/peppydata/example_peps-master/output/results_pipeline/sample1/other_pipeline1/sample1_GSM2471255_2.bw diff --git a/tests/data/example_peps-master/output/results_pipeline/sample1/pipeline1/sample1_GSM2471255_1.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample1/pipeline1/sample1_GSM2471255_1.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample1/pipeline1/sample1_GSM2471255_1.bw rename to tests/data/peppydata/example_peps-master/output/results_pipeline/sample1/pipeline1/sample1_GSM2471255_1.bw diff --git a/tests/data/example_peps-master/output/results_pipeline/sample1/pipeline1/sample1_GSM2471255_2.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample1/pipeline1/sample1_GSM2471255_2.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample1/pipeline1/sample1_GSM2471255_2.bw rename to tests/data/peppydata/example_peps-master/output/results_pipeline/sample1/pipeline1/sample1_GSM2471255_2.bw diff --git a/tests/data/example_peps-master/output/results_pipeline/sample2/other_pipeline1/sample2_GSM2471300_1.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample2/other_pipeline1/sample2_GSM2471300_1.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample2/other_pipeline1/sample2_GSM2471300_1.bw rename to tests/data/peppydata/example_peps-master/output/results_pipeline/sample2/other_pipeline1/sample2_GSM2471300_1.bw diff --git a/tests/data/example_peps-master/output/results_pipeline/sample2/other_pipeline1/sample2_GSM2471300_2.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample2/other_pipeline1/sample2_GSM2471300_2.bw similarity index 100% rename from 
tests/data/example_peps-master/output/results_pipeline/sample2/other_pipeline1/sample2_GSM2471300_2.bw rename to tests/data/peppydata/example_peps-master/output/results_pipeline/sample2/other_pipeline1/sample2_GSM2471300_2.bw diff --git a/tests/data/example_peps-master/output/results_pipeline/sample2/pipeline1/sample2_GSM2471300_1.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample2/pipeline1/sample2_GSM2471300_1.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample2/pipeline1/sample2_GSM2471300_1.bw rename to tests/data/peppydata/example_peps-master/output/results_pipeline/sample2/pipeline1/sample2_GSM2471300_1.bw diff --git a/tests/data/example_peps-master/output/results_pipeline/sample2/pipeline1/sample2_GSM2471300_2.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample2/pipeline1/sample2_GSM2471300_2.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample2/pipeline1/sample2_GSM2471300_2.bw rename to tests/data/peppydata/example_peps-master/output/results_pipeline/sample2/pipeline1/sample2_GSM2471300_2.bw diff --git a/tests/data/example_peps-master/output/results_pipeline/sample3/other_pipeline2/sample3_GSM2471249_1.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample3/other_pipeline2/sample3_GSM2471249_1.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample3/other_pipeline2/sample3_GSM2471249_1.bw rename to tests/data/peppydata/example_peps-master/output/results_pipeline/sample3/other_pipeline2/sample3_GSM2471249_1.bw diff --git a/tests/data/example_peps-master/output/results_pipeline/sample3/other_pipeline2/sample3_GSM2471249_2.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample3/other_pipeline2/sample3_GSM2471249_2.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample3/other_pipeline2/sample3_GSM2471249_2.bw rename to tests/data/peppydata/example_peps-master/output/results_pipeline/sample3/other_pipeline2/sample3_GSM2471249_2.bw diff --git a/tests/data/example_peps-master/output/results_pipeline/sample3/pipeline2/sample3_GSM2471249_1.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample3/pipeline2/sample3_GSM2471249_1.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample3/pipeline2/sample3_GSM2471249_1.bw rename to tests/data/peppydata/example_peps-master/output/results_pipeline/sample3/pipeline2/sample3_GSM2471249_1.bw diff --git a/tests/data/example_peps-master/output/results_pipeline/sample3/pipeline2/sample3_GSM2471249_2.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample3/pipeline2/sample3_GSM2471249_2.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample3/pipeline2/sample3_GSM2471249_2.bw rename to tests/data/peppydata/example_peps-master/output/results_pipeline/sample3/pipeline2/sample3_GSM2471249_2.bw diff --git a/tests/data/example_peps-master/subannotation.ipynb b/tests/data/peppydata/example_peps-master/subannotation.ipynb similarity index 100% rename from tests/data/example_peps-master/subannotation.ipynb rename to tests/data/peppydata/example_peps-master/subannotation.ipynb diff --git a/tests/data/phcdata/sample_pep/sample_table.csv b/tests/data/phcdata/sample_pep/sample_table.csv new file mode 100644 index 00000000..00039113 --- /dev/null +++ 
b/tests/data/phcdata/sample_pep/sample_table.csv @@ -0,0 +1,5 @@ +file,file_id,protocol,identifier,sample_name +local_files,,anySampleType,frog1,frog_1 +local_files_unmerged,,anySampleType,frog2,frog_2 +local_files_unmerged,,anySampleType,frog3,frog_3 +local_files_unmerged,,anySampleType,frog4,frog_4 diff --git a/tests/data/phcdata/sample_pep/subsamp_config.yaml b/tests/data/phcdata/sample_pep/subsamp_config.yaml new file mode 100644 index 00000000..06fae282 --- /dev/null +++ b/tests/data/phcdata/sample_pep/subsamp_config.yaml @@ -0,0 +1,16 @@ +description: This project contains subsamples! +looper: + output_dir: $HOME/hello_looper_results + pipeline_interfaces: + - ../pipeline/pipeline_interface.yaml +name: subsamp +pep_version: 2.0.0 +sample_modifiers: + derive: + attributes: + - file + sources: + local_files: ../data/{identifier}{file_id}_data.txt + local_files_unmerged: ../data/{identifier}*_data.txt +sample_table: sample_table.csv +subsample_table: subsample_table.csv diff --git a/tests/data/phcdata/sample_pep/subsample_table.csv b/tests/data/phcdata/sample_pep/subsample_table.csv new file mode 100644 index 00000000..e7cd6400 --- /dev/null +++ b/tests/data/phcdata/sample_pep/subsample_table.csv @@ -0,0 +1,4 @@ +file_id,sample_name +a,frog_1 +b,frog_1 +c,frog_1 diff --git a/tests/eidotests/conftest.py b/tests/eidotests/conftest.py new file mode 100644 index 00000000..bb46a5b8 --- /dev/null +++ b/tests/eidotests/conftest.py @@ -0,0 +1,164 @@ +import os + +import pytest +from peppy import Project + + +@pytest.fixture +def data_path(): + return os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data", "eidodata" + ) + + +@pytest.fixture +def schemas_path(data_path): + return os.path.join(data_path, "schemas") + + +@pytest.fixture +def peps_path(data_path): + return os.path.join(data_path, "peps") + + +@pytest.fixture +def project_file_path(peps_path): + return os.path.join(peps_path, "test_pep", "test_cfg.yaml") + + +@pytest.fixture +def project_table_path(peps_path): + return os.path.join(peps_path, "test_pep", "test_sample_table.csv") + + +@pytest.fixture +def project_object(project_file_path): + return Project(project_file_path) + + +@pytest.fixture +def schema_file_path(schemas_path): + return os.path.join(schemas_path, "test_schema.yaml") + + +@pytest.fixture +def schema_samples_file_path(schemas_path): + return os.path.join(schemas_path, "test_schema_samples.yaml") + + +@pytest.fixture +def schema_invalid_file_path(schemas_path): + return os.path.join(schemas_path, "test_schema_invalid.yaml") + + +@pytest.fixture +def schema_sample_invalid_file_path(schemas_path): + return os.path.join(schemas_path, "test_schema_sample_invalid.yaml") + + +@pytest.fixture +def schema_imports_file_path(schemas_path): + return os.path.join(schemas_path, "test_schema_imports.yaml") + + +@pytest.fixture +def schema_rel_path_imports_file_path(schemas_path): + return os.path.join(schemas_path, "test_schema_imports_rel_path.yaml") + + +@pytest.fixture +def taxprofiler_project_path(peps_path): + return os.path.join(peps_path, "multiline_output", "config.yaml") + + +@pytest.fixture +def taxprofiler_project(taxprofiler_project_path): + return Project(taxprofiler_project_path) + + +@pytest.fixture +def path_to_taxprofiler_csv_multiline_output(peps_path): + return os.path.join(peps_path, "multiline_output", "multiline_output.csv") + + +@pytest.fixture +def path_pep_with_fasta_column(peps_path): + return os.path.join(peps_path, "pep_with_fasta_column", "config.yaml") + + 
+@pytest.fixture +def project_pep_with_fasta_column(path_pep_with_fasta_column): + return Project(path_pep_with_fasta_column, sample_table_index="sample") + + +@pytest.fixture +def output_pep_with_fasta_column(path_pep_with_fasta_column): + with open( + os.path.join(os.path.dirname(path_pep_with_fasta_column), "output.csv") + ) as f: + return f.read() + + +@pytest.fixture +def taxprofiler_csv_multiline_output(path_to_taxprofiler_csv_multiline_output): + with open(path_to_taxprofiler_csv_multiline_output, "r") as file: + data = file.read() + return data + # This is broken unless I add na_filter=False. But it's a bad idea anyway, since + # we're just using this for string comparison anyway... + # return pd.read_csv( + # path_to_taxprofiler_csv_multiline_output, na_filter=False + # ).to_csv(path_or_buf=None, index=None) + + +@pytest.fixture +def path_pep_for_schema_with_rel_path(peps_path): + return os.path.join(peps_path, "pep_schema_rel_path", "config.yaml") + + +@pytest.fixture +def path_pep_nextflow_taxprofiler(peps_path): + return os.path.join(peps_path, "pep_nextflow_taxprofiler", "config.yaml") + + +@pytest.fixture +def project_pep_nextflow_taxprofiler(path_pep_nextflow_taxprofiler): + return Project(path_pep_nextflow_taxprofiler, sample_table_index="sample") + + +@pytest.fixture +def output_pep_nextflow_taxprofiler(path_pep_nextflow_taxprofiler): + with open( + os.path.join(os.path.dirname(path_pep_nextflow_taxprofiler), "output.csv") + ) as f: + return f.read() + + +@pytest.fixture +def save_result_mock(mocker): + return mocker.patch("peppy.eido.conversion.save_result") + + +@pytest.fixture +def test_file_existing_schema(schemas_path): + return os.path.join(schemas_path, "schema_test_file_exist.yaml") + + +@pytest.fixture +def test_file_existing_pep(peps_path): + return os.path.join(peps_path, "test_file_existing", "project_config.yaml") + + +@pytest.fixture +def test_schema_value_check(schemas_path): + return os.path.join(schemas_path, "value_check_schema.yaml") + + +@pytest.fixture +def test_file_value_check(peps_path): + return os.path.join(peps_path, "value_check_pep", "project_config.yaml") + + +@pytest.fixture +def test_multiple_subs(peps_path): + return os.path.join(peps_path, "multiple_subsamples", "project_config.yaml") diff --git a/tests/eidotests/test_conversions.py b/tests/eidotests/test_conversions.py new file mode 100644 index 00000000..e821030b --- /dev/null +++ b/tests/eidotests/test_conversions.py @@ -0,0 +1,106 @@ +from peppy.eido.conversion import ( + convert_project, + get_available_pep_filters, + pep_conversion_plugins, + run_filter, +) +from peppy.project import Project + + +class TestConversionInfrastructure: + def test_plugins_are_read(self): + avail_filters = get_available_pep_filters() + assert isinstance(avail_filters, list) + + def test_plugins_contents(self): + avail_plugins = pep_conversion_plugins() + avail_filters = get_available_pep_filters() + assert all( + [plugin_name in avail_filters for plugin_name in avail_plugins.keys()] + ) + + def test_plugins_are_callable(self): + avail_plugins = pep_conversion_plugins() + assert all( + [callable(plugin_fun) for plugin_name, plugin_fun in avail_plugins.items()] + ) + + def test_basic_filter(self, save_result_mock, project_object): + conv_result = run_filter( + project_object, + "basic", + verbose=False, + plugin_kwargs={"paths": {"project": "out/basic_prj.txt"}}, + ) + + assert save_result_mock.called + assert conv_result["project"] == str(project_object) + + def test_csv_filter( + self, save_result_mock, 
taxprofiler_project, taxprofiler_csv_multiline_output + ): + conv_result = run_filter( + taxprofiler_project, + "csv", + verbose=False, + plugin_kwargs={"paths": {"samples": "out/basic_prj.txt"}}, + ) + + assert save_result_mock.called + assert conv_result["samples"] == taxprofiler_csv_multiline_output + + def test_csv_filter_handles_empty_fasta_correctly( + self, + project_pep_with_fasta_column, + output_pep_with_fasta_column, + save_result_mock, + ): + conv_result = run_filter( + project_pep_with_fasta_column, + "csv", + verbose=False, + plugin_kwargs={"paths": {"samples": "out/basic_prj.txt"}}, + ) + + assert save_result_mock.called + assert conv_result == {"samples": output_pep_with_fasta_column} + + def test_eido_csv_filter_filters_nextflow_taxprofiler_input_correctly( + self, + project_pep_nextflow_taxprofiler, + output_pep_nextflow_taxprofiler, + save_result_mock, + ): + conv_result = run_filter( + project_pep_nextflow_taxprofiler, + "csv", + verbose=False, + plugin_kwargs={"paths": {"samples": "out/basic_prj.txt"}}, + ) + + assert save_result_mock.called + assert conv_result == {"samples": output_pep_nextflow_taxprofiler} + + def test_multiple_subsamples(self, test_multiple_subs): + project = Project(test_multiple_subs, sample_table_index="sample_id") + + conversion = convert_project( + project, + "csv", + ) + assert isinstance(conversion["samples"], str) + conversion = convert_project( + project, + "basic", + ) + assert isinstance(conversion["project"], str) + conversion = convert_project( + project, + "yaml", + ) + assert isinstance(conversion["project"], str) + conversion = convert_project( + project, + "yaml-samples", + ) + assert isinstance(conversion["samples"], str) diff --git a/tests/eidotests/test_schema_operations.py b/tests/eidotests/test_schema_operations.py new file mode 100644 index 00000000..af0beb87 --- /dev/null +++ b/tests/eidotests/test_schema_operations.py @@ -0,0 +1,15 @@ +from peppy.eido.schema import read_schema +from yaml import safe_load + + +class TestSchemaReading: + def test_imports_file_schema(self, schema_imports_file_path): + s = read_schema(schema_imports_file_path) + assert isinstance(s, list) + assert len(s) == 2 + + def test_imports_dict_schema(self, schema_imports_file_path): + with open(schema_imports_file_path, "r") as f: + s = read_schema(safe_load(f)) + assert isinstance(s, list) + assert len(s) == 2 diff --git a/tests/eidotests/test_validations.py b/tests/eidotests/test_validations.py new file mode 100644 index 00000000..c5ff3c92 --- /dev/null +++ b/tests/eidotests/test_validations.py @@ -0,0 +1,174 @@ +import urllib + +import pytest +from peppy import Project +from peppy.eido.exceptions import EidoValidationError, PathAttrNotFoundError +from peppy.eido.validation import ( + validate_config, + validate_input_files, + validate_original_samples, + validate_project, + validate_sample, +) +from peppy.utils import load_yaml + + +def _check_remote_file_accessible(url): + try: + code = urllib.request.urlopen(url).getcode() + except (urllib.error.URLError, OSError): + pytest.skip(f"Remote file not found: {url}") + else: + if code != 200: + pytest.skip(f"Return code: {code}. 
Remote file not found: {url}") + + +class TestProjectValidation: + def test_validate_works(self, project_object, schema_file_path): + validate_project(project=project_object, schema=schema_file_path) + + def test_validate_detects_invalid(self, project_object, schema_invalid_file_path): + with pytest.raises(EidoValidationError): + validate_project(project=project_object, schema=schema_invalid_file_path) + + def test_validate_detects_invalid_imports( + self, project_object, schema_imports_file_path + ): + with pytest.raises(EidoValidationError): + validate_project(project=project_object, schema=schema_imports_file_path) + + def test_validate_imports_with_rel_path( + self, path_pep_for_schema_with_rel_path, schema_rel_path_imports_file_path + ): + pep_project = Project(path_pep_for_schema_with_rel_path) + validate_project(project=pep_project, schema=schema_rel_path_imports_file_path) + + def test_validate_converts_samples_to_private_attr( + self, project_object, schema_samples_file_path + ): + """ + In peppy.Project the list of peppy.Sample objects is + accessible via _samples attr. + To make the schema creation more accessible for eido users + samples->_samples key conversion has been implemented + """ + validate_project(project=project_object, schema=schema_samples_file_path) + + def test_validate_works_with_dict_schema(self, project_object, schema_file_path): + validate_project(project=project_object, schema=load_yaml(schema_file_path)) + + @pytest.mark.parametrize("schema_arg", [1, None, [1, 2, 3]]) + def test_validate_raises_error_for_incorrect_schema_type( + self, project_object, schema_arg + ): + with pytest.raises(TypeError): + validate_project(project=project_object, schema=schema_arg) + + +class TestSampleValidation: + @pytest.mark.parametrize("sample_name", [0, 1, "GSM1558746"]) + def test_validate_works( + self, project_object, sample_name, schema_samples_file_path + ): + validate_sample( + project=project_object, + sample_name=sample_name, + schema=schema_samples_file_path, + ) + + @pytest.mark.parametrize("sample_name", [22, "bogus_sample_name"]) + def test_validate_raises_error_for_incorrect_sample_name( + self, project_object, sample_name, schema_samples_file_path + ): + with pytest.raises((ValueError, IndexError)): + validate_sample( + project=project_object, + sample_name=sample_name, + schema=schema_samples_file_path, + ) + + @pytest.mark.parametrize("sample_name", [0, 1, "GSM1558746"]) + def test_validate_detects_invalid( + self, project_object, sample_name, schema_sample_invalid_file_path + ): + with pytest.raises(EidoValidationError): + validate_sample( + project=project_object, + sample_name=sample_name, + schema=schema_sample_invalid_file_path, + ) + + def test_original_sample(self, project_table_path, schema_samples_file_path): + validate_original_samples(project_table_path, schema_samples_file_path) + + +class TestConfigValidation: + def test_validate_succeeds_on_invalid_sample( + self, project_object, schema_sample_invalid_file_path + ): + validate_config(project=project_object, schema=schema_sample_invalid_file_path) + + def test_validate_on_yaml_dict( + self, project_file_path, schema_sample_invalid_file_path + ): + validate_config( + project=project_file_path, schema=schema_sample_invalid_file_path + ) + + +class TestRemoteValidation: + @pytest.mark.parametrize("schema_url", ["http://schema.databio.org/pep/2.0.0.yaml"]) + def test_validate_works_with_remote_schemas(self, project_object, schema_url): + _check_remote_file_accessible(schema_url) + 
validate_project(project=project_object, schema=schema_url) + validate_config(project=project_object, schema=schema_url) + validate_sample(project=project_object, schema=schema_url, sample_name=0) + + +class TestImportsValidation: + def test_validate(self, project_object, schema_file_path): + validate_project(project=project_object, schema=schema_file_path) + + +class TestProjectWithoutConfigValidation: + @pytest.mark.parametrize( + "remote_pep_cfg", + [ + "https://raw.githubusercontent.com/pepkit/example_peps/master/example_basic/sample_table.csv" + ], + ) + def test_validate_works(self, schema_file_path, remote_pep_cfg): + _check_remote_file_accessible(remote_pep_cfg) + validate_project( + project=Project( + remote_pep_cfg + ), # create Project object from a remote sample table + schema=schema_file_path, + ) + + @pytest.mark.parametrize( + "remote_pep_cfg", + [ + "https://raw.githubusercontent.com/pepkit/example_peps/master/example_basic/sample_table.csv" + ], + ) + def test_validate_detects_invalid(self, schema_invalid_file_path, remote_pep_cfg): + _check_remote_file_accessible(remote_pep_cfg) + with pytest.raises(EidoValidationError): + validate_project( + project=Project(remote_pep_cfg), schema=schema_invalid_file_path + ) + + def test_validate_file_existence( + self, test_file_existing_pep, test_file_existing_schema + ): + schema_path = test_file_existing_schema + prj = Project(test_file_existing_pep) + with pytest.raises(PathAttrNotFoundError): + validate_input_files(prj, schema_path) + + def test_validation_values(self, test_schema_value_check, test_file_value_check): + schema_path = test_schema_value_check + prj = Project(test_file_value_check) + with pytest.raises(EidoValidationError): + validate_project(project=prj, schema=schema_path) diff --git a/tests/conftest.py b/tests/peppytests/conftest.py similarity index 98% rename from tests/conftest.py rename to tests/peppytests/conftest.py index 0d43a209..2d9260f8 100644 --- a/tests/conftest.py +++ b/tests/peppytests/conftest.py @@ -15,8 +15,8 @@ def merge_paths(pep_branch, directory_name): return os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))), - "tests", "data", + "peppydata", "example_peps-{}".format(pep_branch), "example_{}".format(directory_name), ) diff --git a/tests/test_Project.py b/tests/peppytests/test_Project.py similarity index 88% rename from tests/test_Project.py rename to tests/peppytests/test_Project.py index 58ff97bd..703886d8 100644 --- a/tests/test_Project.py +++ b/tests/peppytests/test_Project.py @@ -1,15 +1,11 @@ -"""Classes for peppy.Project smoketesting""" - import os +import pickle import socket import tempfile import numpy as np import pytest from pandas import DataFrame -from yaml import dump, safe_load -import pickle - from peppy import Project from peppy.const import SAMPLE_NAME_ATTR, SAMPLE_TABLE_FILE_KEY from peppy.exceptions import ( @@ -18,6 +14,7 @@ MissingAmendmentError, RemoteYAMLError, ) +from yaml import dump, safe_load __author__ = "Michal Stolarczyk" __email__ = "michal.stolarczyk@nih.gov" @@ -38,6 +35,7 @@ "subtable4", "subtable5", "remove", + "issue499", ] @@ -98,12 +96,21 @@ def test_expand_path(self, example_pep_cfg_path, defer): """ Verify output_path is expanded """ + # Adjust for Windows-specific config file + if os.name == "nt": + example_pep_cfg_path = os.path.join( + *os.path.split(example_pep_cfg_path)[:1], + f"win_{os.path.split(example_pep_cfg_path)[1]}", + ) p = Project( cfg=example_pep_cfg_path, amendments="newLib", defer_samples_creation=defer, ) - 
assert not p.config["output_dir"].startswith("$") + assert not ( + p.config["output_dir"].startswith("$") + or p.config["output_dir"].startswith("%") + ) @@ -577,6 +584,15 @@ def test_derive(self, example_pep_cfg_path): p, pd = _get_pair_to_post_init_test(example_pep_cfg_path) _cmp_all_samples_attr(p, pd, "file_path") + @pytest.mark.parametrize("example_pep_cfg_path", ["issue499"], indirect=True) + def test_issue499(self, example_pep_cfg_path): + """ + Verify that the derivation happens the same way in a post init + sample creation scenario + """ + p, pd = _get_pair_to_post_init_test(example_pep_cfg_path) + _cmp_all_samples_attr(p, pd, "file_path") + @pytest.mark.parametrize("example_pep_cfg_path", ["append"], indirect=True) def test_equality(self, example_pep_cfg_path): """ @@ -744,3 +760,69 @@ def test_nextflow_subsamples(self, example_pep_cfg_path): """ p = Project(cfg=example_pep_cfg_path) assert isinstance(p, Project) + + +class TestSampleModifiers: + @pytest.mark.parametrize("example_pep_cfg_path", ["append"], indirect=True) + def test_append(self, example_pep_cfg_path): + """Verify that the appended attribute is added to the samples""" + p = Project(cfg=example_pep_cfg_path) + assert all([s["read_type"] == "SINGLE" for s in p.samples]) + + @pytest.mark.parametrize("example_pep_cfg_path", ["imports"], indirect=True) + def test_imports(self, example_pep_cfg_path): + """Verify that the imported attribute is added to the samples""" + p = Project(cfg=example_pep_cfg_path) + assert all([s["imported_attr"] == "imported_val" for s in p.samples]) + + @pytest.mark.parametrize("example_pep_cfg_path", ["imply"], indirect=True) + def test_imply(self, example_pep_cfg_path): + """ + Verify that the implied attribute is added to the correct samples + """ + p = Project(cfg=example_pep_cfg_path) + assert all( + [s["genome"] == "hg38" for s in p.samples if s["organism"] == "human"] + ) + assert all( + [s["genome"] == "mm10" for s in p.samples if s["organism"] == "mouse"] + ) + + @pytest.mark.parametrize("example_pep_cfg_path", ["duplicate"], indirect=True) + def test_duplicate(self, example_pep_cfg_path): + """ + Verify that the duplicated attribute is identical to the original + """ + p = Project(cfg=example_pep_cfg_path) + assert all([s["organism"] == s["animal"] for s in p.samples]) + + @pytest.mark.parametrize("example_pep_cfg_path", ["derive"], indirect=True) + def test_derive(self, example_pep_cfg_path): + """ + Verify that the declared attr derivation happened + """ + p = Project(cfg=example_pep_cfg_path) + assert all(["file_path" in s for s in p.samples]) + assert all(["file_path" in s._derived_cols_done for s in p.samples]) + + @pytest.mark.parametrize("example_pep_cfg_path", ["remove"], indirect=True) + def test_remove(self, example_pep_cfg_path): + """ + Verify that the declared attr was eliminated from every sample + """ + p = Project(cfg=example_pep_cfg_path) + assert all(["protocol" not in s for s in p.samples]) + + @pytest.mark.parametrize("example_pep_cfg_path", ["subtable2"], indirect=True) + def test_subtable(self, example_pep_cfg_path): + """ + Verify that the sample merging takes place + """ + p = Project(cfg=example_pep_cfg_path) + assert all( + [ + isinstance(s["file"], list) + for s in p.samples + if s["sample_name"] in ["frog_1", "frog2"] + ] + ) diff --git a/tests/smoketests/test_Sample.py b/tests/peppytests/test_Sample.py similarity index 99% rename from tests/smoketests/test_Sample.py rename to tests/peppytests/test_Sample.py index 
ba41b003..62b9bf0c 100644 --- a/tests/smoketests/test_Sample.py +++ b/tests/peppytests/test_Sample.py @@ -2,7 +2,6 @@ import tempfile import pytest - from peppy import Project __author__ = "Michal Stolarczyk" diff --git a/tests/phctests/__init__.py b/tests/phctests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/phctests/conftest.py b/tests/phctests/conftest.py new file mode 100644 index 00000000..39ba8caf --- /dev/null +++ b/tests/phctests/conftest.py @@ -0,0 +1,51 @@ +import os + +import pytest + + +@pytest.fixture +def SAMPLE_PEP(): + return os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), + "data", + "phcdata", + "sample_pep", + "subsamp_config.yaml", + ) + + +@pytest.fixture() +def test_raw_pep_return(): + sample_prj = { + "config": { + "This": "is config", + "description": "desc", + "name": "sample name", + }, + "subsample_list": [], + "sample_list": [ + {"time": "0", "file_path": "source1", "sample_name": "pig_0h"}, + {"time": "1", "file_path": "source1", "sample_name": "pig_1h"}, + {"time": "0", "file_path": "source1", "sample_name": "frog_0h"}, + ], + } + return sample_prj + + +@pytest.fixture +def requests_get_mock(mocker): + return mocker.patch("requests.get") + + +@pytest.fixture +def input_return_mock(monkeypatch): + return monkeypatch.setattr("builtins.input", lambda: None) + + +@pytest.fixture +def test_jwt(): + return ( + "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9." + "eyJsb2dpbiI6InJhZmFsc3RlcGllbiIsImlkIjo0MzkyNjUyMiwib3JnYW5pemF0aW9ucyI6bnVsbH0." + "mgBP-7x5l9cqufhzdVi0OFA78pkYDEymwPFwud02BAc" + ) diff --git a/tests/phctests/test_manual.py b/tests/phctests/test_manual.py new file mode 100644 index 00000000..4ff5162b --- /dev/null +++ b/tests/phctests/test_manual.py @@ -0,0 +1,101 @@ +import pytest + +from peppy.pephubclient.pephubclient import PEPHubClient + + +@pytest.mark.skip(reason="Manual test") +class TestViewsManual: + def test_get(self): + ff = PEPHubClient().view.get( + "databio", + "bedset1", + "default", + "test_view", + ) + print(ff) + + def test_create(self): + PEPHubClient().view.create( + "databio", + "bedset1", + "default", + "test_view", + sample_list=["orange", "grape1", "apple1"], + ) + + def test_delete(self): + PEPHubClient().view.delete( + "databio", + "bedset1", + "default", + "test_view", + ) + + def test_add_sample(self): + PEPHubClient().view.add_sample( + "databio", + "bedset1", + "default", + "test_view", + "name", + ) + + def test_delete_sample(self): + PEPHubClient().view.remove_sample( + "databio", + "bedset1", + "default", + "test_view", + "name", + ) + + +@pytest.mark.skip(reason="Manual test") +class TestSamplesManual: + def test_manual(self): + ff = PEPHubClient().sample.get( + "databio", + "bedset1", + "default", + "grape1", + ) + ff + + def test_update(self): + ff = PEPHubClient().sample.get( + "databio", + "bedset1", + "default", + "newf", + ) + ff.update({"shefflab": "test1"}) + ff["sample_type"] = "new_type" + PEPHubClient().sample.update( + "databio", + "bedset1", + "default", + "newf", + sample_dict=ff, + ) + + def test_add(self): + ff = { + "genome": "phc_test1", + "sample_type": "phc_test", + } + PEPHubClient().sample.create( + "databio", + "bedset1", + "default", + "new_2222", + overwrite=False, + sample_dict=ff, + ) + + def test_delete(self): + PEPHubClient().sample.remove( + "databio", + "bedset1", + "default", + "new_2222", + ) diff --git a/tests/phctests/test_pephubclient.py b/tests/phctests/test_pephubclient.py new file mode 100644 index 00000000..21c7c4e8 --- /dev/null 
+++ b/tests/phctests/test_pephubclient.py @@ -0,0 +1,615 @@ +from unittest.mock import Mock + +import pytest + +from peppy.pephubclient.exceptions import ResponseError +from peppy.pephubclient.helpers import is_registry_path +from peppy.pephubclient.pephub_oauth.models import InitializeDeviceCodeResponse +from peppy.pephubclient.pephubclient import PEPHubClient + + +@pytest.fixture() +def device_code_return(): + device_code = "asdf2345" + return InitializeDeviceCodeResponse( + device_code=device_code, + auth_url=f"any_base_url/auth/device/login/{device_code}", + ) + + +class TestSmoke: + def test_login(self, mocker, device_code_return, test_jwt): + """ + Test if device login request was sent to pephub + """ + requests_mock = mocker.patch( + "requests.request", + return_value=Mock(content=device_code_return, status_code=200), + ) + pephub_response_mock = mocker.patch( + "peppy.pephubclient.pephub_oauth.pephub_oauth.PEPHubAuth._handle_pephub_response", + return_value=device_code_return, + ) + + pephub_exchange_code_mock = mocker.patch( + "peppy.pephubclient.pephub_oauth.pephub_oauth.PEPHubAuth._exchange_device_code_on_token", + return_value=test_jwt, + ) + + pathlib_mock = mocker.patch( + "peppy.pephubclient.files_manager.FilesManager.save_jwt_data_to_file" + ) + + PEPHubClient().login() + + assert requests_mock.called + assert pephub_response_mock.called + assert pephub_exchange_code_mock.called + assert pathlib_mock.called + + def test_logout(self, mocker): + os_remove_mock = mocker.patch("os.remove") + PEPHubClient().logout() + + assert os_remove_mock.called + + def test_pull(self, mocker, test_jwt, test_raw_pep_return): + jwt_mock = mocker.patch( + "peppy.pephubclient.files_manager.FilesManager.load_jwt_data_from_file", + return_value=test_jwt, + ) + requests_mock = mocker.patch( + "requests.request", + return_value=Mock(content="some return", status_code=200), + ) + mocker.patch( + "peppy.pephubclient.helpers.RequestManager.decode_response", + return_value=test_raw_pep_return, + ) + save_yaml_mock = mocker.patch( + "peppy.pephubclient.files_manager.FilesManager.save_yaml" + ) + save_sample_mock = mocker.patch( + "peppy.pephubclient.files_manager.FilesManager.save_pandas" + ) + mocker.patch( + "peppy.pephubclient.files_manager.FilesManager.create_project_folder" + ) + + PEPHubClient().pull("some/project") + + assert jwt_mock.called + assert requests_mock.called + assert save_yaml_mock.called + assert save_sample_mock.called + + @pytest.mark.parametrize( + "status_code, expected_error_message", + [ + ( + 404, + "File does not exist, or you are unauthorized.", + ), + ( + 500, + "Internal server error. Unexpected return value. 
Error: 500", + ), + ], + ) + def test_pull_with_pephub_error_response( + self, mocker, test_jwt, status_code, expected_error_message + ): + mocker.patch( + "peppy.pephubclient.files_manager.FilesManager.load_jwt_data_from_file", + return_value=test_jwt, + ) + mocker.patch( + "requests.request", + return_value=Mock( + content=b'{"detail": "Some error message"}', status_code=status_code + ), + ) + + with pytest.raises(ResponseError) as e: + PEPHubClient().pull("some/project") + + assert e.value.message == expected_error_message + + def test_push(self, mocker, test_jwt, SAMPLE_PEP): + requests_mock = mocker.patch( + "requests.request", return_value=Mock(status_code=202) + ) + + PEPHubClient().push( + SAMPLE_PEP, + namespace="s_name", + name="name", + ) + + assert requests_mock.called + + @pytest.mark.parametrize( + "status_code, expected_error_message", + [ + ( + 409, + "Project already exists. Set force to overwrite project.", + ), + ( + 401, + "Unauthorized! Failure in uploading project.", + ), + (233, "Unexpected Response Error."), + ], + ) + def test_push_with_pephub_error_response( + self, mocker, status_code, expected_error_message, SAMPLE_PEP + ): + mocker.patch("requests.request", return_value=Mock(status_code=status_code)) + with pytest.raises(ResponseError, match=expected_error_message): + PEPHubClient().push( + SAMPLE_PEP, + namespace="s_name", + name="name", + ) + + def test_search_prj(self, mocker): + return_value = { + "count": 1, + "limit": 100, + "offset": 0, + "results": [ + { + "namespace": "namespace1", + "name": "basic", + "tag": "default", + "is_private": False, + "number_of_samples": 2, + "description": "None", + "last_update_date": "2023-08-27 19:07:31.552861+00:00", + "submission_date": "2023-08-27 19:07:31.552858+00:00", + "digest": "08cbcdbf4974fc84bee824c562b324b5", + "pep_schema": "random_schema_name", + "pop": False, + "stars_number": 0, + "forked_from": None, + } + ], + "session_info": None, + "can_edit": False, + } + mocker.patch( + "requests.request", + return_value=Mock(content=return_value, status_code=200), + ) + mocker.patch( + "peppy.pephubclient.helpers.RequestManager.decode_response", + return_value=return_value, + ) + + return_value = PEPHubClient().find_project(namespace="namespace1") + assert return_value.count == 1 + assert len(return_value.results) == 1 + + +class TestHelpers: + @pytest.mark.parametrize( + "input_str, expected_output", + [ + ( + "databio/pep:default", + True, + ), + ( + "pephub.databio.org::databio/pep:default", + True, + ), + ( + "pephub.databio.org://databio/pep:default", + True, + ), + ( + "databio/pep", + True, + ), + ( + "databio/pep/default", + False, + ), + ( + "some/random/path/to.yaml", + False, + ), + ( + "path_to.csv", + False, + ), + ( + "this/is/path/to.csv", + False, + ), + ], + ) + def test_is_registry_path(self, input_str, expected_output): + assert is_registry_path(input_str) is expected_output + + +class TestSamples: + def test_get(self, mocker): + return_value = { + "genome": "phc_test1", + "sample_type": "phc_test", + "sample_name": "gg1", + } + mocker.patch( + "requests.request", + return_value=Mock(content=return_value, status_code=200), + ) + mocker.patch( + "peppy.pephubclient.helpers.RequestManager.decode_response", + return_value=return_value, + ) + return_value = PEPHubClient().sample.get( + "test_namespace", + "test_name", + "default", + "gg1", + ) + assert return_value == return_value + + @pytest.mark.parametrize( + "status_code, expected_error_message", + [ + ( + 404, + "Sample does not exist.", + ), + 
( + 500, + "Internal server error. Unexpected return value.", + ), + ( + 403, + "Unexpected return value. Error: 403", + ), + ], + ) + def test_sample_get_with_pephub_error_response( + self, mocker, status_code, expected_error_message + ): + mocker.patch("requests.request", return_value=Mock(status_code=status_code)) + with pytest.raises(ResponseError, match=expected_error_message): + PEPHubClient().sample.get( + "test_namespace", + "test_name", + "default", + "gg1", + ) + + @pytest.mark.parametrize( + "prj_dict", + [ + {"genome": "phc_test1", "sample_type": "phc_test", "sample_name": "gg1"}, + {"genome": "phc_test1", "sample_type": "phc_test"}, + ], + ) + def test_create(self, mocker, prj_dict): + return_value = prj_dict + mocker_obj = mocker.patch( + "requests.request", + return_value=Mock(content=return_value, status_code=202), + ) + + PEPHubClient().sample.create( + "test_namespace", + "test_name", + "default", + "gg1", + sample_dict=return_value, + ) + assert mocker_obj.called + + @pytest.mark.parametrize( + "status_code, expected_error_message", + [ + ( + 404, + "does not exist.", + ), + ( + 409, + "already exists. Set overwrite to True to overwrite sample.", + ), + ( + 500, + "Unexpected return value.", + ), + ], + ) + def test_sample_create_with_pephub_error_response( + self, mocker, status_code, expected_error_message + ): + mocker.patch("requests.request", return_value=Mock(status_code=status_code)) + with pytest.raises(ResponseError, match=expected_error_message): + PEPHubClient().sample.create( + "test_namespace", + "test_name", + "default", + "gg1", + sample_dict={ + "genome": "phc_test1", + "sample_type": "phc_test", + "sample_name": "gg1", + }, + ) + + def test_delete(self, mocker): + mocker_obj = mocker.patch( + "requests.request", + return_value=Mock(status_code=202), + ) + + PEPHubClient().sample.remove( + "test_namespace", + "test_name", + "default", + "gg1", + ) + assert mocker_obj.called + + @pytest.mark.parametrize( + "status_code, expected_error_message", + [ + ( + 404, + "does not exist.", + ), + ( + 500, + "Unexpected return value.", + ), + ], + ) + def test_sample_delete_with_pephub_error_response( + self, mocker, status_code, expected_error_message + ): + mocker.patch("requests.request", return_value=Mock(status_code=status_code)) + with pytest.raises(ResponseError, match=expected_error_message): + PEPHubClient().sample.remove( + "test_namespace", + "test_name", + "default", + "gg1", + ) + + def test_update(self, mocker): + mocker_obj = mocker.patch( + "requests.request", + return_value=Mock(status_code=202), + ) + + PEPHubClient().sample.update( + "test_namespace", + "test_name", + "default", + "gg1", + sample_dict={ + "genome": "phc_test1", + "sample_type": "phc_test", + "new_col": "column", + }, + ) + assert mocker_obj.called + + @pytest.mark.parametrize( + "status_code, expected_error_message", + [ + ( + 404, + "does not exist.", + ), + ( + 500, + "Unexpected return value.", + ), + ], + ) + def test_sample_update_with_pephub_error_response( + self, mocker, status_code, expected_error_message + ): + mocker.patch("requests.request", return_value=Mock(status_code=status_code)) + with pytest.raises(ResponseError, match=expected_error_message): + PEPHubClient().sample.update( + "test_namespace", + "test_name", + "default", + "gg1", + sample_dict={ + "genome": "phc_test1", + "sample_type": "phc_test", + "new_col": "column", + }, + ) + + +class TestViews: + def test_get(self, mocker, test_raw_pep_return): + return_value = test_raw_pep_return + mocker.patch( + 
"requests.request", + return_value=Mock(content=return_value, status_code=200), + ) + mocker.patch( + "peppy.pephubclient.helpers.RequestManager.decode_response", + return_value=return_value, + ) + + return_value = PEPHubClient().view.get( + "test_namespace", + "test_name", + "default", + "gg1", + ) + assert return_value == return_value + + @pytest.mark.parametrize( + "status_code, expected_error_message", + [ + ( + 404, + "does not exist.", + ), + ( + 500, + "Internal server error.", + ), + ], + ) + def test_view_get_with_pephub_error_response( + self, mocker, status_code, expected_error_message + ): + mocker.patch("requests.request", return_value=Mock(status_code=status_code)) + with pytest.raises(ResponseError, match=expected_error_message): + PEPHubClient().view.get( + "test_namespace", + "test_name", + "default", + "gg1", + ) + + def test_create(self, mocker): + mocker_obj = mocker.patch( + "requests.request", + return_value=Mock(status_code=202), + ) + + PEPHubClient().view.create( + "test_namespace", + "test_name", + "default", + "gg1", + sample_list=["sample1", "sample2"], + ) + assert mocker_obj.called + + @pytest.mark.parametrize( + "status_code, expected_error_message", + [ + ( + 404, + "does not exist.", + ), + ( + 409, + "already exists in the project.", + ), + ], + ) + def test_view_create_with_pephub_error_response( + self, mocker, status_code, expected_error_message + ): + mocker.patch("requests.request", return_value=Mock(status_code=status_code)) + with pytest.raises(ResponseError, match=expected_error_message): + PEPHubClient().view.create( + "test_namespace", + "test_name", + "default", + "gg1", + sample_list=["sample1", "sample2"], + ) + + def test_delete(self, mocker): + mocker_obj = mocker.patch( + "requests.request", + return_value=Mock(status_code=202), + ) + + PEPHubClient().view.delete( + "test_namespace", + "test_name", + "default", + "gg1", + ) + assert mocker_obj.called + + @pytest.mark.parametrize( + "status_code, expected_error_message", + [ + ( + 404, + "does not exist.", + ), + ( + 401, + "You are unauthorized to delete this view.", + ), + ], + ) + def test_view_delete_with_pephub_error_response( + self, mocker, status_code, expected_error_message + ): + mocker.patch("requests.request", return_value=Mock(status_code=status_code)) + with pytest.raises(ResponseError, match=expected_error_message): + PEPHubClient().view.delete( + "test_namespace", + "test_name", + "default", + "gg1", + ) + + def test_add_sample(self, mocker): + mocker_obj = mocker.patch( + "requests.request", + return_value=Mock(status_code=202), + ) + + PEPHubClient().view.add_sample( + "test_namespace", + "test_name", + "default", + "gg1", + "sample1", + ) + assert mocker_obj.called + + def test_delete_sample(self, mocker): + mocker_obj = mocker.patch( + "requests.request", + return_value=Mock(status_code=202), + ) + + PEPHubClient().view.remove_sample( + "test_namespace", + "test_name", + "default", + "gg1", + "sample1", + ) + assert mocker_obj.called + + +### + + +# test add sample: +# 1. add correct 202 +# 2. add existing 409 +# 3. add with sample_name +# 4. add without sample_name +# 5. add with overwrite +# 6. add to nonexistent project 404 + +# delete sample: +# 1. delete existing 202 +# 2. delete nonexistent 404 + +# get sample: +# 1. get existing 200 +# 2. get nonexistent 404 +# 3. get with raw 200 +# 4. get from nonexistent project 404 + +# update sample: +# 1. update existing 202 +# 2. update nonexistent sample 404 +# 3. 
update nonexistent project 404 diff --git a/tests/smoketests/test_Project.py b/tests/smoketests/test_Project.py deleted file mode 100644 index 197ba9b0..00000000 --- a/tests/smoketests/test_Project.py +++ /dev/null @@ -1,69 +0,0 @@ -import pytest - -from peppy.project import Project - - -class TestSampleModifiers: - @pytest.mark.parametrize("example_pep_cfg_path", ["append"], indirect=True) - def test_append(self, example_pep_cfg_path): - """Verify that the appended attribute is added to the samples""" - p = Project(cfg=example_pep_cfg_path) - assert all([s["read_type"] == "SINGLE" for s in p.samples]) - - @pytest.mark.parametrize("example_pep_cfg_path", ["imports"], indirect=True) - def test_imports(self, example_pep_cfg_path): - """Verify that the imported attribute is added to the samples""" - p = Project(cfg=example_pep_cfg_path) - assert all([s["imported_attr"] == "imported_val" for s in p.samples]) - - @pytest.mark.parametrize("example_pep_cfg_path", ["imply"], indirect=True) - def test_imply(self, example_pep_cfg_path): - """ - Verify that the implied attribute is added to the correct samples - """ - p = Project(cfg=example_pep_cfg_path) - assert all( - [s["genome"] == "hg38" for s in p.samples if s["organism"] == "human"] - ) - assert all( - [s["genome"] == "mm10" for s in p.samples if s["organism"] == "mouse"] - ) - - @pytest.mark.parametrize("example_pep_cfg_path", ["duplicate"], indirect=True) - def test_duplicate(self, example_pep_cfg_path): - """ - Verify that the duplicated attribute is identical to the original - """ - p = Project(cfg=example_pep_cfg_path) - assert all([s["organism"] == s["animal"] for s in p.samples]) - - @pytest.mark.parametrize("example_pep_cfg_path", ["derive"], indirect=True) - def test_derive(self, example_pep_cfg_path): - """ - Verify that the declared attr derivation happened - """ - p = Project(cfg=example_pep_cfg_path) - assert all(["file_path" in s for s in p.samples]) - assert all(["file_path" in s._derived_cols_done for s in p.samples]) - - @pytest.mark.parametrize("example_pep_cfg_path", ["remove"], indirect=True) - def test_remove(self, example_pep_cfg_path): - """ - Verify that the declared attr was eliminated from every sample - """ - p = Project(cfg=example_pep_cfg_path) - assert all(["protocol" not in s for s in p.samples]) - - @pytest.mark.parametrize("example_pep_cfg_path", ["subtable2"], indirect=True) - def test_subtable(self, example_pep_cfg_path): - """ - Verify that the sample merging takes place - """ - p = Project(cfg=example_pep_cfg_path) - assert all( - [ - isinstance(s["file"], list) - for s in p.samples - if s["sample_name"] in ["frog_1", "frog2"] - ] - )
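The TODO comments at the end of the new tests/phctests/test_pephubclient.py list an "add with overwrite" case that the added suite does not yet exercise. Below is a minimal, hypothetical sketch of how it could look, reusing the requests.request mocking pattern already used by TestSamples.test_create; the test name, the 202 status, and the sample payload are editorial assumptions, not part of this diff.

# Hypothetical sketch (not part of the diff above): the "add with overwrite" TODO case.
from unittest.mock import Mock

from peppy.pephubclient.pephubclient import PEPHubClient


def test_create_with_overwrite(mocker):
    # Assume the server answers 202 when an existing sample is overwritten,
    # mirroring the status code mocked in TestSamples.test_create.
    requests_mock = mocker.patch(
        "requests.request",
        return_value=Mock(status_code=202),
    )
    PEPHubClient().sample.create(
        "test_namespace",
        "test_name",
        "default",
        "gg1",
        overwrite=True,  # the overwrite kwarg also appears in tests/phctests/test_manual.py
        sample_dict={
            "genome": "phc_test1",
            "sample_type": "phc_test",
            "sample_name": "gg1",
        },
    )
    assert requests_mock.called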