diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..7152fe29 --- /dev/null +++ b/.flake8 @@ -0,0 +1,3 @@ +[flake8] +exclude = .git,__pycache__,docs/source/conf.py,old,build,dist +max-line-length = 120 diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml index 8b48ddf1..90c97050 100644 --- a/.github/workflows/black.yml +++ b/.github/workflows/black.yml @@ -1,6 +1,8 @@ name: Lint -on: [pull_request] +on: + pull_request: + branches: [ main ] jobs: lint: diff --git a/.github/workflows/cli-coverage.yml b/.github/workflows/cli-coverage.yml new file mode 100644 index 00000000..8b4e90fb --- /dev/null +++ b/.github/workflows/cli-coverage.yml @@ -0,0 +1,40 @@ +name: test coverage + +on: + push: + branches: [master, dev] + +jobs: + cli-coverage-report: + strategy: + matrix: + python-version: [ "3.12" ] + os: [ ubuntu-latest ] + r: [ release ] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v5 + + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install test dependencies + run: if [ -f requirements/requirements-test.txt ]; then pip install -r requirements/requirements-test.txt; fi + + - run: pip install . 
+ + - name: Run tests + run: coverage run -m pytest + + - name: build coverage + run: coverage html -i + + - run: smokeshow upload htmlcov + env: + SMOKESHOW_GITHUB_STATUS_DESCRIPTION: Coverage {coverage-percentage} + SMOKESHOW_GITHUB_COVERAGE_THRESHOLD: 80 + SMOKESHOW_GITHUB_CONTEXT: coverage + SMOKESHOW_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + SMOKESHOW_GITHUB_PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }} + SMOKESHOW_AUTH_KEY: ${{ secrets.SMOKESHOW_AUTH_KEY }} \ No newline at end of file diff --git a/.github/workflows/pytest-windows.yml b/.github/workflows/pytest-windows.yml new file mode 100644 index 00000000..7d9cb327 --- /dev/null +++ b/.github/workflows/pytest-windows.yml @@ -0,0 +1,35 @@ +name: Run pytests windows + +on: + push: + branches: [dev] + pull_request: + branches: [main, dev, peppy0_50] + +jobs: + pytest: + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: ["3.11"] + os: [windows-latest] + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install all dependencies + run: pip install -r requirements/requirements-all.txt + + - name: Install test dependencies + run: pip install -r requirements/requirements-test.txt + + - name: Install package + run: python -m pip install . 
+ + - name: Run pytest tests + run: pytest tests -v \ No newline at end of file diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/pytest.yml similarity index 65% rename from .github/workflows/run-pytest.yml rename to .github/workflows/pytest.yml index 7864e39a..2010fe77 100644 --- a/.github/workflows/run-pytest.yml +++ b/.github/workflows/pytest.yml @@ -4,7 +4,7 @@ on: push: branches: [dev] pull_request: - branches: [master, dev] + branches: [main, dev, peppy0_50] jobs: pytest: @@ -15,15 +15,15 @@ jobs: os: [ubuntu-latest] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v5 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - name: Install dev dependencies - run: if [ -f requirements/requirements-dev.txt ]; then pip install -r requirements/requirements-dev.txt; fi + - name: Install all dependencies + run: if [ -f requirements/requirements-all.txt ]; then pip install -r requirements/requirements-all.txt; fi - name: Install test dependencies run: if [ -f requirements/requirements-test.txt ]; then pip install -r requirements/requirements-test.txt; fi @@ -32,4 +32,4 @@ jobs: run: python -m pip install . 
- name: Run pytest tests - run: pytest tests -x -vv --cov=./ --cov-report=xml --remote-data + run: pytest tests -v diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index ddf9a38b..533ef328 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -1,6 +1,3 @@ -# This workflows will upload a Python Package using Twine when a release is created -# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries - name: Upload Python Package on: @@ -14,9 +11,7 @@ jobs: permissions: id-token: write steps: - - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v5 with: python-version: '3.x' - name: Install dependencies @@ -27,4 +22,4 @@ jobs: run: | python setup.py sdist bdist_wheel - name: Publish package distributions to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file diff --git a/.github/workflows/run-codecov.yml b/.github/workflows/run-codecov.yml deleted file mode 100644 index a41a1fde..00000000 --- a/.github/workflows/run-codecov.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: Run codecov - -on: - pull_request: - branches: [master] - -jobs: - pytest: - runs-on: ${{ matrix.os }} - strategy: - matrix: - python-version: [3.9] - os: [ubuntu-latest] - - steps: - - uses: actions/checkout@v2 - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v2 - with: - file: ./coverage.xml - name: py-${{ matrix.python-version }}-${{ matrix.os }} diff --git a/.gitignore b/.gitignore index eda9361f..0d062a9a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,87 +1,9 @@ -# ignore test results -tests/test/* - -# toy/experimental files -*.tsv -*.pkl - -# ignore eggs -.eggs/ - -# generic ignore list: -*.lst - -# Compiled source -*.com -*.class -*.dll -*.exe -*.o *.so -*.pyc - -# Packages -# it's better to unpack 
these files and commit the raw source -# git has its own built in compression methods -*.7z -*.dmg -*.gz -*.iso -*.jar -*.rar -*.tar -*.zip - -# Logs and databases -*.log -*.sql -*.sqlite - -# OS generated files -.DS_Store -.DS_Store? -._* -.Spotlight-V100 -.Trashes -ehthumbs.db -Thumbs.db - -# Gedit temporary files -*~ - -# libreoffice lock files: -.~lock* - -# Default-named test output -microtest/ -open_pipelines/ - -# IDE-specific items -.idea/ -.vscode/ - -# pytest-related -.cache/ -.coverage -.pytest_cache/ - -# Reserved files for comparison -*RESERVE* -# Build-related stuff build/ dist/ -peppy.egg-info/ -.ipynb_checkpoints/ -# env stuff -env/ .env venv/ -.venv/ -# swap files -.swp -.swo -.swn diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fd759dcf..53e07571 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,21 +1,2 @@ repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.0.1 hooks: - - id: trailing-whitespace - - id: check-yaml - - id: end-of-file-fixer - - id: requirements-txt-fixer - - id: trailing-whitespace - - id: check-ast - - - repo: https://github.com/PyCQA/isort - rev: 5.8.0 - hooks: - - id: isort - args: ["--profile", "black"] - - - repo: https://github.com/psf/black - rev: 21.5b2 - hooks: - - id: black diff --git a/.readthedocs.yaml b/.readthedocs.yaml deleted file mode 100644 index 3c789e4c..00000000 --- a/.readthedocs.yaml +++ /dev/null @@ -1,15 +0,0 @@ -version: 2 - -build: - os: ubuntu-22.04 - tools: - python: "3.10" - -mkdocs: - configuration: mkdocs.yml - fail_on_warning: false - -# Optionally declare the Python requirements required to build your docs -python: - install: - - requirements: requirements/requirements-doc.txt diff --git a/LICENSE.txt b/LICENSE.txt index 1b78bad2..e3476396 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,5 +1,3 @@ -Copyright 2017 Nathan Sheffield - Redistribution and use in source and binary forms, with or without modification, are permitted provided 
that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. diff --git a/MANIFEST.in b/MANIFEST.in index fd948e04..43e283b4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,6 @@ -include LICENSE.txt include requirements/* include README.md +include peppy/pephubclient/* +include peppy/eido/* +include peppy/pephubclient/pephub_oauth/* +include peppy/pephubclient/modules/* diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..40d1dfea --- /dev/null +++ b/Makefile @@ -0,0 +1,13 @@ +lint: + ruff format . + +run-coverage: + coverage run -m pytest + +html-report: + coverage html --omit="*/test*" + +open-coverage: + cd htmlcov && google-chrome index.html + +coverage: run-coverage html-report open-coverage diff --git a/README.md b/README.md index 3c09e42d..98b99dc2 100644 --- a/README.md +++ b/README.md @@ -12,4 +12,4 @@ Links to complete documentation: * Complete documentation and API for the `peppy` python package is at [peppy.databio.org](https://peppy.databio.org). * Reference documentation for standard **PEP** format is at [pep.databio.org](https://pep.databio.org/). * Example PEPs for testing `peppy` are in the [example_peps repository](https://github.com/pepkit/example_peps). -* The package [on PyPI](https://pypi.org/project/peppy/). +* The package [on PyPI](https://pypi.org/project/peppy/). \ No newline at end of file diff --git a/docs/.gitignore b/docs/.gitignore deleted file mode 100644 index 64beb194..00000000 --- a/docs/.gitignore +++ /dev/null @@ -1 +0,0 @@ -autodoc_build/ diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index 5c7dab21..00000000 --- a/docs/README.md +++ /dev/null @@ -1,47 +0,0 @@ -# peppy - -## Introduction - -`peppy` is a Python package that provides an API for handling standardized project and sample metadata. 
-If you define your project in [Portable Encapsulated Project](http://pep.databio.org/en/2.0.0/) (PEP) format, -you can use the `peppy` package to instantiate an in-memory representation of your project and sample metadata. -You can then use `peppy` for interactive analysis, or to develop Python tools so you don't have to handle sample processing. `peppy` is useful to tool developers and data analysts who want a standard way of representing sample-intensive research project metadata. - -## What is a PEP? - -A [PEP](http://pep.databio.org/en/2.0.0/) is a collection of metadata files conforming to a standardized structure. -These files are written using the simple **YAML** and **TSV/CSV** formats, -and they can be read by a variety of tools in the pep toolkit, including `peppy`. If you don't already understand why the PEP concept is useful to you, -start by reading the [PEP specification](http://pep.databio.org/en/2.0.0/), -where you can also find example projects. - -## Why use `peppy`? - -`peppy` provides an API with which to interact from Python with PEP metadata. -This is often useful on its own, but the big wins include: - -- *Portability* between computing environments -- *Reusability* among different tools and project stages -- *Durability* with respect to data movement - -## Who should use `peppy`? - -There are **two main kinds of user** that may have interest: - -- A tool *developer* -- A data *analyst* - -If you neither of those describes you, you may be interested in [`pepr`](http://code.databio.org/pepr) (R package), -which provides an R interface to PEP objects, or [looper](http://github.com/pepkit/looper) (command-line application), -which lets you run any command-line tool or pipeline on samples in a project. - -**Developer** - -As a tool developer, you should `import peppy` in your Python tool and read PEP projects as its input. - -This will simplify use of your tool, because users may already have PEP-formatted projects for other tools. 
- -**Analyst** - -`peppy` provides an easy way to read project metadata into Python. -You will have access to an API to access samples and their attributes, facilitating downstream analysis. diff --git a/docs/changelog.md b/docs/changelog.md deleted file mode 100644 index 4db79aac..00000000 --- a/docs/changelog.md +++ /dev/null @@ -1,410 +0,0 @@ -# Changelog - -This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. - -## [0.40.2] -- 2024-05-28 -### Added -- added `sample_name` property to samples object. - -## [0.40.1] -- 2024-01-11 -### Fixed -- Initializing Project with `NaN` value instead of `None` in `from_pandas` method - - -## [0.40.0] -- 2023-12-18 - -**This version introduced backwards-incompatible changes.** - -### Changed -- Replaced `attmap` with `MutableMapping` (which removes some attributes) -- Replaced OrderedDict with dict -- Deprecated support for Python versions <= 3.7 - -_Due to the changes mentioned above, a few functionalities may be disabled. For example, the `name` and `description` project properties can no longer be accessed with `getitem`; use the `getattr` syntax instead_ - -### Added -- Constructor methods: `Project.from_dict`, `Project.from_pandas`, `Project.from_sample_yaml`, `Project.from_pep_config` - - -## [0.35.7] -- 2023-07-19 -### Fixed -- incorrect setting of sample and subsample indexes using from_dict function (#452) -- clarified debug messages - -## [0.35.6] -- 2023-06-27 -### Added -- `orient` argument to `to_dict` method - -### Fixed -- The name of the raw subsample object to match the actual name (list). Commit: #442 - -### Changed -- Reduced the number of items returned in the to_dict(extended=True) method to 3, with the name and description now stored in the config key. 
- - -## [0.35.5] -- 2023-03-27 -### Fixed -- A [bug](https://github.com/pepkit/peppy/issues/435) with custom sample ids -- Improved performance for large tables dramatically - -## [0.35.4] -- 2023-01-17 -### Fixed -- Fixed disabling rich progress on small datasets bug -- Disabled progressbar if object variable `progressbar` is set False - -## [0.35.3] -- 2022-11-16 -### Fixed -- Returning `NaN` value in initialization project from pandas df - -## [0.35.2] -- 2022-09-13 -### Fixed -- Returning `NaN` value within `to_dict` method was fixed and method now returns `None` instead - -## [0.35.1] -- 2022-09-07 -### Changed -- Organization of test files. Separated unittests from smoketests. - -### Fixed -- The root cause of `np.nan` values showing up in Pandas dataframes. Replaced the values with None right after reading the database, which made it possible to remove all custom `np.nan` to `None` converters used later in the code. -- Typing in some methods. -- Code redundancy in fixtures in conftest. - -### Added -- New test cases with test data - -## [0.35.0] -- 2022-08-25 - -### Changed - -- Optimized converting Projects to and from dict. Now, `to_dict(extended=True)` returns only essential properties to save space and time. -- Small refactors. - -### Fixed - -- Initialization of `peppy.Project` from `pandas.DataFrame`. Now `from_pandas()` can receive sample table, subsample table and config file -- Multiple bugs introduced during initialization of the project with custom index column names - -### Added -- New test cases and test data - -## [0.34.0] -- 2022-08-17 - -### Changed - -- Way of initialization project from dictionary. Now it's possible as follows: `Project().from_dict()` - -### Fixed - -- Fix error that was raised when duplicated sample in `sample_table` had different read types (single-end mixed with paired-end). 
- -### Added - -- Feature of initializing `peppy.Project` from `pandas.DataFrame` - -## [0.33.0] -- 2022-07-25 - -### Changed - -- `pep_version` is no longer a required parameter to create a `peppy.Project` instance from a configuration file. - -### Fixed - -- Performance issues during sample parsing. Two list comprehensions were combined to speed up this functionality. -- `KeyError` is thrown when attempting to access the `pep_version` of a `peppy.Project` instance instatiated from a sample table (`csv`) - -### Added - -- Implementation of `__eq__` for the `peppy.Project` class such that two instances of the class can be compared using python's equality operators (`==`, `!=`). -- New `from_dict` function that lets a user instatiate a new `peppy.Project` object using an in-memory representation of a PEP (a `dict`). This supports database storage of PEPs. -- New `extended` flag for the `to_dict` method on `peppy.Project` objects. This creates a **richer** dictionary representation of PEPs. -- Better sample parsing - -## [0.32.0] -- 2022-05-03 - -### Changed - -- Unify exceptions related to remote YAML file reading in `read_yaml` function. Now always a `RemoteYAMLError` is thrown. -- `Project` dict representation - -### Added - -- Support for PEP `2.1.0`, whichi includes support for no YAML configuration file component (CSV only), automatic sample merging if there are any duplicates in sample table index column, and new project attributes: `sample_table_index` and `subsample_table_index`. 
- -### Fixed - -- Project string representation; [Issue 368](https://github.com/pepkit/peppy/issues/368) - -## [0.31.2] -- 2021-11-04 -### Fixed -- Bug with setuptools 58 - -## [0.31.1] -- 2021-04-15 - -### Added - -- Support for remote URL config files - -### Fixed - -- Error when accessing `Project.subsample_table` property when no subsample tables were defined - -## [0.31.0] - 2020-10-07 - -### Added - -- `to_dict` method in `Sample` class that can include or exclude `Project` reference - -## [0.30.3] - 2020-09-22 - -### Changed - -- If there's just one `subsample_table` specified, `Project.subsample_table` property will return an object of `pandas.DataFrame` class rather than a `list` of ones - -### Fixed - -- `TypeError` when `subsample_table` is set to `null` - -## [0.30.2] - 2020-08-06 - -### Added - -- Support for multiple subsample tables -- License file to the package source distribution - -## [0.30.1] - 2020-05-26 - -### Changed - -- Package authors list - -## [0.30.0] - 2020-05-26 - -**This version introduced backwards-incompatible changes.** - -### Added - -- attribute duplication functionality -- config importing functionality -- attribute removal functionality -- possibility to define multi-attribute rules in attribute implication - -### Changed - -- Project configuration file to follow [PEP2.0.0](http://pep.databio.org/en/2.0.0/specification/) specification. Browse the specification for changes related to config format -- Do not require `sample_name` attribute in the sample table - -## [0.22.3] - 2019-12-13 - -### Changed - -- Remove `is_command_callable` from `utils` module; instead, refer to [`ubiquerg`](https://pypi.org/project/ubiquerg/). -- It's now exceptional (rather than just a warning) for a sample table file to be missing a valid name column. 
- -### Fixed - -- Empty columns in subsample tables are treated just as empty columns in sample tables (respective attributes are not included rather than populated with `nan`) - -## [0.22.2] - 2019-06-20 - -### Changed - -- Remove `ngstk` requirement. - -## [0.22.1] - 2019-06-19 - -### Changed - -- Prohibit storing reference to full `Project` object on a `Sample`. - -## [0.22.0] -- (2019-06-06) - -### Changed - -- Deprecate `Project` `constants` in favor of `constant_attributes.` -- Improved `Project` text representation for interactive/terminal display (`__repr__`): [Issue 296](https://github.com/pepkit/peppy/issues/296) - -### Fixed - -- Properly use `constant_attributes` if present from subproject. [Issue 292](https://github.com/pepkit/peppy/issues/292) -- Fixed a bug with subproject activation paths -- Revert deprecation of `sample_name` to `name`; so `sample_name` is again approved. - -## [0.21.0] -- (2019-05-02) - -### Added - -- Support for Snakemake projects (particularly `SnakeProject`) -- Hook for `get_arg_string` on `Project` to omit some pipeline options/arguments from the returned argument string -- `sample_table` and `subsample_table` functions, providing a functional syntax for requesting the respective attribute values from a `Project` -- Hook on `merge_sample` for specifying name of subannotation column that stores name for each sample - -### Changed - -- Improved messaging: ["Unmatched regex-like"](https://github.com/pepkit/peppy/issues/223), ["Missing and/or empty attribute(s)"](https://github.com/pepkit/peppy/issues/282) -- On `Project`, `sheet` is deprecated in favor of `sample_table`. -- On `Project`, `sample_subannotation` is deprecated in favor of `subsample_table`. -- On `Sample`, reference to `sample_name` is deprecated in favor of simply `name`. - -## [0.20.0] -- (2019-04-17) - -### Added - -- `subsample_table` on a `Project` gives the table of sample subannotation / "units" if applicable. 
- -### Changed - -- Add `attribute` parameter to `fetch_samples` function to enable more general applicability. - Additionally, the attribute value matching is more strict now -- requires perfect match. -- Remove Python 3.4 support. -- Use `attmap` for implementation of attribute-style access into a key-value collection. -- Deprecate `sample_annotation` and `sample_subannotation` in favor of `sample_table` and `subsample_table`, respectively. - -## [0.19.0] -- (2019-01-16) - -### New - -- Added `activate_subproject` method to `Project`. - -### Changed - -- `Project` construction no longer requires sample annotations sheet. -- Specification of assembly/ies in project config outside of `implied_attributes` is deprecated. -- `implied_columns` and `derived_columns` are deprecated in favor of `implied_attributes` and `derived_attributes`. - -## [0.18.2] -- (2018-07-23) - -### Fixed - -- Made requirements more lenient to allow for newer versions of required packages. - -## [0.18.1] -- (2018-06-29) - -### Fixed - -- Fixed a bug that would cause sample attributes to lose order. -- Fixed a bug that caused an install error with newer `numexpr` versions. - -### New - -- Project names are now inferred with the `infer_name` function, which uses a priority lookup to infer the project name: First, the `name` attribute in the `yaml` file; otherwise, the containing folder unless it is `metadata`, in which case, it's the parent of that folder. -- Add `get_sample` and `get_samples` functions to `Project` objects. -- Add `get_subsamples`and `get_subsample` functions to both `Project` and `Sample` objects. -- Subsamples are now objects that can be retrieved individually by name, with the `subsample_name` as the index column header. - -## [0.17.2] -- (2018-04-03) - -## Fixed - -- Ensure data source path relativity is with respect to project config file's folder. 
- -## [0.17.1] -- (2017-12-21) - -### Changed - -- Version bump for first pypi release -- Fixed bug with packaging for pypi release - -## [0.9.0] -- (2017-12-21) - -### New - -- Separation completed, `peppy` package is now standalone -- `looper` can now rely on `peppy` - -### Changed - -- `merge_table` renamed to `sample_subannotation` -- setup changed for compatibility with PyPI - -## [0.8.1] -- (2017-11-16) - -### New - -- Separated from looper into its own python package (originally called `pep`) - -## [0.7.2] -- (2017-11-16) - -### Fixed - -- Correctly count successful command submissions when not using `--dry-run`. - -## [0.7.1] -- (2017-11-15) - -### Fixed - -- No longer falsely display that there's a submission failure. -- Allow non-string values to be unquoted in the `pipeline_args` section. - -## [0.7.0] -- (2017-11-15) - -### New - -- Add `--lump` and `--lumpn` options -- Catch submission errors from cluster resource managers -- Implied columns can now be derived -- Now protocols can be specified on the command-line `--include-protocols` -- Add rudimentary figure summaries -- Allow wildcard protocol_mapping for catch-all pipeline assignment -- New sample_subtypes section in pipeline_interface - -### Changed - -- Sample child classes are now defined explicitly in the pipeline interface. Previously, they were guessed based on presence of a class extending Sample in a pipeline script. -- Changed 'library' key sample attribute to 'protocol' -- Improve user messages -- Simplifies command-line help display - -## [0.6.0] -- (2017-07-21) - -### New - -- Add support for implied_column section of the project config file -- Add support for Python 3 -- Merges pipeline interface and protocol mappings. This means we now allow direct pointers to `pipeline_interface.yaml` files, increasing flexibility, so this relaxes the specified folder structure that was previously used for `pipelines_dir` (with `config` subfolder). -- Allow URLs as paths to sample sheets. 
-- Allow tsv format for sample sheets. -- Checks that the path to a pipeline actually exists before writing the submission script. - -### Changed - -- Changed LOOPERENV environment variable to PEPENV, generalizing it to generic models -- Changed name of `pipelines_dir` to `pipeline_interfaces` (but maintained backwards compatibility for now). -- Changed name of `run` column to `toggle`, since `run` can also refer to a sequencing run. -- Relaxes many constraints (like resources sections, pipelines_dir columns), making project configuration files useful outside looper. This moves us closer to dividing models from looper, and improves flexibility. -- Various small bug fixes and dev improvements. -- Require `setuptools` for installation, and `pandas 0.20.2`. If `numexpr` is installed, version `2.6.2` is required. -- Allows tilde in `pipeline_interfaces` - -## [0.5.0] -- (2017-03-01) - -### New - -- Add new looper version tracking, with `--version` and `-V` options and printing version at runtime -- Add support for asterisks in file paths -- Add support for multiple pipeline directories in priority order -- Revamp of messages make more intuitive output -- Colorize output -- Complete rehaul of logging and test infrastructure, using logging and pytest packages - -### Changed - -- Removes pipelines_dir requirement for models, making it useful outside looper -- Small bug fixes related to `all_input_files` and `required_input_files` attributes -- More robust installation and more explicit requirement of Python 2.7 - -## [0.4.0] -- (2017-01-12) - -### New - -- New command-line interface (CLI) based on sub-commands -- New subcommand (`looper summarize`) replacing the `summarizePipelineStats.R` script -- New subcommand (`looper check`) replacing the `flagCheck.sh` script -- New command (`looper destroy`) to remove all output of a project -- New command (`looper clean`) to remove intermediate files of a project flagged for deletion -- Support for portable and 
pipeline-independent allocation of computing resources with Looperenv. - -### Changed - -- Removed requirement to have `pipelines` repository installed in order to extend base Sample objects -- Maintenance of sample attributes as provided by user by means of reading them in as strings (to be improved further -- Improved serialization of Sample objects diff --git a/docs/contributing.md b/docs/contributing.md deleted file mode 100644 index 41916b5b..00000000 --- a/docs/contributing.md +++ /dev/null @@ -1,13 +0,0 @@ -# Contributing - -Pull requests are welcome. - -After adding tests in `tests` for a new feature or a bug fix, please run the test suite. -To do so, the only additional dependencies (beyond those needed for the package itself) can be -installed with: - -```{bash} -pip install -r requirements/requirements-dev.txt -``` - -Once those are installed, the tests can be run with `pytest`. Alternatively, `python setup.py test` can be used. diff --git a/docs/hello-world.md b/docs/hello-world.md deleted file mode 100644 index 9ba73230..00000000 --- a/docs/hello-world.md +++ /dev/null @@ -1,48 +0,0 @@ -# Installation and Hello, World! - -## Installation - -With `pip` you can install the [latest release from PyPI](https://pypi.python.org/pypi/peppy): - -```bash -pip install --user peppy -``` - -Update `peppy` with `pip`: - -```bash -pip install --user --upgrade peppy -``` - -Releases and development versions may also be installed from the [GitHub releases](https://github.com/pepkit/peppy/releases): - -```bash -pip install --user https://github.com/pepkit/peppy/zipball/master -``` - - -## Hello world! - -Now, to test `peppy`, let's grab an clone an example project that follows PEP format. -We've produced a bunch of example PEPs in the [`example_peps` repository](https://github.com/pepkit/example_peps). 
-Let's clone it: - -```bash -git clone https://github.com/pepkit/example_peps.git -``` - -Then, from within the `example_peps` folder, enter the following commands in a Python session: - -```python -import peppy - -project = peppy.Project("example_basic/project_config.yaml") # instantiate in-memory Project representation -samples = project.samples # grab the list of Sample objects defined in this Project - -# Find the input file for the first sample in the project -samples[0]["file"] -``` - -That's it! You've got `peppy` running on an example project. -Now you can play around with project metadata from within python. -There are lots of other ways to initialize a project, which we will in the next section. diff --git a/docs/img/format_convert.svg b/docs/img/format_convert.svg deleted file mode 100644 index bc08ce4e..00000000 --- a/docs/img/format_convert.svg +++ /dev/null @@ -1,1275 +0,0 @@ - - - -PEPpeppy.ProjectDataFramesamplesYAMLInput formatsOutput formatsCSVURLCSVDataFramesamplesYAMLPEP diff --git a/docs/initialize.md b/docs/initialize.md deleted file mode 100644 index f8d74632..00000000 --- a/docs/initialize.md +++ /dev/null @@ -1,91 +0,0 @@ -# How to initiate peppy using different methods - -The primary use case of `peppy` is to create a `peppy.Project` object, which will give you an API for interacting with your project and sample metadata. There are multiple ways to instantiate a `peppy.Project`. -The most common is to use a configuration file; however, you can also use a `CSV` file (sample sheet), or a sample `YAML` file (sample sheet), or use Python objects directly, such as a `pandas` DataFrame, or a Python `dict`. - - -
- -
peppy can read from and produce various metadata formats
-
- - -## 1. From PEP configuration file - -```python -import peppy -project = peppy.Project.from_pep_config("path/to/project/config.yaml") -``` - -## 2. FROM `CSV` file (sample sheet) - -```python -import peppy -project = peppy.Project.from_pep_config("path/to/project/sample_sheet.csv") -``` - -You can also instantiate directly from a URL to a CSV file: - -```python -import peppy -project = peppy.Project("https://raw.githubusercontent.com/pepkit/example_peps/master/example_basic/sample_table.csv") -``` - - -## 3. From `YAML` sample sheet - -```python -import peppy - -project = peppy.Project.from_sample_yaml("path/to/project/sample_sheet.yaml") -``` - - -## 4. From a `pandas` DataFrame - -```python -import pandas as pd -import peppy -df = pd.read_csv("path/to/project/sample_sheet.csv") -project = peppy.Project.from_pandas(df) -``` - -## 5. From a `peppy`-generated `dict` - -Store a `peppy.Project` object as a dict using `prj.to_dict()`. Then, load it with `Project.from_dict()`: - -```python -import peppy - -project = peppy.Project("https://raw.githubusercontent.com/pepkit/example_peps/master/example_basic/sample_table.csv") -project_dict = project.to_dict(extended=True) -project_copy = peppy.Project.from_dict(project_dict) - -# now you can check if this project is the same as the original project -print(project_copy == project) -``` - -Or, you could generate an equivalent dictionary in some other way: - - -```python -import peppy -project = peppy.Project.from_dict( - {'_config': {'description': None, - 'name': 'example_basic', - 'pep_version': '2.0.0', - 'sample_table': 'sample_table.csv',}, - '_sample_dict': [{'organism': 'pig', 'sample_name': 'pig_0h', 'time': '0'}, - {'organism': 'pig', 'sample_name': 'pig_1h', 'time': '1'}, - {'organism': 'frog', 'sample_name': 'frog_0h', 'time': '0'}, - {'organism': 'frog', 'sample_name': 'frog_1h', 'time': '1'}], - '_subsample_list': [[{'read1': 'frog1a_data.txt', - 'read2': 'frog1a_data2.txt', - 'sample_name': 'frog_0h'}, - 
{'read1': 'frog1b_data.txt', - 'read2': 'frog1b_data2.txt', - 'sample_name': 'pig_0h'}, - {'read1': 'frog1c_data.txt', - 'read2': 'frog1b_data2.txt', - 'sample_name': 'pig_0h'}]]}) -``` diff --git a/docs/models.md b/docs/models.md deleted file mode 100644 index 5f4628d5..00000000 --- a/docs/models.md +++ /dev/null @@ -1,199 +0,0 @@ -# Project models - -`peppy` models projects and samples as Python objects. - -```python -import peppy - -my_project = peppy.Project("path/to/project_config.yaml") -my_samples = my_project.samples -``` - -Once you have your project and samples in your Python session, the possibilities are endless. For example, one way we use these objects is for post-pipeline processing. After we use looper to run each sample through its pipeline, we can load the project and its sample objects into an analysis session, where we do comparisons across samples. - -**Exploration:** - -To interact with the various `models` and become acquainted with their -features and behavior, there is a lightweight module that provides small -working versions of a couple of the core objects. Specifically, from -within the `tests` directory, the Python code in the `tests.interactive` -module can be copied and pasted into an interpreter. This provides a -`Project` instance called `proj` and a `PipelineInterface` instance -called `pi`. Additionally, this provides logging information in great detail, -affording visibility into some of what's happening as the `models` are created -and used. - - -## Extending sample objects - -By default we use *generic* models (see [API docs](autodoc_build/peppy.md) for more) that can be used in many contexts -via Python import, or by object serialization and deserialization via YAML. - -Since these models provide useful methods to store, update, and read attributes in the objects created from them -(most notably a *sample* - `Sample` object), a frequent use case is during the run of a pipeline. 
-A pipeline can create a more custom `Sample` model, adding or altering properties and methods. - -### Use case - -You have several samples, of different experiment types, -each yielding different varieties of data and files. For each sample of a given -experiment type that uses a particular pipeline, the set of file path types -that are relevant for the initial pipeline processing or for downstream -analysis is known. For instance, a peak file with a certain genomic location -will likely be relevant for a ChIP-seq sample, while a transcript -abundance/quantification file will probably be used when working with a RNA-seq -sample. This common situation, in which one or more file types are specific -to a pipeline and analysis both benefits from and is amenable to a bespoke -`Sample` *type*. - -Rather than working with a base `Sample` instance and -repeatedly specifying paths to relevant files, those locations can be provided -just once, stored in an instance of the custom `Sample` *type*, and later -used or modified as needed by referencing a named attribute on the object. -This approach can dramatically reduce the number of times that a full filepath -must be precisely typed, improving pipeline readability and accuracy. - -### Mechanics - -It's the specification of *both an experiment or data type* ("library" or -"protocol") *and a pipeline with which to process that input type* that -`looper` uses to determine which type of `Sample` object(s) to create for -pipeline processing and analysis (i.e., which `Sample` extension to use). -There's a pair of symmetric reasons for this--the relationship between input -type and pipeline can be one-to-many, in both directions. That is, it's -possible for a single pipeline to process more than one input type, and a -single input type may be processed by more than one pipeline. - -There are a few different `Sample` extension scenarios. 
Most basic is the -one in which an extension, or *subtype*, is neither defined nor needed--the -pipeline author does not provide one, and users do not request one. Almost -equally effortless on the user side is the case in which a pipeline author -intends for a single subtype to be used with her pipeline. In this situation, -the pipeline author simply implements the subtype within the pipeline module, -and nothing further is required--of the pipeline author or of a user! The -`Sample` subtype will be found within the pipeline module, and the inference -will be made that it's intended to be used as the fundamental representation -of a sample within that pipeline. - -If a pipeline author extends the base `Sample` type in the pipeline module, it's -likely that the pipeline's proper functionality depends on the use of that subtype. -In some cases, though, it may be desirable to use the base `Sample` type even if -the pipeline author has provided a more customized version with the pipeline. -To favor the base `Sample` over the tailored one created by a pipeline author, -the user may simply set `sample_subtypes` to `null` in an altered version of the pipeline -interface, either for all types of input to that pipeline, or just a subset. - - -```python -# atacseq.py - -import os -from peppy import Sample - -class ATACseqSample(Sample): - """ - Class to model ATAC-seq samples based on the generic Sample class. 
- - :param pandas.Series series: data defining the Sample - """ - - def __init__(self, series): - if not isinstance(series, pd.Series): - raise TypeError("Provided object is not a pandas Series.") - super(ATACseqSample, self).__init__(series) - self.make_sample_dirs() - - def set_file_paths(self, project=None): - """Sets the paths of all files for this sample.""" - # Inherit paths from Sample by running Sample's set_file_paths() - super(ATACseqSample, self).set_file_paths(project) - - self.fastqc = os.path.join(self.paths.sample_root, self.name + ".fastqc.zip") - self.trimlog = os.path.join(self.paths.sample_root, self.name + ".trimlog.txt") - self.fastq = os.path.join(self.paths.sample_root, self.name + ".fastq") - self.trimmed = os.path.join(self.paths.sample_root, self.name + ".trimmed.fastq") - self.mapped = os.path.join(self.paths.sample_root, self.name + ".bowtie2.bam") - self.peaks = os.path.join(self.paths.sample_root, self.name + "_peaks.bed") -``` - - -To leverage the power of a `Sample` subtype, the relevant model is the -`PipelineInterface`. For each pipeline defined in the `pipelines` section -of `pipeline_interface.yaml`, there's accommodation for a `sample_subtypes` -subsection to communicate this information. The value for each such key may be -either a single string or a collection of key-value pairs. If it's a single -string, the value is the name of the class that's to be used as the template -for each `Sample` object created for processing by that pipeline. If instead -it's a collection of key-value pairs, the keys should be names of input data -types (as in the `protocol_mapping`), and each value is the name of the class -that should be used for each sample object of the corresponding key *for that -pipeline*. This underscores that it's the ***combination** of a pipeline and input -type* that determines the subtype. - - -```yaml -# Content of pipeline_interface.yaml - -protocol_mapping: - ATAC: atacseq.py - -pipelines: - atacseq.py: - ... - ... 
- sample_subtypes: ATACseqSample - ... - ... - ... - ... -``` - - -If a pipeline author provides more than one subtype, the `sample_subtypes` -section is needed to select from among them once it's time to create -`Sample` objects. If multiple options are available, and the -`sample_subtypes` section fails to clarify the decision, the base/generic -type will be used. The responsibility for supplying the `sample_subtypes` -section, as is true for the rest of the pipeline interface, therefore rests -primarily with the pipeline developer. It is possible for an end user to -modify these settings, though. - -Since the mechanism for subtype detection is `inspect`-ion of each of the -pipeline module's classes and retention of those which satisfy a subclass -status check against `Sample`, it's possible for pipeline authors to -implement a class hierarchy with multi-hop inheritance relationships. For -example, consider the addition of the following class to the previous example -of a pipeline module `atacseq.py`: - - -```python -class DNaseSample(ATACseqSample): - ... -``` - -In this case there are now two `Sample` subtypes available, and more -generally, there will necessarily be multiple subtypes available in any -pipeline module that uses a subtype scheme with multiple, serial inheritance -steps. In such cases, the pipeline interface should include an unambiguous -`sample_subtypes` section. - - -```yaml -# Content of pipeline_interface.yaml - -protocol_mapping: - ATAC: atacseq.py - DNase: atacseq.py - -pipelines: - atacseq.py: - ... - ... - sample_subtypes: - ATAC: ATACseqSample - DNase: DNaseSample - ... - ... - ... - ... -``` diff --git a/docs/support.md b/docs/support.md deleted file mode 100644 index f185391d..00000000 --- a/docs/support.md +++ /dev/null @@ -1,4 +0,0 @@ -# Support - -Please use the issue tracker at GitHub to file bug reports or feature requests -on the [project's issues page](https://github.com/pepkit/peppy/issues). 
diff --git a/docs/templates/usage.template b/docs/templates/usage.template new file mode 100644 index 00000000..c7211be4 --- /dev/null +++ b/docs/templates/usage.template @@ -0,0 +1,6 @@ +# Usage reference + +pephubclient is a command line tool that can be used to interact with the PEPhub API. +It can be used to create, update, and delete PEPs in the PEPhub database. + +Below are usage examples for the different commands that can be used with pephubclient. \ No newline at end of file diff --git a/docs/validating.md b/docs/validating.md deleted file mode 100644 index 82ba0c5c..00000000 --- a/docs/validating.md +++ /dev/null @@ -1,9 +0,0 @@ -# How to validate a PEP - -Starting with version `0.30.0`, peppy now includes a powerful validation framework. We provide a schema for the basic PEP specification, so you can validate that a PEP fulfills that spec. Then, you can also write an extended schema to validate a PEP for a specific analysis. All of the PEP validation functionality is handled by a separate package called `eido`. You can read more in the eido documentation, including: - -- How to validate a PEP against the generic PEP format -- How to validate a PEP against a custom schema -- How to write your own custom schema - -See the [eido documentation](http://eido.databio.org/) for further detail. 
diff --git a/mkdocs.yml b/mkdocs.yml deleted file mode 100644 index c2989c63..00000000 --- a/mkdocs.yml +++ /dev/null @@ -1,31 +0,0 @@ -site_name: Peppy -site_url: http://peppy.databio.org/ -repo_url: http://github.com/pepkit/peppy -pypi_name: peppy - -nav: - - Getting started: - - Introduction: README.md - - Installing and Hello World: hello-world.md - - How-to Guides: - - How to initialize a Project: initialize.md - - How to use peppy: tutorial.md - - How to use subsample table: feature4_subsample_table.md - - How to use amendments: feature5_amend.md - - How to use append sample modifier: feature1_append.md - - How to use imply sample modifier: feature2_imply.md - - How to validate a PEP: validating.md - - Reference: - - API: autodoc_build/peppy.md - - Support: support.md - - Contributing: contributing.md - - Changelog: changelog.md - -theme: databio - -plugins: - - databio: - autodoc_build: "docs/autodoc_build" - autodoc_package: "peppy" - no_top_level: true - - search diff --git a/peppy/_version.py b/peppy/_version.py index fe4ef03f..f4759ace 100644 --- a/peppy/_version.py +++ b/peppy/_version.py @@ -1 +1 @@ -__version__ = "0.40.8" +__version__ = "0.50.0a1" diff --git a/peppy/cli.py b/peppy/cli.py new file mode 100644 index 00000000..3ff8980c --- /dev/null +++ b/peppy/cli.py @@ -0,0 +1,33 @@ +import typer + +from ._version import __version__ +from .const import PKG_NAME +from .eido.cli import app as eido_app +from .pephubclient.cli import app as phc_app + + +def version_callback(value: bool): + if value: + typer.echo(f"{PKG_NAME} version: {__version__}") + raise typer.Exit() + + +app = typer.Typer(help=f"{PKG_NAME} - Portable Encapsulated Projects toolkit") + + +@app.callback() +def common( + ctx: typer.Context, + version: bool = typer.Option( + None, "--version", "-v", callback=version_callback, help="package version" + ), +): + pass + + +app.add_typer(phc_app, name="phc", help="Client for the PEPhub server") +app.add_typer(eido_app, name="eido", help="PEP 
validation, conversion, and inspection") + + +def main(): + app(prog_name=PKG_NAME) diff --git a/peppy/const.py b/peppy/const.py index 6295806f..a6dd02b7 100644 --- a/peppy/const.py +++ b/peppy/const.py @@ -123,3 +123,13 @@ SUBSAMPLE_RAW_LIST_KEY = "_subsample_list" __all__ = PROJECT_CONSTANTS + SAMPLE_CONSTANTS + OTHER_CONSTANTS + + +SCHEMA_SECTIONS = ["PROP_KEY", "TANGIBLE_KEY", "SIZING_KEY"] + +SCHEMA_VALIDAION_KEYS = [ + "MISSING_KEY", + "REQUIRED_INPUTS_KEY", + "ALL_INPUTS_KEY", + "INPUT_FILE_SIZE_KEY", +] diff --git a/peppy/eido/__init__.py b/peppy/eido/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/peppy/eido/__init__.py @@ -0,0 +1 @@ + diff --git a/peppy/eido/cli.py b/peppy/eido/cli.py new file mode 100644 index 00000000..0fc9dce1 --- /dev/null +++ b/peppy/eido/cli.py @@ -0,0 +1,355 @@ +import sys +from logging import CRITICAL, DEBUG, ERROR, INFO, WARN, Logger +from typing import Dict, List, Optional + +import typer +from logmuse import init_logger + +from ..const import PKG_NAME, SAMPLE_NAME_ATTR +from ..project import Project +from .const import CONVERT_CMD, INSPECT_CMD, LOGGING_LEVEL, SUBPARSER_MSGS, VALIDATE_CMD +from .conversion import ( + convert_project, + get_available_pep_filters, + pep_conversion_plugins, +) +from .exceptions import EidoFilterError, EidoValidationError +from .inspection import inspect_project +from .validation import validate_config, validate_project, validate_sample + +LEVEL_BY_VERBOSITY = [ERROR, CRITICAL, WARN, INFO, DEBUG] + +app = typer.Typer() + + +def _configure_logging( + verbosity: Optional[int], + logging_level: Optional[str], + dbg: bool, +) -> str: + """Mimic old verbosity / logging-level behavior.""" + if dbg: + level = logging_level or DEBUG + elif verbosity is not None: + # Verbosity-framed specification trumps logging_level. 
+ level = LEVEL_BY_VERBOSITY[verbosity] + else: + level = LOGGING_LEVEL + return level + + +def _parse_filter_args_str(input: Optional[List[str]]) -> Dict[str, str]: + """ + Parse user input specification. + + :param Iterable[Iterable[str]] input: user command line input, + formatted as follows: [[arg=txt, arg1=txt]] + :return dict: mapping of keys, which are input names and values + """ + lst = [] + for i in input or []: + lst.extend(i) + return ( + {x.split("=")[0]: x.split("=")[1] for x in lst if "=" in x} + if lst is not None + else lst + ) + + +def print_error_summary( + errors_by_type: Dict[str, List[Dict[str, str]]], _LOGGER: Logger +): + """Print a summary of errors, organized by error type""" + n_error_types = len(errors_by_type) + _LOGGER.error(f"Found {n_error_types} types of error:") + for err_type, items in errors_by_type.items(): + n = len(items) + msg = f" - {err_type}: ({n} samples) " + if n < 50: + msg += ", ".join(x["sample_name"] for x in items) + _LOGGER.error(msg) + + if len(errors_by_type) > 1: + final_msg = f"Validation unsuccessful. {len(errors_by_type)} error types found." + else: + final_msg = f"Validation unsuccessful. {len(errors_by_type)} error type found." 
+ + _LOGGER.error(final_msg) + + +@app.callback() +def common( + ctx: typer.Context, + verbosity: Optional[int] = typer.Option( + None, + "--verbosity", + min=0, + max=len(LEVEL_BY_VERBOSITY) - 1, + help=f"Choose level of verbosity (default: {None})", + ), + logging_level: Optional[str] = typer.Option( + None, + "--logging-level", + help="logging level", + ), + dbg: bool = typer.Option( + False, + "--dbg", + help=f"Turn on debug mode (default: {False})", + ), +): + ctx.obj = { + "verbosity": verbosity, + "logging_level": logging_level, + "dbg": dbg, + } + + logger_level = _configure_logging(verbosity, logging_level, dbg) + logger_kwargs = {"level": logger_level, "devmode": dbg} + + global _LOGGER + _LOGGER = init_logger(name=PKG_NAME, **logger_kwargs) + + +@app.command(name=CONVERT_CMD, help=SUBPARSER_MSGS[CONVERT_CMD]) +def convert( + ctx: typer.Context, + pep: Optional[str] = typer.Argument( + None, + metavar="PEP", + help="Path to a PEP configuration file in yaml format.", + ), + st_index: Optional[str] = typer.Option( + None, "--st-index", help="Sample table index to use" + ), + sst_index: Optional[str] = typer.Option( + None, "--sst-index", help="Subsample table index to use" + ), + amendments: Optional[List[str]] = typer.Option( + None, + "--amendments", + help="Names of the amendments to activate.", + ), + format_: str = typer.Option( + "yaml", + "-f", + "--format", + help="Output format (name of filter; use -l to see available).", + ), + sample_name: Optional[List[str]] = typer.Option( + None, + "-n", + "--sample-name", + help="Name of the samples to inspect.", + ), + args: Optional[List[str]] = typer.Option( + None, + "-a", + "--args", + help=( + "Provide arguments to the filter function " "(e.g. arg1=val1 arg2=val2)." 
+ ), + ), + list_filters: bool = typer.Option( + False, + "-l", + "--list", + help="List available filters.", + ), + describe: bool = typer.Option( + False, + "-d", + "--describe", + help="Show description for a given filter.", + ), + paths_: Optional[List[str]] = typer.Option( + None, + "-p", + "--paths", + help="Paths to dump conversion result as key=value pairs.", + ), +): + filters = get_available_pep_filters() + if list_filters: + _LOGGER.info("Available filters:") + if len(filters) < 1: + _LOGGER.info("No available filters") + for filter_name in filters: + _LOGGER.info(f" - {filter_name}") + sys.exit(0) + if describe: + if format_ not in filters: + raise EidoFilterError( + f"'{format_}' filter not found. Available filters: {', '.join(filters)}" + ) + filter_functions_by_name = pep_conversion_plugins() + print(filter_functions_by_name[format_].__doc__) + sys.exit(0) + if pep is None: + typer.echo(ctx.get_help(), err=True) + _LOGGER.info("The following arguments are required: PEP") + sys.exit(1) + + if paths_: + paths = {y[0]: y[1] for y in [x.split("=") for x in paths_]} + else: + paths = None + + p = Project( + pep, + sample_table_index=st_index, + subsample_table_index=sst_index, + amendments=amendments, + ) + + plugin_kwargs = _parse_filter_args_str(args) + + # append paths + plugin_kwargs["paths"] = paths + + convert_project(p, format_, plugin_kwargs) + _LOGGER.info("Conversion successful") + sys.exit(0) + + +@app.command(name=VALIDATE_CMD, help=SUBPARSER_MSGS[VALIDATE_CMD]) +def validate( + pep: str = typer.Argument( + None, + metavar="PEP", + help="Path to a PEP configuration file in yaml format.", + ), + schema: str = typer.Option( + None, + "-s", + "--schema", + metavar="S", + help="Path to a PEP schema file in yaml format.", + ), + st_index: Optional[str] = typer.Option( + None, + "--st-index", + help=( + f"Sample table index to use; samples are identified by " + f"'{SAMPLE_NAME_ATTR}' by default." 
+ ), + ), + sst_index: Optional[str] = typer.Option( + None, + "--sst-index", + help=( + f"Subsample table index to use; samples are identified by " + f"'{SAMPLE_NAME_ATTR}' by default." + ), + ), + amendments: Optional[List[str]] = typer.Option( + None, + "--amendments", + help="Names of the amendments to activate.", + ), + sample_name: Optional[str] = typer.Option( + None, + "-n", + "--sample-name", + metavar="S", + help=( + "Name or index of the sample to validate. " + "Only this sample will be validated." + ), + ), + just_config: bool = typer.Option( + False, + "-c", + "--just-config", + help="Whether samples should be excluded from the validation.", + ), +): + if sample_name and just_config: + raise typer.BadParameter( + "Use only one of --sample-name or --just-config for 'validate'." + ) + p = Project( + pep, + sample_table_index=st_index, + subsample_table_index=sst_index, + amendments=amendments, + ) + if sample_name: + try: + sample_name = int(sample_name) + except ValueError: + pass + _LOGGER.debug( + f"Comparing Sample ('{pep}') in Project ('{pep}') " + f"against a schema: {schema}" + ) + validator = validate_sample + arguments = [p, sample_name, schema] + elif just_config: + _LOGGER.debug(f"Comparing Project ('{pep}') against a schema: {schema}") + + validator = validate_config + arguments = [p, schema] + else: + _LOGGER.debug(f"Comparing Project ('{pep}') against a schema: {schema}") + + validator = validate_project + arguments = [p, schema] + try: + validator(*arguments) + except EidoValidationError as e: + print_error_summary(e.errors_by_type, _LOGGER) + sys.exit(1) + _LOGGER.info("Validation successful") + sys.exit(0) + + +@app.command(name=INSPECT_CMD, help=SUBPARSER_MSGS[INSPECT_CMD]) +def inspect( + pep: str = typer.Argument( + None, + metavar="PEP", + help="Path to a PEP configuration file in yaml format.", + ), + st_index: Optional[str] = typer.Option( + None, + "--st-index", + help=( + f"Sample table index to use; samples are identified by " + 
f"'{SAMPLE_NAME_ATTR}' by default." + ), + ), + sst_index: Optional[str] = typer.Option( + None, + "--sst-index", + help=( + f"Subsample table index to use; samples are identified by " + f"'{SAMPLE_NAME_ATTR}' by default." + ), + ), + amendments: Optional[List[str]] = typer.Option( + None, + "--amendments", + help="Names of the amendments to activate.", + ), + sample_name: Optional[List[str]] = typer.Option( + None, + "-n", + "--sample-name", + metavar="SN", + help="Name of the samples to inspect.", + ), + attr_limit: int = typer.Option( + 10, + "-l", + "--attr-limit", + help="Number of sample attributes to display.", + ), +): + p = Project( + pep, + sample_table_index=st_index, + subsample_table_index=sst_index, + amendments=amendments, + ) + inspect_project(p, sample_name, attr_limit) diff --git a/peppy/eido/const.py b/peppy/eido/const.py new file mode 100644 index 00000000..9f3f1919 --- /dev/null +++ b/peppy/eido/const.py @@ -0,0 +1,50 @@ +""" +Constant variables for eido package +""" + +LOGGING_LEVEL = "INFO" +PKG_NAME = "eido" +INSPECT_CMD = "inspect" +VALIDATE_CMD = "validate" +CONVERT_CMD = "convert" +FILTERS_CMD = "filters" +SUBPARSER_MSGS = { + VALIDATE_CMD: "Validate a PEP or its components", + INSPECT_CMD: "Inspect a PEP", + CONVERT_CMD: "Convert PEP format using filters", +} +PROP_KEY = "properties" + +SAMPLES_KEY = "samples" + +TANGIBLE_KEY = "tangible" +SIZING_KEY = "sizing" + +# sample schema input validation key names, these values are required by looper +# to refer to the dict values +MISSING_KEY = "missing" +REQUIRED_INPUTS_KEY = "required_inputs" +ALL_INPUTS_KEY = "all_inputs" +INPUT_FILE_SIZE_KEY = "input_file_size" + +# groups of constants +GENERAL = [ + "LOGGING_LEVEL", + "PKG_NAME", + "INSPECT_CMD", + "VALIDATE_CMD", + "CONVERT_CMD", + "FILTERS_CMD", + "SUBPARSER_MSGS", +] + +SCHEMA_SECTIONS = ["PROP_KEY", "TANGIBLE_KEY", "SIZING_KEY"] + +SCHEMA_VALIDATION_KEYS = [ + "MISSING_KEY", + "REQUIRED_INPUTS_KEY", + "ALL_INPUTS_KEY", + 
"INPUT_FILE_SIZE_KEY", +] + +__all__ = GENERAL + SCHEMA_SECTIONS + SCHEMA_VALIDATION_KEYS diff --git a/peppy/eido/conversion.py b/peppy/eido/conversion.py new file mode 100644 index 00000000..817ccfd5 --- /dev/null +++ b/peppy/eido/conversion.py @@ -0,0 +1,142 @@ +import sys + +if sys.version_info < (3, 10): + from importlib_metadata import entry_points +else: + from importlib.metadata import entry_points + +import inspect +import os +from logging import getLogger +from typing import Callable, Dict, List, NoReturn, Optional + +from ..project import Project +from .exceptions import EidoFilterError + +_LOGGER = getLogger(__name__) + + +def pep_conversion_plugins() -> Dict[str, Callable]: + """Plugins registered by entry points in the current Python env. + + Returns: + Dict which keys are names of all possible hooks and values are dicts + mapping registered functions names to their values + + Raises: + EidoFilterError: If any of the filters has an invalid signature + """ + plugins = {} + for ep in entry_points(group="pep.filters"): + plugin_fun = ep.load() + if len(list(inspect.signature(plugin_fun).parameters)) != 2: + raise EidoFilterError( + f"Invalid filter plugin signature: {ep.name}. " + f"Filter functions must take 2 arguments: peppy.Project and **kwargs" + ) + plugins[ep.name] = plugin_fun + return plugins + + +def convert_project( + prj: Project, target_format: str, plugin_kwargs: Optional[Dict] = None +) -> Dict[str, str]: + """Convert a `peppy.Project` object to a selected format. 
+ + Args: + prj: A Project object to convert + target_format: The format to convert the Project object to + plugin_kwargs: Kwargs to pass to the plugin function + + Returns: + Dictionary with conversion results + + Raises: + EidoFilterError: If the requested filter is not defined + """ + return run_filter(prj, target_format, plugin_kwargs=plugin_kwargs or dict()) + + +def run_filter( + prj: Project, + filter_name: str, + verbose: bool = True, + plugin_kwargs: Optional[Dict] = None, +) -> Dict[str, str]: + """Run a selected filter on a peppy.Project object. + + Args: + prj: A Project to run filter on + filter_name: Name of the filter to run + verbose: Whether to print output to stdout + plugin_kwargs: Kwargs to pass to the plugin function + + Returns: + Dictionary with conversion results + + Raises: + EidoFilterError: If the requested filter is not defined + """ + # convert to empty dictionary if no plugin_kwargs are passed + plugin_kwargs = plugin_kwargs or dict() + + # get necessary objects + installed_plugins = pep_conversion_plugins() + installed_plugin_names = list(installed_plugins.keys()) + paths = plugin_kwargs.get("paths") + env = plugin_kwargs.get("env") + + # set environment + if env is not None: + for var in env: + os.environ[var] = env[var] + + # check for valid filter + if filter_name not in installed_plugin_names: + raise EidoFilterError( + f"Requested filter ({filter_name}) not found. " + f"Available: {', '.join(installed_plugin_names)}" + ) + _LOGGER.info(f"Running plugin {filter_name}") + func = installed_plugins[filter_name] + + # run filter + conv_result = func(prj, **plugin_kwargs) + + # if paths supplied, write to disk + if paths is not None: + # map conversion result to the + # specified path + for result_key in conv_result: + result_path = paths.get(result_key) + if result_path is None: + _LOGGER.warning( + f"Conversion plugin returned key that doesn't exist in specified paths: '{result_key}'." 
+ ) + else: + # create path if it doesn't exist + if not os.path.exists(result_path) and os.path.isdir( + os.path.dirname(result_path) + ): + os.makedirs(os.path.dirname(result_path), exist_ok=True) + save_result(result_path, conv_result[result_key]) + + if verbose: + for result_key in conv_result: + sys.stdout.write(conv_result[result_key]) + + return conv_result + + +def save_result(result_path: str, content: str) -> NoReturn: + with open(result_path, "w") as f: + f.write(content) + + +def get_available_pep_filters() -> List[str]: + """Get a list of available target formats. + + Returns: + A list of available formats + """ + return list(pep_conversion_plugins().keys()) diff --git a/peppy/eido/conversion_plugins.py b/peppy/eido/conversion_plugins.py new file mode 100644 index 00000000..e4405543 --- /dev/null +++ b/peppy/eido/conversion_plugins.py @@ -0,0 +1,85 @@ +"""built-in PEP filters""" + +from typing import Dict + +from .output_formatters import MultilineOutputFormatter + + +def basic_pep_filter(p, **kwargs) -> Dict[str, str]: + """ + Basic PEP filter, that does not convert the Project object. + + This filter can save the PEP representation to file, if kwargs include `path`. + + :param peppy.Project p: a Project to run filter on + """ + return {"project": str(p)} + + +def yaml_samples_pep_filter(p, **kwargs) -> Dict[str, str]: + """ + YAML samples PEP filter, that returns only Sample object representations. + + This filter can save the YAML to file, if kwargs include `path`. + + :param peppy.Project p: a Project to run filter on + """ + from yaml import dump + + samples_yaml = [] + for s in p.samples: + samples_yaml.append(s.to_dict()) + + return {"samples": dump(samples_yaml, default_flow_style=False)} + + +def yaml_pep_filter(p, **kwargs) -> Dict[str, str]: + """ + YAML PEP filter, that returns Project object representation. + + This filter can save the YAML to file, if kwargs include `path`. 
+ + :param peppy.Project p: a Project to run filter on + """ + from yaml import dump + + return {"project": dump(p.config, default_flow_style=False)} + + +def csv_pep_filter(p, **kwargs) -> Dict[str, str]: + """ + CSV PEP filter, that returns Sample object representations + + This filter can save the CSVs to files, if kwargs include + `sample_table_path` and/or `subsample_table_path`. + + :param peppy.Project p: a Project to run filter on + """ + return {"samples": MultilineOutputFormatter.format(p.samples)} + + +def processed_pep_filter(p, **kwargs) -> Dict[str, str]: + """ + Processed PEP filter, that returns the converted sample and subsample tables. + This filter can return the tables as a table or a document. + :param peppy.Project p: a Project to run filter on + :param bool samples_as_objects: Flag to write as a table + :param bool subsamples_as_objects: Flag to write as a table + """ + # get params + samples_as_objects = kwargs.get("samples_as_objects") + subsamples_as_objects = kwargs.get("subsamples_as_objects") + + prj_repr = p.config + + return { + "project": str(prj_repr), + "samples": ( + str(p.samples) if samples_as_objects else str(p.sample_table.to_csv()) + ), + "subsamples": ( + str(p.subsamples) + if subsamples_as_objects + else str(p.subsample_table.to_csv()) + ), + } diff --git a/peppy/eido/exceptions.py b/peppy/eido/exceptions.py new file mode 100644 index 00000000..ac9a9c9e --- /dev/null +++ b/peppy/eido/exceptions.py @@ -0,0 +1,50 @@ +"""Exceptions for specific eido issues.""" + +from abc import ABCMeta +from typing import Dict, List + +__all__ = [ + "EidoFilterError", + "EidoSchemaInvalidError", + "EidoValidationError", + "PathAttrNotFoundError", +] + + +class EidoException(Exception): + """Base type for custom package errors.""" + + __metaclass__ = ABCMeta + + +class PathAttrNotFoundError(EidoException): + """Path-like argument does not exist.""" + + def __init__(self, key): + super(PathAttrNotFoundError, self).__init__(key) + + +class 
EidoSchemaInvalidError(EidoException): + """Schema does not comply to eido-specific requirements.""" + + def __init__(self, key): + super(EidoSchemaInvalidError, self).__init__(key) + + +class EidoFilterError(EidoException): + """Issue with the PEP filter.""" + + def __init__(self, key): + super(EidoFilterError, self).__init__(key) + + +class EidoValidationError(EidoException): + """Object was not validated successfully according to schema.""" + + def __init__(self, message: str, errors_by_type: Dict[str, List[Dict[str, str]]]): + super().__init__(message) + self.errors_by_type = errors_by_type + self.message = message + + def __str__(self): + return f"EidoValidationError ({self.message}): {self.errors_by_type}" diff --git a/peppy/eido/inspection.py b/peppy/eido/inspection.py new file mode 100644 index 00000000..ecaf8fdc --- /dev/null +++ b/peppy/eido/inspection.py @@ -0,0 +1,124 @@ +import os +from logging import getLogger +from typing import Dict, Iterable, List, Set, Union +from warnings import catch_warnings + +from ubiquerg import size + +from .. import Project, Sample +from .const import ( + ALL_INPUTS_KEY, + INPUT_FILE_SIZE_KEY, + MISSING_KEY, + PROP_KEY, + REQUIRED_INPUTS_KEY, + SAMPLES_KEY, + SIZING_KEY, + TANGIBLE_KEY, +) +from .schema import read_schema +from .validation import _get_attr_values, _validate_sample_object + +_LOGGER = getLogger(__name__) + + +def inspect_project( + p: Project, sample_names: Union[None, List[str]] = None, max_attr: int = 10 +) -> None: + """Print inspection info: Project or, if sample_names argument is provided, matched samples. 
def get_input_files_size(
    sample: Sample, schema: Union[str, List[Dict]]
) -> Dict[str, Union[List[str], Set[str], float]]:
    """Determine which of this Sample's required files are missing and compute input size.

    The names of the attributes that are required and/or deemed as inputs
    are read from the sample section of the schema (the entries stored
    under ``TANGIBLE_KEY`` and ``SIZING_KEY``). Note that this function
    also validates the Sample object with jsonschema before inspecting it.

    Args:
        sample: Sample to investigate
        schema: List of schema dicts (as returned by ``read_schema``) or a
            path to a schema file

    Returns:
        Dictionary with validation data, i.e. missing, required_inputs,
        all_inputs, input_file_size (in GiB)

    Raises:
        EidoValidationError: If the sample does not validate against the schema
    """

    def _compute_input_file_size(inputs: Iterable[str]) -> float:
        """Sum the sizes of ``inputs`` and return the total in GiB.

        Warnings raised while sizing (presumably by ``size`` for files it
        cannot measure) are captured and summarised in a single log record.
        """
        with catch_warnings(record=True) as w:
            total_bytes = sum(
                size(f, size_str=False) or 0.0
                for f in inputs
                if f != "" and f is not None
            )
        if w:
            _LOGGER.warning(
                f"{len(w)} input files missing, job input size was "
                f"not calculated accurately"
            )
        return total_bytes / (1024**3)

    if isinstance(schema, str):
        schema = read_schema(schema)

    # first, validate attrs existence using jsonschema
    _validate_sample_object(schemas=schema, sample=sample)

    all_inputs = set()
    required_inputs = set()
    # Use the last schema in the list for input/size lookup;
    # read_schema puts imported schemas before the one that imports them.
    schema = schema[-1]
    sample_schema_dict = schema[PROP_KEY][SAMPLES_KEY]["items"]
    if SIZING_KEY in sample_schema_dict:
        # _get_attr_values returns None for an empty attribute list
        all_inputs.update(
            _get_attr_values(sample, sample_schema_dict[SIZING_KEY]) or []
        )
    if TANGIBLE_KEY in sample_schema_dict:
        required_inputs = set(
            _get_attr_values(sample, sample_schema_dict[TANGIBLE_KEY]) or []
        )
        all_inputs.update(required_inputs)

    # The helper records and reports sizing warnings itself, so no extra
    # catch_warnings context is needed around this call.
    input_file_size = _compute_input_file_size(all_inputs)

    return {
        MISSING_KEY: [i for i in required_inputs if not os.path.exists(i)],
        REQUIRED_INPUTS_KEY: required_inputs,
        ALL_INPUTS_KEY: all_inputs,
        INPUT_FILE_SIZE_KEY: input_file_size,
    }
class MultilineOutputFormatter(BaseOutputFormatter):
    """Format samples as CSV text, emitting one row per value of a multi-valued sample.

    NOTE(review): values are joined with "," without any CSV quoting, so a
    value that itself contains a comma or newline will corrupt the row —
    confirm whether callers guarantee such values never occur.
    """

    @staticmethod
    def format(samples: List[Sample]) -> str:
        """Convert samples to CSV text (header row + data rows).

        The column set is taken from the first sample; attributes starting
        with "_" and the "subsample_name" attribute are skipped.

        Args:
            samples: Samples to convert.

        Returns:
            CSV text terminated by a newline, or an empty string when
            there are no samples.
        """
        if not samples:
            # No sample to derive the header from; nothing to format.
            return ""

        output_rows = []
        sample_attributes = [
            attribute
            for attribute in samples[0].keys()
            if not attribute.startswith("_") and attribute != "subsample_name"
        ]
        header = MultilineOutputFormatter._get_header(sample_attributes)

        for sample in samples:
            attribute_with_multiple_properties = MultilineOutputFormatter._get_the_name_of_the_first_attribute_with_multiple_properties(
                sample, sample_attributes
            )
            if attribute_with_multiple_properties:
                sample_rows = MultilineOutputFormatter._split_sample_to_multiple_rows(
                    sample, sample_attributes, attribute_with_multiple_properties
                )
                output_rows.extend(sample_rows)
            else:
                one_sample_row = MultilineOutputFormatter._convert_sample_to_row(
                    sample, sample_attributes
                )
                output_rows.append(one_sample_row)

        return "\n".join(header + output_rows) + "\n"

    @staticmethod
    def _get_header(header_column_names: List[str]) -> List[str]:
        """Return the header as a one-element list holding the comma-joined column names."""
        return [",".join(header_column_names)]

    @staticmethod
    def _get_the_name_of_the_first_attribute_with_multiple_properties(
        sample: Sample, sample_attributes: List[str]
    ) -> Union[str, None]:
        """Return the first attribute whose value is a list, or None if there is none."""
        for attribute in sample_attributes:
            if MultilineOutputFormatter._sample_attribute_is_list(sample, attribute):
                return attribute
        return None

    @staticmethod
    def _split_sample_to_multiple_rows(
        sample: Sample, sample_attributes: List, attribute_with_multiple_properties: str
    ) -> Iterable[str]:
        """
        If one sample object contains array properties instead of single value, then it will be converted
        to multiple rows.

        Args:
            sample: Sample from project.
            sample_attributes: List of all sample properties names (name of columns from sample_table).
            attribute_with_multiple_properties: Name of the list-valued attribute
                whose length determines the number of rows.

        Returns:
            List of rows created from given sample object.
        """
        number_of_samples_after_split = len(
            getattr(sample, attribute_with_multiple_properties)
        )
        return [
            MultilineOutputFormatter._convert_sample_to_row(
                sample, sample_attributes, sample_index
            )
            for sample_index in range(number_of_samples_after_split)
        ]

    @staticmethod
    def _convert_sample_to_row(
        sample: Sample, sample_attributes: List, sample_index: int = 0
    ) -> str:
        """
        Converts single sample object to CSV row.

        Some samples have a list of values instead of single value for given attribute (column), and
        sample_index indicates index of the value that will be used to create a row. For samples
        without list-valued attributes this is always zero.

        Args:
            sample: Single sample object.
            sample_attributes: Array of all attributes names (column names) for given sample.
            sample_index: Index of the value to use for list-valued attributes.

        Returns:
            Representation of sample as a CSV row.
        """
        sample_row = []

        for attribute in sample_attributes:
            if (
                MultilineOutputFormatter._sample_attribute_is_list(sample, attribute)
                and sample[attribute]
            ):
                values = sample[attribute]
                # A list shorter than the one driving the split yields an
                # empty cell instead of an IndexError.
                value = values[sample_index] if sample_index < len(values) else ""
            else:
                value = sample.get(attribute)

            # Falsy values become an empty cell (as before); everything else
            # is stringified so that non-string values do not break join().
            sample_row.append(str(value) if value else "")

        return ",".join(sample_row)

    @staticmethod
    def _sample_attribute_is_list(sample: Sample, attribute: str) -> bool:
        """Return True when the sample's attribute exists and holds a list."""
        return isinstance(getattr(sample, attribute, ""), list)
+ - adding array of strings entry for every string specified to accommodate + subsamples in peppy.Project + + Args: + schema_dict: Schema dictionary to preprocess + + Returns: + Preprocessed schema + """ + _LOGGER.debug(f"schema ori: {schema_dict}") + if "project" not in schema_dict[PROP_KEY]: + _LOGGER.debug("No project section found in schema") + + if SAMPLES_KEY in schema_dict[PROP_KEY]: + if ( + "items" in schema_dict[PROP_KEY][SAMPLES_KEY] + and PROP_KEY in schema_dict[PROP_KEY][SAMPLES_KEY]["items"] + ): + s_props = schema_dict[PROP_KEY][SAMPLES_KEY]["items"][PROP_KEY] + for prop, val in s_props.items(): + if "type" in val and val["type"] in ["string", "number", "boolean"]: + s_props[prop] = {} + s_props[prop]["anyOf"] = [val, {"type": "array", "items": val}] + else: + _LOGGER.debug("No samples section found in schema") + _LOGGER.debug(f"schema processed: {schema_dict}") + return schema_dict + + +def read_schema(schema: Union[str, Dict]) -> List[Dict]: + """Safely read schema from YAML-formatted file. + + If the schema imports any other schemas, they will be read recursively. 
+ + Args: + schema: Path to the schema file or schema in a dict form + + Returns: + Read schemas + + Raises: + TypeError: If the schema arg is neither a Mapping nor a file path or + if the 'imports' sections in any of the schemas is not a list + """ + + def _recursively_read_schemas( + x: Dict, lst: List[Dict], parent_folder: Union[str, None] + ) -> List[Dict]: + if "imports" in x: + if isinstance(x["imports"], list): + for sch in x["imports"]: + if (not is_url(sch)) and (not os.path.isabs(sch)): + # resolve relative path + if parent_folder is not None: + sch = os.path.normpath(os.path.join(parent_folder, sch)) + else: + _LOGGER.warning( + f"The schema contains relative path without known parent folder: {sch}" + ) + lst.extend(read_schema(sch)) + else: + raise TypeError("In schema the 'imports' section has to be a list") + lst.append(x) + return lst + + schema_list = [] + schema_folder = None + if isinstance(schema, str): + _LOGGER.debug(f"Reading schema: {schema}") + schema_folder = os.path.split(schema)[0] + schema = load_yaml(schema) + if not isinstance(schema, dict): + raise TypeError( + f"schema has to be a dict, path to an existing file or URL to a remote one. 
def _get_error_instance_name(error, obj, sample_name_colname):
    """Work out a human-readable name for the instance a validation error refers to.

    Args:
        error: jsonschema validation error
        obj: The object that was validated
        sample_name_colname: Key that holds the sample name, or False

    Returns:
        The sample name when it can be determined, otherwise "project"
    """
    instance = error.instance
    if isinstance(instance, dict):
        # Error raised on a mapping: a sample carries its name column,
        # anything else is reported as a project-level problem.
        return instance.get(sample_name_colname, "project")

    # Error raised on a value nested in a sample: locate the owning sample
    # through the error path ("samples", <index>, ...).
    path = list(error.absolute_path)
    if len(path) > 1 and path[0] == "samples" and isinstance(path[1], int):
        try:
            return obj["samples"][path[1]][sample_name_colname]
        except (KeyError, IndexError, TypeError):
            pass
    return "project"


def _validate_object(
    obj: Mapping,
    schema: Union[str, dict],
    sample_name_colname: Union[str, bool] = False,
) -> None:
    """Generic function to validate object against a schema.

    Args:
        obj: An object to validate
        schema: Schema dict to validate against or a path to one
        sample_name_colname: Column name for sample names in error reporting

    Raises:
        EidoValidationError: If validation is unsuccessful
    """
    validator = Draft7Validator(schema)
    _LOGGER.debug(f"{obj},\n {schema}")
    if validator.is_valid(obj):
        _LOGGER.debug("Validation was successful...")
        return

    errors = sorted(validator.iter_errors(obj), key=lambda e: e.path)
    errors_by_type = {}

    # Accumulate and restructure error objects by error type
    for error in errors:
        instance_name = _get_error_instance_name(error, obj, sample_name_colname)
        errors_by_type.setdefault(error.message, []).append(
            {
                "type": error.message,
                "message": f"{error.message} on instance {instance_name}",
                "sample_name": instance_name,
            }
        )

    raise EidoValidationError("Validation failed", errors_by_type)
def validate_config(
    project: Union[Project, dict, str], schema: Union[str, dict]
) -> None:
    """Validate the config part of the Project object against a schema.

    The samples section (and its entry in "required") is removed from a
    copy of every schema, so only the project config is checked.

    Args:
        project: A project object, dict, or path to config file to validate
        schema: Schema dict to validate against or a path to one

    Raises:
        EidoValidationError: If validation is unsuccessful
        ValueError: If ``project`` is a path that cannot be read
    """
    schema_dicts = read_schema(schema=schema)

    # Resolve the object to validate once; it is the same for every schema.
    if isinstance(project, dict):
        project_dict = {"project": project}
    elif isinstance(project, str):
        try:
            project_dict = {"project": load_yaml(project)}
        except OSError as e:  # covers FileNotFoundError and IOError
            raise ValueError(
                f"Please provide a valid yaml config of PEP project; invalid config path: {project}"
            ) from e
    else:
        project_dict = project.to_dict()

    for schema_dict in schema_dicts:
        # Work on a deep copy so the caller's schema is not modified.
        schema_cpy = preprocess_schema(dpcpy(schema_dict))
        try:
            del schema_cpy[PROP_KEY][SAMPLES_KEY]
        except KeyError:
            # Schema doesn't have samples key, which is fine for config-only validation
            pass
        if "required" in schema_cpy:
            try:
                schema_cpy["required"].remove(SAMPLES_KEY)
            except ValueError:
                # SAMPLES_KEY is not in required list, no action needed
                pass
        _validate_object(project_dict, schema_cpy)

    _LOGGER.debug("Config validation successful")
def validate_input_files(
    project: Project,
    schemas: Union[str, dict],
    sample_name: Union[str, int, None] = None,
) -> None:
    """Determine which of the required and optional files are missing.

    The names of the attributes that are required and/or deemed as inputs
    are read from the sample section of the schema (the entries stored
    under ``TANGIBLE_KEY`` and ``SIZING_KEY``):

    - If any of the required files are missing, this function raises an error.
    - If any of the optional files are missing, the function raises a warning.

    Note, this function also performs Sample object validation with jsonschema.

    Args:
        project: Project that defines the samples to validate
        schemas: Schema dict to validate against, a path to one, or a list
            of already-read schema dicts
        sample_name: Name or index of the sample to validate. If None,
            validate all samples in the project

    Raises:
        PathAttrNotFoundError: If any required input file is missing
        EidoValidationError: If a sample does not validate against the schema
    """

    if sample_name is None:
        samples = project.samples
    else:
        samples = [
            (
                project.samples[sample_name]
                if isinstance(sample_name, int)
                else project.get_sample(sample_name)
            )
        ]

    # A path or a single schema dict is expanded into the list of schemas
    # (imports included) that the rest of the function expects.
    if isinstance(schemas, (str, dict)):
        schemas = read_schema(schemas)

    for sample in samples:
        # validate attrs existence first
        _validate_sample_object(schemas=schemas, sample=sample)

        all_inputs = set()
        required_inputs = set()
        # Use the last schema in the list for input lookup;
        # read_schema puts imported schemas before the one that imports them.
        schema = schemas[-1]
        sample_schema_dict = schema[PROP_KEY][SAMPLES_KEY]["items"]
        if SIZING_KEY in sample_schema_dict:
            # _get_attr_values returns None for an empty attribute list
            all_inputs.update(
                _get_attr_values(sample, sample_schema_dict[SIZING_KEY]) or []
            )
        if TANGIBLE_KEY in sample_schema_dict:
            required_inputs = set(
                _get_attr_values(sample, sample_schema_dict[TANGIBLE_KEY]) or []
            )
            all_inputs.update(required_inputs)

        missing_required_inputs = [i for i in required_inputs if not os.path.exists(i)]
        # Only inputs that are not required count as optional.
        missing_optional_inputs = [
            i for i in all_inputs - required_inputs if not os.path.exists(i)
        ]
        if missing_optional_inputs:
            warn(
                f"For sample '{getattr(sample, project.sample_table_index)}'. "
                f"Optional inputs not found: {missing_optional_inputs}"
            )
        if missing_required_inputs:
            # Report only the files that are actually missing.
            raise PathAttrNotFoundError(
                f"For sample '{getattr(sample, project.sample_table_index)}'. "
                f"Required inputs not found: {missing_required_inputs}"
            )
@app.command()
def push(
    cfg: str = typer.Argument(
        ...,
        # Trailing space keeps the two concatenated literals from running
        # together in the rendered help text.
        help="Project config file (YAML) or sample table (CSV/TSV) "
        "with one row per sample to constitute project",
    ),
    namespace: str = typer.Option(..., help="Project namespace"),
    name: str = typer.Option(..., help="Project name"),
    tag: str = typer.Option(None, help="Project tag"),
    force: bool = typer.Option(
        False, help="Force push to the database. Use it to update, or upload project."
    ),
    is_private: bool = typer.Option(False, help="Upload project as private."),
):
    """
    Upload/update project in PEPhub
    """
    # Errors raised by the client are reported by call_client_func.
    call_client_func(
        _client.push,
        cfg=cfg,
        namespace=namespace,
        name=name,
        tag=tag,
        is_private=is_private,
        force=force,
    )
class FilesManager:
    """File-system helpers used by the PEPhub client (token file, project files, zip archives)."""

    @staticmethod
    def save_jwt_data_to_file(path: str, jwt_data: str) -> None:
        """
        Write the JWT string to ``path``, creating the parent directory if needed.

        :param path: destination file
        :param jwt_data: token text to store
        """
        parent_dir = Path(os.path.dirname(path))
        parent_dir.mkdir(parents=True, exist_ok=True)
        with open(path, "w") as handle:
            handle.write(jwt_data)

    @staticmethod
    def load_jwt_data_from_file(path: str) -> str:
        """
        Read the stored JWT string.

        :param path: file to read
        :return: file content, or None when the file does not exist
        """
        try:
            with open(path, "r") as handle:
                return handle.read()
        except FileNotFoundError:
            return None

    @staticmethod
    def create_project_folder(
        parent_path: str,
        folder_name: str,
    ) -> str:
        """
        Create new project folder

        :param parent_path: parent path to create folder in; the current
            working directory is used when it is empty
        :param folder_name: folder name
        :return: folder_path
        """
        if parent_path and not Path(parent_path).exists():
            raise OSError(f"Parent path does not exist. Provided path: {parent_path}")
        base_dir = parent_path or os.getcwd()
        folder_path = os.path.join(base_dir, folder_name)
        Path(folder_path).mkdir(parents=True, exist_ok=True)
        return folder_path

    @staticmethod
    def save_yaml(config: dict, full_path: str, not_force: bool = False):
        """
        Dump ``config`` as block-style YAML to ``full_path``.

        :param not_force: when True, refuse to overwrite an existing file
        """
        allow_overwrite = not not_force
        FilesManager.check_writable(path=full_path, force=allow_overwrite)
        with open(full_path, "w") as outfile:
            yaml.dump(config, outfile, default_flow_style=False)

    @staticmethod
    def save_pandas(df: pandas.DataFrame, full_path: str, not_force: bool = False):
        """
        Write ``df`` as CSV (without the index) to ``full_path``.

        :param not_force: when True, refuse to overwrite an existing file
        """
        allow_overwrite = not not_force
        FilesManager.check_writable(path=full_path, force=allow_overwrite)
        df.to_csv(full_path, index=False)

    @staticmethod
    def file_exists(full_path: str) -> bool:
        """Return True when ``full_path`` is an existing regular file."""
        return os.path.isfile(full_path)

    @staticmethod
    def delete_file_if_exists(filename: str) -> None:
        """Remove ``filename`` and print a confirmation; do nothing when it is absent."""
        try:
            os.remove(filename)
        except FileNotFoundError:
            return
        print(
            f"\033[38;5;11m{f'File was deleted successfully -> {filename}'}\033[0m"
        )

    @staticmethod
    def check_writable(path: str, force: bool = True):
        """
        Raise PEPExistsError when ``path`` is an existing file and overwriting is not allowed.

        :param force: allow overwriting an existing file
        """
        if force:
            return
        if os.path.isfile(path):
            raise PEPExistsError(f"File already exists and won't be updated: {path}")

    @staticmethod
    def save_zip_file(files_dict: dict, file_path: str, force: bool = False) -> None:
        """
        Save zip file with provided files as dict.

        :param files_dict: dict with files to save. e.g. {"file1.txt": "file1 content"}
        :param file_path: filename to save zip file to
        :param force: overwrite file if exists
        :return: None
        """
        FilesManager.check_writable(path=file_path, force=force)
        archive = zipfile.ZipFile(
            file_path, mode="w", compression=zipfile.ZIP_DEFLATED
        )
        with archive:
            for member_name, text in files_dict.items():
                archive.writestr(member_name, str.encode(text))
def call_client_func(func: Callable[..., Any], **kwargs) -> Any:
    """
    Call a client function on behalf of the CLI, printing known errors instead of raising them.

    :param func: The function to call.
    :param kwargs: The keyword arguments to pass to the function.
    :return: The result of the function call, or None when one of the
        handled errors occurred.
    """

    try:
        # Hand the result back to the caller, as the signature promises.
        return func(**kwargs)
    except ConnectionError as err:
        MessageHandler.print_error(f"Failed to connect to server. Try later. {err}")
    except ResponseError as err:
        MessageHandler.print_error(f"{err}")
    except PEPExistsError as err:
        MessageHandler.print_warning(f"PEP already exists. {err}")
    except OSError as err:
        MessageHandler.print_error(f"{err}")
    return None
def _save_unzipped_pep(
    project_dict: dict, folder_path: str, force: bool = False
) -> None:
    """
    Save unzipped project to specified folder.

    Writes ``<project name>_config.yaml``, ``sample_table.csv`` and one
    ``subsample_table<N>.csv`` per subsample table into ``folder_path``.
    The config dict held in ``project_dict`` is updated in place so that its
    ``sample_table`` / ``subsample_table`` entries point at the written files.

    :param project_dict: raw pep project
    :param folder_path: path to save project
    :param force: overwrite project if exists
    :raises PEPExistsError: if ``force`` is False and any of the files that
        would be written already exists
    :return: None
    """

    def full_path(fn: str) -> str:
        return os.path.join(folder_path, fn)

    config_dict = project_dict[CONFIG_KEY]
    project_name = config_dict[NAME_KEY]
    sample_table_filename = "sample_table.csv"
    yaml_full_path = full_path(f"{project_name}_config.yaml")
    sample_full_path = full_path(sample_table_filename)

    subsample_list = [
        pd.DataFrame(sub_a) for sub_a in project_dict.get(SUBSAMPLE_RAW_LIST_KEY) or []
    ]
    subsample_filenames = [
        f"subsample_table{idx + 1}.csv" for idx in range(len(subsample_list))
    ]

    if not force:
        # Check every file this function is about to write, including the
        # subsample tables, so nothing is overwritten without force.
        targets = [yaml_full_path, sample_full_path]
        targets.extend(full_path(fn) for fn in subsample_filenames)
        extant = [p for p in targets if os.path.isfile(p)]
        if extant:
            raise PEPExistsError(f"{len(extant)} file(s) exist(s): {', '.join(extant)}")

    # The name and description already live in config_dict (it is the very
    # dict they were read from), so only the table references need updating.
    config_dict["sample_table"] = sample_table_filename
    config_dict["subsample_table"] = subsample_filenames

    sample_pandas = pd.DataFrame(project_dict.get(SAMPLE_RAW_DICT_KEY, {}))

    # Each subsample table is written exactly once.
    for fn, subsample in zip(subsample_filenames, subsample_list):
        FilesManager.save_pandas(subsample, full_path(fn), not_force=False)

    FilesManager.save_yaml(config_dict, yaml_full_path, not_force=False)
    FilesManager.save_pandas(sample_pandas, sample_full_path, not_force=False)

    MessageHandler.print_success(f"Project was saved successfully -> {folder_path}")
    return None
class ProjectUploadData(BaseModel):
    """
    Model used in post request to upload project
    """

    # Raw project (config, sample list, subsample list) to be uploaded.
    pep_dict: ProjectDict
    # Project tag; a falsy value is normalized to "default" by the validator.
    tag: Optional[str] = "default"
    is_private: Optional[bool] = False
    overwrite: Optional[bool] = False

    @field_validator("tag")
    @classmethod
    def tag_should_not_be_none(cls, v):
        """
        Replace a missing/empty tag with "default".

        :param v: tag value supplied by the caller
        :return: ``v`` if it is truthy, otherwise "default"
        """
        return v or "default"
class PEPHubSample(RequestManager):
    """
    Client for the sample endpoints of PEPhub: fetch, add, modify and delete
    single samples of a project.
    Not to be confused with the peppy.Sample class.
    """

    def __init__(self, jwt_data: str = None):
        """
        :param jwt_data: jwt token for authorization
        """

        self.__jwt_data = jwt_data

    def get(
        self,
        namespace: str,
        name: str,
        tag: str,
        sample_name: str = None,
    ) -> dict:
        """
        Fetch one sample of a project from PEPhub.

        :param namespace: namespace of project
        :param name: name of project
        :param tag: tag of project
        :param sample_name: sample name
        :return: decoded JSON body of the response
        :raises ResponseError: on any status other than OK
        """
        endpoint = self._build_sample_request_url(
            namespace=namespace, name=name, sample_name=sample_name
        )
        endpoint += self.parse_query_param(pep_variables={"tag": tag})

        reply = self.send_request(
            method="GET", url=endpoint, headers=self.parse_header(self.__jwt_data)
        )
        status = reply.status_code
        if status == ResponseStatusCodes.OK:
            return self.decode_response(reply, output_json=True)
        if status == ResponseStatusCodes.NOT_EXIST:
            raise ResponseError(
                f"Sample does not exist. Project: '{namespace}/{name}:{tag}'. Sample_name: '{sample_name}'"
            )
        if status == ResponseStatusCodes.INTERNAL_ERROR:
            raise ResponseError("Internal server error. Unexpected return value.")
        raise ResponseError(f"Unexpected return value. Error: {status}")

    def create(
        self,
        namespace: str,
        name: str,
        tag: str,
        sample_name: str,
        sample_dict: dict,
        overwrite: bool = False,
    ) -> None:
        """
        Add a sample to a project in PEPhub.

        :param namespace: namespace of project
        :param name: name of project
        :param tag: tag of project
        :param sample_name: sample name
        :param sample_dict: sample dict; gets a "sample_name" entry added in
            place when none of its values equals ``sample_name``
        :param overwrite: overwrite sample if it exists
        :return: None
        :raises ResponseError: on any status other than ACCEPTED
        """
        endpoint = self._build_sample_request_url(
            namespace=namespace,
            name=name,
            sample_name=sample_name,
        )
        endpoint += self.parse_query_param(
            pep_variables={"tag": tag, "overwrite": overwrite}
        )

        # make sure the sample name travels with the payload
        if sample_name not in sample_dict.values():
            sample_dict["sample_name"] = sample_name

        reply = self.send_request(
            method="POST",
            url=endpoint,
            headers=self.parse_header(self.__jwt_data),
            json=sample_dict,
        )
        status = reply.status_code
        if status == ResponseStatusCodes.ACCEPTED:
            _LOGGER.info(
                f"Sample '{sample_name}' added to project '{namespace}/{name}:{tag}' successfully."
            )
            return None
        if status == ResponseStatusCodes.NOT_EXIST:
            raise ResponseError(f"Project '{namespace}/{name}:{tag}' does not exist.")
        if status == ResponseStatusCodes.CONFLICT:
            raise ResponseError(
                f"Sample '{sample_name}' already exists. Set overwrite to True to overwrite sample."
            )
        raise ResponseError(f"Unexpected return value. Error: {status}")

    def update(
        self,
        namespace: str,
        name: str,
        tag: str,
        sample_name: str,
        sample_dict: dict,
    ):
        """
        Modify an existing sample of a project in PEPhub.

        :param namespace: namespace of project
        :param name: name of project
        :param tag: tag of project
        :param sample_name: sample name
        :param sample_dict: dict sent as the JSON body of the PATCH request
        :return: None
        :raises ResponseError: on any status other than ACCEPTED
        """
        endpoint = self._build_sample_request_url(
            namespace=namespace, name=name, sample_name=sample_name
        )
        endpoint += self.parse_query_param(pep_variables={"tag": tag})

        reply = self.send_request(
            method="PATCH",
            url=endpoint,
            headers=self.parse_header(self.__jwt_data),
            json=sample_dict,
        )
        status = reply.status_code
        if status == ResponseStatusCodes.ACCEPTED:
            _LOGGER.info(
                f"Sample '{sample_name}' updated in project '{namespace}/{name}:{tag}' successfully."
            )
            return None
        if status == ResponseStatusCodes.NOT_EXIST:
            raise ResponseError(
                f"Sample '{sample_name}' or project {namespace}/{name}:{tag} does not exist. Error: {status}"
            )
        raise ResponseError(f"Unexpected return value. Error: {status}")

    def remove(self, namespace: str, name: str, tag: str, sample_name: str):
        """
        Delete a sample from a project in PEPhub.

        :param namespace: namespace of project
        :param name: name of project
        :param tag: tag of project
        :param sample_name: sample name
        :return: None
        :raises ResponseError: on any status other than ACCEPTED
        """
        endpoint = self._build_sample_request_url(
            namespace=namespace, name=name, sample_name=sample_name
        )
        endpoint += self.parse_query_param(pep_variables={"tag": tag})

        reply = self.send_request(
            method="DELETE",
            url=endpoint,
            headers=self.parse_header(self.__jwt_data),
        )
        status = reply.status_code
        if status == ResponseStatusCodes.ACCEPTED:
            _LOGGER.info(
                f"Sample '{sample_name}' removed from project '{namespace}/{name}:{tag}' successfully."
            )
            return None
        if status == ResponseStatusCodes.NOT_EXIST:
            raise ResponseError(
                f"Sample '{sample_name}' or project {namespace}/{name}:{tag} does not exist. Error: {status}"
            )
        raise ResponseError(f"Unexpected return value. Error: {status}")

    @staticmethod
    def _build_sample_request_url(namespace: str, name: str, sample_name: str) -> str:
        """
        Fill the sample URL template with the project and sample identifiers.

        :param namespace: namespace of project
        :param name: name of project
        :param sample_name: sample name
        :return: url string
        """
        url_parts = {
            "namespace": namespace,
            "project": name,
            "sample_name": sample_name,
        }
        return PEPHUB_SAMPLE_URL.format(**url_parts)
def create(
    self,
    namespace: str,
    name: str,
    tag: str,
    view_name: str,
    description: str = None,
    sample_list: list = None,
    no_fail: bool = False,
) -> None:
    """
    Create view in project in PEPhub.

    :param namespace: namespace of project
    :param name: name of project
    :param tag: tag of project
    :param view_name: name of the view
    :param description: description of the view
    :param sample_list: non-empty list of sample names
    :param no_fail: sent to the server as the "no_fail" query parameter;
        whether to raise an error if view was not added to the project
    :raises ValueError: if sample_list is empty or not a list
    :raises ResponseError: on any status other than ACCEPTED
    :return: None
    """

    # Validate locally before any request is built or sent.
    if not sample_list or not isinstance(sample_list, list):
        raise ValueError("Sample list must be a list of sample names.")

    url = self._build_view_request_url(
        namespace=namespace, name=name, view_name=view_name
    )

    url = url + self.parse_query_param(pep_variables={"tag": tag})

    response = self.send_request(
        method="POST",
        url=url,
        headers=self.parse_header(self.__jwt_data),
        params={"description": description, "no_fail": no_fail},
        json=sample_list,
    )
    if response.status_code == ResponseStatusCodes.ACCEPTED:
        _LOGGER.info(
            f"View '{view_name}' created in project '{namespace}/{name}:{tag}' successfully."
        )
        return None
    elif response.status_code == ResponseStatusCodes.NOT_EXIST:
        raise ResponseError(
            f"Project '{namespace}/{name}:{tag}' or one of the samples does not exist."
        )
    elif response.status_code == ResponseStatusCodes.CONFLICT:
        raise ResponseError(f"View '{view_name}' already exists in the project.")
    else:
        # Same wording as the other view/sample methods; the status code used
        # to be glued directly onto the sentence with no separator.
        raise ResponseError(
            f"Unexpected return value. Error: {response.status_code}"
        )
def add_sample(
    self,
    namespace: str,
    name: str,
    tag: str,
    view_name: str,
    sample_name: str,
):
    """
    Attach an existing sample to a view of a project in PEPhub.

    :param namespace: namespace of project
    :param name: name of project
    :param tag: tag of project
    :param view_name: name of the view
    :param sample_name: name of the sample
    :return: None
    :raises ResponseError: on any status other than ACCEPTED
    """
    endpoint = self._build_view_request_url(
        namespace=namespace,
        name=name,
        view_name=view_name,
        sample_name=sample_name,
    )
    endpoint += self.parse_query_param(pep_variables={"tag": tag})

    reply = self.send_request(
        method="POST",
        url=endpoint,
        headers=self.parse_header(self.__jwt_data),
    )
    status = reply.status_code
    if status == ResponseStatusCodes.ACCEPTED:
        _LOGGER.info(
            f"Sample '{sample_name}' added to view '{view_name}' in project '{namespace}/{name}:{tag}' successfully."
        )
        return None
    if status == ResponseStatusCodes.NOT_EXIST:
        raise ResponseError(
            f"Sample '{sample_name}' or project {namespace}/{name}:{tag} does not exist."
        )
    if status == ResponseStatusCodes.CONFLICT:
        raise ResponseError(f"Sample '{sample_name}' already exists in the view.")
    raise ResponseError(f"Unexpected return value. Error: {status}")
class PEPHubResponseException(Exception):
    """Raised for a PEPhub auth response whose status is not 200."""

    def __init__(self, reason: str = ""):
        """
        Build the exception with an optional explanation.

        :param str reason: free-text context describing what went wrong
        """
        super().__init__(reason)
def login_to_pephub(self):
    """
    Run the interactive device-code login against PEPhub.

    Requests a device code, prints it together with the authentication URL,
    then tries to exchange the code for a token: up to 3 automatic attempts,
    followed by one more attempt after the user presses enter.

    :return: the token returned by the exchange on success; None (implicitly)
        if every exchange attempt raised PEPHubTokenExchangeException
    """
    pephub_response = self._request_pephub_for_device_code()
    print(
        f"User verification code: {pephub_response.device_code}, please go to the website: "
        f"{pephub_response.auth_url} to authenticate."
    )

    # Wait 2 seconds (time.sleep takes seconds), then try up to 3 times to
    # exchange the device code for a token.
    # NOTE(review): the previous comment said "2 minutes" — confirm which
    # delay is intended.
    time.sleep(2)

    number_of_token_exchange_attempts = 3
    for i in range(number_of_token_exchange_attempts):
        try:
            user_token = self._exchange_device_code_on_token(
                pephub_response.device_code
            )
        except PEPHubTokenExchangeException:
            # Exchange refused; pause 2 seconds before the next attempt.
            time.sleep(2)
        else:
            print("Successfully logged in!")
            return user_token

    # Automatic attempts are exhausted: block until the user presses enter,
    # then make one final exchange attempt.
    input("If you logged in, press enter to continue...")
    try:
        user_token = self._exchange_device_code_on_token(
            pephub_response.device_code
        )
    except PEPHubTokenExchangeException:
        # Falls through to the implicit `return None`.
        MessageHandler.print_warning("You are not logged in")
    else:
        MessageHandler.print_success("Successfully logged in!")
        return user_token
class PEPHubClient(RequestManager):
    """
    High-level PEPhub client: login/logout, pulling, loading, uploading and
    searching projects. Sample and view operations are exposed through the
    ``sample`` and ``view`` properties.
    """

    def __init__(self):
        self._set_jwt_data(FilesManager.load_jwt_data_from_file(PATH_TO_FILE_WITH_JWT))

    def _set_jwt_data(self, jwt_data: Optional[str]) -> None:
        """
        Store the token and rebuild the sub-clients that carry a copy of it.

        PEPHubView and PEPHubSample receive the token at construction time,
        so they must be recreated whenever the token changes; otherwise they
        keep sending the old credentials.

        :param jwt_data: jwt token, or None when logged out
        """
        self.__jwt_data = jwt_data
        self.__view = PEPHubView(jwt_data)
        self.__sample = PEPHubSample(jwt_data)

    @property
    def view(self) -> PEPHubView:
        """Client for view endpoints, bound to the current token."""
        return self.__view

    @property
    def sample(self) -> PEPHubSample:
        """Client for sample endpoints, bound to the current token."""
        return self.__sample

    def login(self) -> None:
        """
        Log in to PEPhub and persist the received token.

        Nothing is saved when the login flow does not produce a token.
        """
        user_token = PEPHubAuth().login_to_pephub()
        if not user_token:
            # login_to_pephub returns None when the device code was never
            # exchanged; do not persist that as a token.
            return None

        FilesManager.save_jwt_data_to_file(PATH_TO_FILE_WITH_JWT, user_token)
        self._set_jwt_data(FilesManager.load_jwt_data_from_file(PATH_TO_FILE_WITH_JWT))

    def logout(self) -> None:
        """
        Log out from PEPhub: delete the stored token file and drop the token
        from this client and its view/sample sub-clients.
        """
        FilesManager.delete_file_if_exists(PATH_TO_FILE_WITH_JWT)
        self._set_jwt_data(None)

    def pull(
        self,
        project_registry_path: str,
        force: Optional[bool] = False,
        zip: Optional[bool] = False,
        output: Optional[str] = None,
    ) -> None:
        """
        Download project locally

        :param str project_registry_path: Project registry path in PEPhub (e.g. databio/base:default)
        :param bool force: if project exists, overwrite it.
        :param bool zip: if True, save project as zip file
        :param str output: path where project will be saved
        :return: None
        """
        project_dict = self.load_raw_pep(
            registry_path=project_registry_path,
        )

        save_pep(
            project=project_dict,
            reg_path=project_registry_path,
            force=force,
            project_path=output,
            zip=zip,
        )

    def load_project(
        self,
        project_registry_path: str,
        query_param: Optional[dict] = None,
    ) -> Project:
        """
        Load peppy project from PEPhub in Project object

        :param project_registry_path: registry path of the project
        :param query_param: query parameters used in get request
        :return Project: peppy project.
        """
        raw_pep = self.load_raw_pep(project_registry_path, query_param)
        # Call from_dict on the class (as PEPHubView.get does) instead of on
        # a throwaway empty Project instance.
        return Project.from_dict(raw_pep)

    def push(
        self,
        cfg: str,
        namespace: str,
        name: Optional[str] = None,
        tag: Optional[str] = None,
        is_private: Optional[bool] = False,
        force: Optional[bool] = False,
    ) -> None:
        """
        Push (upload/update) project to Pephub using config/csv path

        :param str cfg: Project config file (YAML) or sample table (CSV/TSV)
            with one row per sample to constitute project
        :param str namespace: namespace
        :param str name: project name
        :param str tag: project tag
        :param bool is_private: Specifies whether project should be private [Default= False]
        :param bool force: Force push to the database. Use it to update, or upload project. [Default= False]
        :return: None
        """
        peppy_project = Project(cfg=cfg)
        self.upload(
            project=peppy_project,
            namespace=namespace,
            name=name,
            tag=tag,
            is_private=is_private,
            force=force,
        )

    def upload(
        self,
        project: Project,
        namespace: str,
        name: str = None,
        tag: str = None,
        is_private: bool = False,
        force: bool = True,
    ) -> None:
        """
        Upload peppy project to the PEPhub.

        :param Project project: Project object that has to be uploaded to the DB
        :param namespace: namespace
        :param name: project name; when given, it is written into the project
            before uploading
        :param tag: project tag
        :param is_private: Make project private
        :param force: overwrite project if it exists, use it to update, or upload project.
        :raises ResponseError: on any status other than ACCEPTED
        :return: None
        """
        if name:
            project[NAME_KEY] = name

        upload_data = ProjectUploadData(
            pep_dict=project.to_dict(
                extended=True,
                orient="records",
            ),
            tag=tag,
            is_private=is_private,
            overwrite=force,
        )
        pephub_response = self.send_request(
            method="POST",
            url=self._build_push_request_url(namespace=namespace),
            headers=self.parse_header(self.__jwt_data),
            json=upload_data.model_dump(),
            cookies=None,
        )
        if pephub_response.status_code == ResponseStatusCodes.ACCEPTED:
            MessageHandler.print_success(
                f"Project '{namespace}/{name}:{upload_data.tag}' was successfully uploaded"
            )
        elif pephub_response.status_code == ResponseStatusCodes.CONFLICT:
            raise ResponseError(
                "Project already exists. Set force to overwrite project."
            )
        elif pephub_response.status_code == ResponseStatusCodes.UNAUTHORIZED:
            raise ResponseError("Unauthorized! Failure in uploading project.")
        elif pephub_response.status_code == ResponseStatusCodes.FORBIDDEN:
            raise ResponseError(
                "User does not have permission to write to this namespace!"
            )
        else:
            raise ResponseError(
                f"Unexpected Response Error. {pephub_response.status_code}"
            )
        return None

    def find_project(
        self,
        namespace: str,
        query_string: str = "",
        limit: int = 100,
        offset: int = 0,
        filter_by: Literal["submission_date", "last_update_date"] = None,
        start_date: str = None,
        end_date: str = None,
    ) -> SearchReturnModel:
        """
        Find project in specific namespace and return list of PEP annotation

        :param namespace: Namespace where to search for projects
        :param query_string: Search query
        :param limit: Return limit
        :param offset: Return offset
        :param filter_by: Use filter date. Option: [submission_date, last_update_date]
        :param start_date: filter beginning date
        :param end_date: filter end date (if none today's date is used)
        :raises ResponseError: if PEPhub answers with any status other than OK
        :return: search result (count, limit, offset and project annotations)
        """

        query_param = {
            "q": query_string,
            "limit": limit,
            "offset": offset,
        }
        if filter_by in ["submission_date", "last_update_date"]:
            query_param["filter_by"] = filter_by
            # Only send dates that were actually provided; urlencode would
            # otherwise serialize a missing date as the literal text "None".
            if start_date:
                query_param["filter_start_date"] = start_date
            if end_date:
                query_param["filter_end_date"] = end_date

        url = self._build_project_search_url(
            namespace=namespace,
            query_param=query_param,
        )

        pephub_response = self.send_request(
            method="GET",
            url=url,
            headers=self.parse_header(self.__jwt_data),
            json=None,
            cookies=None,
        )
        if pephub_response.status_code != ResponseStatusCodes.OK:
            # Previously this path fell off the end and returned None.
            raise ResponseError(
                f"Unexpected return value. Error: {pephub_response.status_code}"
            )

        decoded_response = self.decode_response(pephub_response, output_json=True)
        # SearchReturnModel validates every entry of "results" as a
        # ProjectAnnotationModel, so no separate per-item pass is needed.
        return SearchReturnModel(**decoded_response)

    @deprecated("This method is deprecated. Use load_raw_pep instead.")
    def _load_raw_pep(
        self,
        registry_path: str,
        jwt_data: Optional[str] = None,
        query_param: Optional[dict] = None,
    ) -> dict:
        """
        !!! This method is deprecated. Use load_raw_pep instead. !!!

        Request PEPhub and return the requested project as a raw dict.

        :param registry_path: Project namespace, eg. "geo/GSE124224:tag"
        :param jwt_data: ignored; kept only for backward compatibility
        :param query_param: Optional variables to be passed to PEPhub
        :return: Raw project in dict.
        """
        return self.load_raw_pep(registry_path, query_param)

    def load_raw_pep(
        self,
        registry_path: str,
        query_param: Optional[dict] = None,
    ) -> dict:
        """
        Request PEPhub and return the requested project as a raw dict.

        :param registry_path: Project namespace, eg. "geo/GSE124224:tag"
        :param query_param: Optional variables to be passed to PEPhub
            (the passed dict is not modified)
        :raises ResponseError: if PEPhub answers with any status other than OK
        :return: Raw project in dict.
        """
        # Work on a copy so the caller's dict is left untouched.
        query_param = dict(query_param or {})
        query_param["raw"] = "true"

        self._set_registry_data(registry_path)
        pephub_response = self.send_request(
            method="GET",
            url=self._build_pull_request_url(query_param=query_param),
            headers=self.parse_header(self.__jwt_data),
            cookies=None,
        )
        if pephub_response.status_code == ResponseStatusCodes.OK:
            decoded_response = self.decode_response(pephub_response, output_json=True)
            correct_proj_dict = ProjectDict(**decoded_response)

            # This step is necessary because of this issue: https://github.com/pepkit/pephub/issues/124
            return correct_proj_dict.model_dump(by_alias=True)

        if pephub_response.status_code == ResponseStatusCodes.NOT_EXIST:
            raise ResponseError("File does not exist, or you are unauthorized.")
        if pephub_response.status_code == ResponseStatusCodes.INTERNAL_ERROR:
            raise ResponseError(
                f"Internal server error. Unexpected return value. Error: {pephub_response.status_code}"
            )
        # Any other status used to fall through and return None despite the
        # declared dict return type.
        raise ResponseError(
            f"Unexpected return value. Error: {pephub_response.status_code}"
        )

    def _set_registry_data(self, query_string: str) -> None:
        """
        Parse the registry path and store it on ``self.registry_path``.

        :param query_string: Passed by user. Contain information needed to locate the project.
        :raises IncorrectQueryStringError: if the string cannot be parsed into a RegistryPath
        :return: None
        """
        try:
            self.registry_path = RegistryPath(**parse_registry_path(query_string))
        except (ValidationError, TypeError):
            raise IncorrectQueryStringError(query_string=query_string)

    def _build_pull_request_url(self, query_param: dict = None) -> str:
        """
        Build request for getting projects from pephub

        Uses ``self.registry_path``, which must have been set with
        ``_set_registry_data`` beforehand.

        :param query_param: dict of parameters used in query string
            (the passed dict is not modified)
        :return: url string
        """
        # Copy before adding the tag so the caller's dict is not mutated.
        query_param = dict(query_param or {})
        query_param["tag"] = self.registry_path.tag

        endpoint = self.registry_path.namespace + "/" + self.registry_path.item

        variables_string = self.parse_query_param(query_param)
        endpoint += variables_string

        return PEPHUB_PEP_API_BASE_URL + endpoint

    @staticmethod
    def _build_project_search_url(namespace: str, query_param: dict = None) -> str:
        """
        Build request for searching projects from pephub

        :param namespace: namespace to search in
        :param query_param: dict of parameters used in query string
        :return: url string
        """
        # The default (None) must not reach urlencode, which rejects it.
        variables_string = RequestManager.parse_query_param(query_param or {})

        return PEPHUB_PEP_SEARCH_URL.format(namespace=namespace) + variables_string

    @staticmethod
    def _build_push_request_url(namespace: str) -> str:
        """
        Build project upload request used in pephub

        :param namespace: namespace where project will be uploaded
        :return: url string
        """
        return PEPHUB_PUSH_URL.format(namespace=namespace)
ACTIVE_AMENDMENTS_KEY, AMENDMENTS_KEY, + APPEND_KEY, ATTR_KEY_PREFIX, CFG_IMPORTS_KEY, CFG_SAMPLE_TABLE_KEY, @@ -28,7 +29,6 @@ CONFIG_FILE_KEY, CONFIG_KEY, CONFIG_VERSION_KEY, - APPEND_KEY, DERIVED_ATTRS_KEY, DERIVED_KEY, DERIVED_SOURCES_KEY, @@ -41,6 +41,7 @@ MAX_PROJECT_SAMPLES_REPR, METADATA_KEY, NAME_KEY, + ORIGINAL_CONFIG_KEY, PEP_LATEST_VERSION, PKG_NAME, PROJ_MODS_KEY, @@ -60,13 +61,12 @@ SUBSAMPLE_RAW_LIST_KEY, SUBSAMPLE_TABLE_INDEX_KEY, SUBSAMPLE_TABLES_FILE_KEY, - ORIGINAL_CONFIG_KEY, ) from .exceptions import ( - InvalidSampleTableFileException, - MissingAmendmentError, IllegalStateException, InvalidConfigFileException, + InvalidSampleTableFileException, + MissingAmendmentError, ) from .parsers import select_parser from .sample import Sample @@ -76,6 +76,7 @@ load_yaml, make_abs_via_cfg, make_list, + unpopulated_env_var, ) _LOGGER = getLogger(PKG_NAME) @@ -669,7 +670,7 @@ def _assert_samples_have_names(self): if self.st_index not in sample: message = ( f"{CFG_SAMPLE_TABLE_KEY} is missing '{self.st_index}' column; " - f"you must specify {CFG_SAMPLE_TABLE_KEY}s in {self.st_index} or derive them" + f"you must specify a {self.st_index} column for your {CFG_SAMPLE_TABLE_KEY} or derive it" ) raise InvalidSampleTableFileException(message) @@ -920,6 +921,7 @@ def attr_derive(self, attrs=None): ds = self[CONFIG_KEY][SAMPLE_MODS_KEY][DERIVED_KEY][DERIVED_SOURCES_KEY] derivations = attrs or (da if isinstance(da, list) else [da]) _LOGGER.debug("Derivations to be done: {}".format(derivations)) + env_var_miss = set() for sample in track( self.samples, description="Deriving sample attributes", @@ -942,6 +944,9 @@ def attr_derive(self, attrs=None): derived_attr = sample.derive_attribute(ds, attr) if derived_attr: + if "$" in derived_attr: + env_var_miss.add(derived_attr) + _LOGGER.debug("Setting '{}' to '{}'".format(attr, derived_attr)) sample[attr] = derived_attr else: @@ -949,6 +954,8 @@ def attr_derive(self, attrs=None): f"Not setting null/empty value for data source 
'{attr}': {type(derived_attr)}" ) sample._derived_cols_done.append(attr) + if len(env_var_miss) > 0: + unpopulated_env_var(env_var_miss) def activate_amendments(self, amendments): """ diff --git a/peppy/sample.py b/peppy/sample.py index 60d14677..432cf41d 100644 --- a/peppy/sample.py +++ b/peppy/sample.py @@ -4,7 +4,7 @@ from copy import copy as cp from logging import getLogger from string import Formatter -from typing import Optional, Union +from typing import Any, Dict, Optional, Union import pandas as pd import yaml @@ -20,8 +20,8 @@ SAMPLE_SHEET_KEY, ) from .exceptions import InvalidSampleTableFileException -from .utils import copy, grab_project_data from .simple_attr_map import SimpleAttMap +from .utils import copy, grab_project_data _LOGGER = getLogger(PKG_NAME) @@ -39,7 +39,9 @@ class Sample(SimpleAttMap): :param Mapping | pandas.core.series.Series series: Sample's data. """ - def __init__(self, series, prj=None): + def __init__( + self, series: Union[Mapping, Series], prj: Optional[Any] = None + ) -> None: super(Sample, self).__init__() data = dict(series) @@ -75,25 +77,28 @@ def __init__(self, series, prj=None): self._derived_cols_done = [] self._attributes = list(series.keys()) - def get_sheet_dict(self): - """ - Create a K-V pairs for items originally passed in via the sample sheet. + def get_sheet_dict(self) -> Dict: + """Create K-V pairs for items originally passed in via the sample sheet. + This is useful for summarizing; it provides a representation of the sample that excludes things like config files and derived entries. 
- :return OrderedDict: mapping from name to value for data elements - originally provided via the sample sheet (i.e., the a map-like - representation of the instance, excluding derived items) + Returns: + Mapping from name to value for data elements originally provided + via the sample sheet (i.e., a map-like representation of the + instance, excluding derived items) """ return dict([[k, self[k]] for k in self._attributes]) - def to_dict(self, add_prj_ref=False): - """ - Serializes itself as dict object. + def to_dict(self, add_prj_ref: bool = False) -> Dict: + """Serializes itself as dict object. + + Args: + add_prj_ref: Whether the project reference bound to the Sample + object should be included in the dict representation - :param bool add_prj_ref: whether the project reference bound do the - Sample object should be included in the YAML representation - :return dict: dict representation of this Sample + Returns: + Dict representation of this Sample """ def _obj2dict(obj, name=None): @@ -136,16 +141,19 @@ def _obj2dict(obj, name=None): return serial def to_yaml( - self, path: Optional[str] = None, add_prj_ref=False + self, path: Optional[str] = None, add_prj_ref: bool = False ) -> Union[str, None]: - """ - Serializes itself in YAML format. Writes to file if path is provided, else returns string representation. + """Serializes itself in YAML format. + + Writes to file if path is provided, else returns string representation. 
+ + Args: + path: A file path to write YAML to; defaults to None + add_prj_ref: Whether the project reference bound to the Sample + object should be included in the YAML representation - :param str path: A file path to write yaml to; provide this or - the subs_folder_path, defaults to None - :param bool add_prj_ref: whether the project reference bound do the - Sample object should be included in the YAML representation - :return str | None: returns string representation of sample yaml or None + Returns: + String representation of sample YAML or None if written to file """ serial = self.to_dict(add_prj_ref=add_prj_ref) if path: @@ -199,11 +207,6 @@ def _format_regex(regex, items): keys = [i[1] for i in Formatter().parse(regex) if i[1] is not None] if not keys: return [regex] - if "$" in regex: - _LOGGER.warning( - "Not all environment variables were populated " - "in derived attribute source: {}".format(regex) - ) attr_lens = [ len(v) for k, v in items.items() if (isinstance(v, list) and k in keys) ] diff --git a/peppy/utils.py b/peppy/utils.py index 45d9e557..55d23c0e 100644 --- a/peppy/utils.py +++ b/peppy/utils.py @@ -2,7 +2,10 @@ import logging import os -from typing import Dict, Mapping, Type, Union +import posixpath as psp +import re +from collections import defaultdict +from typing import Any, Dict, Mapping, Optional, Set, Type, Union from urllib.request import urlopen import yaml @@ -14,7 +17,7 @@ _LOGGER = logging.getLogger(__name__) -def copy(obj): +def copy(obj: Any) -> Any: def copy(self): """ Copy self to a new object. @@ -27,8 +30,23 @@ def copy(self): return obj -def make_abs_via_cfg(maybe_relpath, cfg_path, check_exists=False): - """Ensure that a possibly relative path is absolute.""" +def make_abs_via_cfg( + maybe_relpath: str, cfg_path: str, check_exists: bool = False +) -> str: + """Ensure that a possibly relative path is absolute. 
+ + Args: + maybe_relpath: Path that may be relative + cfg_path: Path to configuration file + check_exists: Whether to verify the resulting path exists + + Returns: + Absolute path + + Raises: + TypeError: If maybe_relpath is not a string + OSError: If check_exists is True and path doesn't exist + """ if not isinstance(maybe_relpath, str): raise TypeError( "Attempting to ensure non-text value is absolute path: {} ({})".format( @@ -44,28 +62,40 @@ def make_abs_via_cfg(maybe_relpath, cfg_path, check_exists=False): _LOGGER.debug("Expanded: {}".format(expanded)) return expanded # Set path to an absolute path, relative to project config. - config_dirpath = os.path.dirname(cfg_path) + if is_url(cfg_path): + config_dirpath = psp.dirname(cfg_path) + else: + config_dirpath = os.path.dirname(cfg_path) _LOGGER.debug("config_dirpath: {}".format(config_dirpath)) - abs_path = os.path.join(config_dirpath, maybe_relpath) + + if is_url(cfg_path): + abs_path = psp.join(config_dirpath, maybe_relpath) + else: + abs_path = os.path.join(config_dirpath, maybe_relpath) _LOGGER.debug("Expanded and/or made absolute: {}".format(abs_path)) - if check_exists and not os.path.exists(abs_path): + if check_exists and not is_url(abs_path) and not os.path.exists(abs_path): raise OSError(f"Path made absolute does not exist: {abs_path}") return abs_path -def grab_project_data(prj): - """ - From the given Project, grab Sample-independent data. +def grab_project_data(prj: Any) -> Mapping: + """From the given Project, grab Sample-independent data. There are some aspects of a Project of which it's beneficial for a Sample to be aware, particularly for post-hoc analysis. Since Sample objects within a Project are mutually independent, though, each doesn't need to - know about any of the others. A Project manages its, Sample instances, + know about any of the others. A Project manages its Sample instances, so for each Sample knowledge of Project data is limited. 
This method facilitates adoption of that conceptual model. - :param Project prj: Project from which to grab data - :return Mapping: Sample-independent data sections from given Project + Args: + prj: Project from which to grab data + + Returns: + Sample-independent data sections from given Project + + Raises: + KeyError: If project lacks required config section """ if not prj: return {} @@ -77,16 +107,17 @@ def grab_project_data(prj): def make_list(arg: Union[list, str], obj_class: Type) -> list: - """ - Convert an object of predefined class to a list of objects of that class or - ensure a list is a list of objects of that class + """Convert an object of predefined class to a list or ensure list contains correct type. - :param list[obj] | obj arg: string or a list of strings to listify - :param str obj_class: name of the class of intrest + Args: + arg: Object or list of objects to listify + obj_class: Class that objects should be instances of - :return list: list of objects of the predefined class + Returns: + List of objects of the predefined class - :raise TypeError: if a faulty argument was provided + Raises: + TypeError: If a faulty argument was provided """ def _raise_faulty_arg(): @@ -106,22 +137,26 @@ def _raise_faulty_arg(): _raise_faulty_arg() -def _expandpath(path: str): - """ - Expand a filesystem path that may or may not contain user/env vars. +def _expandpath(path: str) -> str: + """Expand a filesystem path that may or may not contain user/env vars. + + Args: + path: Path to expand - :param str path: path to expand - :return str: expanded version of input path + Returns: + Expanded version of input path """ return os.path.expandvars(os.path.expanduser(path)) def expand_paths(x: dict) -> dict: - """ - Recursively expand paths in a dict. + """Recursively expand paths in a dict. 
+ + Args: + x: Dict to expand - :param dict x: dict to expand - :return dict: dict with expanded paths + Returns: + Dict with expanded paths """ if isinstance(x, str): return expandpath(x) @@ -130,13 +165,17 @@ def expand_paths(x: dict) -> dict: return x -def load_yaml(filepath): - """ - Load a local or remote YAML file into a Python dict +def load_yaml(filepath: str) -> dict: + """Load a local or remote YAML file into a Python dict. + + Args: + filepath: Path to the file to read - :param str filepath: path to the file to read - :raises RemoteYAMLError: if the remote YAML file reading fails - :return dict: read data + Returns: + Read data + + Raises: + RemoteYAMLError: If the remote YAML file reading fails """ if is_url(filepath): _LOGGER.debug(f"Got URL: {filepath}") @@ -156,13 +195,21 @@ def load_yaml(filepath): return expand_paths(data) -def is_cfg_or_anno(file_path, formats=None): - """ - Determine if the input file seems to be a project config file (based on the file extension). - :param str file_path: file path to examine - :param dict formats: formats dict to use. Must include 'config' and 'annotation' keys. - :raise ValueError: if the file seems to be neither a config nor an annotation - :return bool: True if the file is a config, False if the file is an annotation +def is_cfg_or_anno( + file_path: Optional[str], formats: Optional[dict] = None +) -> Optional[bool]: + """Determine if the input file seems to be a project config file (based on extension). + + Args: + file_path: File path to examine + formats: Formats dict to use. 
Must include 'config' and 'annotation' keys + + Returns: + True if the file is a config, False if the file is an annotation, + None if file_path is None + + Raises: + ValueError: If the file seems to be neither a config nor an annotation """ formats_dict = formats or { "config": (".yaml", ".yml"), @@ -182,8 +229,15 @@ def is_cfg_or_anno(file_path, formats=None): ) -def extract_custom_index_for_sample_table(pep_dictionary: Dict): - """Extracts a custom index for the sample table if it exists""" +def extract_custom_index_for_sample_table(pep_dictionary: Dict) -> Optional[str]: + """Extracts a custom index for the sample table if it exists. + + Args: + pep_dictionary: PEP configuration dictionary + + Returns: + Custom index name or None if not specified + """ return ( pep_dictionary[SAMPLE_TABLE_INDEX_KEY] if SAMPLE_TABLE_INDEX_KEY in pep_dictionary @@ -191,10 +245,68 @@ def extract_custom_index_for_sample_table(pep_dictionary: Dict): ) -def extract_custom_index_for_subsample_table(pep_dictionary: Dict): - """Extracts a custom index for the subsample table if it exists""" +def extract_custom_index_for_subsample_table(pep_dictionary: Dict) -> Optional[str]: + """Extracts a custom index for the subsample table if it exists. + + Args: + pep_dictionary: PEP configuration dictionary + + Returns: + Custom index name or None if not specified + """ return ( pep_dictionary[SUBSAMPLE_TABLE_INDEX_KEY] if SUBSAMPLE_TABLE_INDEX_KEY in pep_dictionary else None ) + + +def unpopulated_env_var(paths: Set[str]) -> None: + """Print warnings for unpopulated environment variables in paths. + + Given a set of paths that may contain env vars, group by env var and + print a warning for each group with the deepest common directory and + the paths relative to that directory. 
+ + Args: + paths: Set of paths that may contain environment variables + """ + _VAR_RE = re.compile(r"^\$(\w+)/(.*)$") + groups: dict[str, list[str]] = defaultdict(list) + + # 1) Group by env var + for s in paths: + m = _VAR_RE.match(s.strip()) + if not m: + # Not in "$VAR/..." form — skip or collect under a special key if you prefer + continue + var, tail = m.group(1), m.group(2) + # normalize to POSIX-ish, no leading "./" + tail = tail.lstrip("/") + groups[var].append(tail) + + # 2) For each var, compute deepest common directory and print + for var, tails in groups.items(): + if not tails: + continue + + if len(tails) == 1: + # With a single path, use its directory as the common dir + common_dir = psp.dirname(tails[0]) or "." + else: + common_dir = psp.commonpath(tails) or "." + # Ensure it's a directory; commonpath is component-wise, so it's fine. + + warning_message = "Not all environment variables were populated in derived attribute source: $%s/{" + + in_env = [] + for t in tails: + rel = psp.relpath(t, start=common_dir or ".") + in_env.append(rel) + + warning_message += ", ".join(in_env) + warning_message += "}" + _LOGGER.warning( + warning_message, + var, + ) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index ace32606..bf0dbb5c 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -1,6 +1,12 @@ -pandas>=0.24.2 -pyyaml +pandas>=2.2.0 +pyyaml>=6.0.0 rich>=10.3.0 ubiquerg>=0.6.2 numpy -pephubclient>=0.4.2 +logmuse>=0.2.8 +importlib-metadata; python_version < '3.10' +jsonschema>=3.0.1 +typer>=0.20.0 +requests>=2.28.2 +pydantic>2.5.0 +coloredlogs>=15.0.1 \ No newline at end of file diff --git a/requirements/requirements-test.txt b/requirements/requirements-test.txt index 2461d2eb..56ac11e9 100644 --- a/requirements/requirements-test.txt +++ b/requirements/requirements-test.txt @@ -2,3 +2,14 @@ mock pytest pytest-cov pytest-remotedata +# eido +coveralls +pytest-mock==3.6.1 +# pephubclient 
+black +ruff +python-dotenv +flake8 +pre-commit +coverage +smokeshow \ No newline at end of file diff --git a/scripts/update_usage_docs.sh b/scripts/update_usage_docs.sh new file mode 100755 index 00000000..95d150ee --- /dev/null +++ b/scripts/update_usage_docs.sh @@ -0,0 +1,21 @@ +#!/bin/bash +cp ../docs/templates/usage.template usage.template + +for cmd in "--help" "pull --help" "push --help"; do + echo $cmd + echo -e "## \`phc $cmd\`" > USAGE_header.temp + phc $cmd --help > USAGE.temp 2>&1 + # sed -i 's/^/\t/' USAGE.temp + sed -i.bak '1s;^;\`\`\`console\ +;' USAGE.temp +# sed -i '1s/^/\n\`\`\`console\n/' USAGE.temp + echo -e "\`\`\`\n" >> USAGE.temp + #sed -i -e "/\`looper $cmd\`/r USAGE.temp" -e '$G' usage.template # for -in place inserts + cat USAGE_header.temp USAGE.temp >> usage.template # to append to the end +done +rm USAGE.temp +rm USAGE_header.temp +rm USAGE.temp.bak +mv usage.template ../docs/usage.md +#cat usage.template +# rm USAGE.temp \ No newline at end of file diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index ec734d81..00000000 --- a/setup.cfg +++ /dev/null @@ -1,6 +0,0 @@ -[aliases] -test = pytest - -[pytest] -# Only request extra info from failures and errors. 
-addopts = -rfE diff --git a/setup.py b/setup.py index 3d0619c9..79cb7b52 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,13 @@ def get_static(name, condition=None): setup( name=PACKAGE_NAME, - packages=[PACKAGE_NAME], + packages=[ + PACKAGE_NAME, + "peppy.eido", + "peppy.pephubclient", + "peppy.pephubclient.pephub_oauth", + "peppy.pephubclient.modules", + ], version=version, description="A python-based project metadata manager for portable encapsulated projects", long_description=long_description, @@ -59,6 +65,15 @@ def get_static(name, condition=None): url="https://github.com/pepkit/peppy/", author="Michal Stolarczyk, Nathan Sheffield, Vince Reuter, Andre Rendeiro, Oleksandr Khoroshevskyi", license="BSD2", + entry_points={ + "console_scripts": ["peppy = peppy.cli:main"], + "pep.filters": [ + "basic=peppy.eido.conversion_plugins:basic_pep_filter", + "yaml=peppy.eido.conversion_plugins:yaml_pep_filter", + "csv=peppy.eido.conversion_plugins:csv_pep_filter", + "yaml-samples=peppy.eido.conversion_plugins:yaml_samples_pep_filter", + ], + }, include_package_data=True, tests_require=(["pytest"]), setup_requires=( diff --git a/tests/data/eidodata/common/schemas/common_pep_validation.yaml b/tests/data/eidodata/common/schemas/common_pep_validation.yaml new file mode 100644 index 00000000..78ff9a3c --- /dev/null +++ b/tests/data/eidodata/common/schemas/common_pep_validation.yaml @@ -0,0 +1,69 @@ +description: "Schema for a minimal PEP" +version: "2.0.0" +properties: + config: + properties: + name: + type: string + pattern: "^\\S*$" + description: "Project name with no whitespace" + pep_version: + description: "Version of the PEP Schema this PEP follows" + type: string + sample_table: + type: string + description: "Path to the sample annotation table with one row per sample" + subsample_table: + type: string + description: "Path to the subsample annotation table with one row per subsample and sample_name attribute matching an entry in the sample table" + sample_modifiers: + 
type: object + properties: + append: + type: object + duplicate: + type: object + imply: + type: array + items: + type: object + properties: + if: + type: object + then: + type: object + derive: + type: object + properties: + attributes: + type: array + items: + type: string + sources: + type: object + project_modifiers: + type: object + properties: + amend: + description: "Object overwriting original project attributes" + type: object + import: + description: "List of external PEP project config files to import" + type: array + items: + type: string + required: + - pep_version + samples: + type: array + items: + type: object + properties: + sample_name: + type: string + pattern: "^\\S*$" + description: "Unique name of the sample with no whitespace" + required: + - sample_name +required: + - samples \ No newline at end of file diff --git a/tests/data/eidodata/peps/multiline_output/config.yaml b/tests/data/eidodata/peps/multiline_output/config.yaml new file mode 100644 index 00000000..4196ee81 --- /dev/null +++ b/tests/data/eidodata/peps/multiline_output/config.yaml @@ -0,0 +1,5 @@ +pep_version: "2.0.0" +sample_table: "samplesheet.csv" +subsample_table: "subsamplesheet.csv" +sample_table_index: "sample" +subsample_table_index: "sample" \ No newline at end of file diff --git a/tests/data/eidodata/peps/multiline_output/multiline_output.csv b/tests/data/eidodata/peps/multiline_output/multiline_output.csv new file mode 100644 index 00000000..5e889262 --- /dev/null +++ b/tests/data/eidodata/peps/multiline_output/multiline_output.csv @@ -0,0 +1,8 @@ +sample,strandedness,instrument_platform,run_accession,fastq_1,fastq_2 +WT_REP1,reverse,ABI_SOLID,runaccession1,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357070_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357070_2.fastq.gz 
+WT_REP1,reverse,BGISEQ,runaccession2,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_2.fastq.gz +WT_REP2,reverse,CAPILLARY,123123123,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_2.fastq.gz +RAP1_UNINDUCED_REP1,reverse,COMPLETE_GENOMICS,somerunaccesion,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357073_1.fastq.gz, +RAP1_UNINDUCED_REP2,reverse,DNBSEQ,ERR2412421,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357074_1.fastq.gz, +RAP1_UNINDUCED_REP2,reverse,HELICOS,xxxxxxxxxx,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357075_1.fastq.gz, +RAP1_IAA_30M_REP1,reverse,ILLUMINA,None,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_2.fastq.gz diff --git a/tests/data/example_peps-master/example_nextflow_subsamples/samplesheet.csv b/tests/data/eidodata/peps/multiline_output/samplesheet.csv similarity index 100% rename from tests/data/example_peps-master/example_nextflow_subsamples/samplesheet.csv rename to tests/data/eidodata/peps/multiline_output/samplesheet.csv diff --git a/tests/data/example_peps-master/example_nextflow_subsamples/subsamplesheet.csv b/tests/data/eidodata/peps/multiline_output/subsamplesheet.csv similarity index 100% rename from tests/data/example_peps-master/example_nextflow_subsamples/subsamplesheet.csv rename to tests/data/eidodata/peps/multiline_output/subsamplesheet.csv diff --git a/tests/data/eidodata/peps/multiple_subsamples/project_config.yaml 
b/tests/data/eidodata/peps/multiple_subsamples/project_config.yaml new file mode 100644 index 00000000..e0e580b7 --- /dev/null +++ b/tests/data/eidodata/peps/multiple_subsamples/project_config.yaml @@ -0,0 +1,19 @@ +pep_version: "2.1.0" +sample_table: sample_table.csv +subsample_table: + - subsample_table1.csv + - subsample_table2.csv + +sample_modifiers: + append: + local_files: LOCAL + genome: "fg" + derive: + attributes: [local_files] + sources: + LOCAL: "../data/{file_path}" + imply: + - if: + identifier: "frog1" + then: + genome: "frog_frog" diff --git a/tests/data/example_peps-master/example_multiple_subsamples/sample_table.csv b/tests/data/eidodata/peps/multiple_subsamples/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_multiple_subsamples/sample_table.csv rename to tests/data/eidodata/peps/multiple_subsamples/sample_table.csv diff --git a/tests/data/example_peps-master/example_multiple_subsamples/subsample_table1.csv b/tests/data/eidodata/peps/multiple_subsamples/subsample_table1.csv similarity index 100% rename from tests/data/example_peps-master/example_multiple_subsamples/subsample_table1.csv rename to tests/data/eidodata/peps/multiple_subsamples/subsample_table1.csv diff --git a/tests/data/example_peps-master/example_multiple_subsamples/subsample_table2.csv b/tests/data/eidodata/peps/multiple_subsamples/subsample_table2.csv similarity index 100% rename from tests/data/example_peps-master/example_multiple_subsamples/subsample_table2.csv rename to tests/data/eidodata/peps/multiple_subsamples/subsample_table2.csv diff --git a/tests/data/example_peps-master/example_nextflow_config/project_config.yaml b/tests/data/eidodata/peps/pep_nextflow_taxprofiler/config.yaml similarity index 100% rename from tests/data/example_peps-master/example_nextflow_config/project_config.yaml rename to tests/data/eidodata/peps/pep_nextflow_taxprofiler/config.yaml diff --git a/tests/data/eidodata/peps/pep_nextflow_taxprofiler/output.csv 
b/tests/data/eidodata/peps/pep_nextflow_taxprofiler/output.csv new file mode 100644 index 00000000..d70a0e77 --- /dev/null +++ b/tests/data/eidodata/peps/pep_nextflow_taxprofiler/output.csv @@ -0,0 +1,7 @@ +sample,instrument_platform,run_accession,fastq_1,fastq_2,fasta +2611,ILLUMINA,ERR5766174,,,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fasta/ERX5474930_ERR5766174_1.fa.gz +2613,ILLUMINA,ERR5766181,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474937_ERR5766181_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474937_ERR5766181_2.fastq.gz, +ERR3201952,OXFORD_NANOPORE,ERR3201952,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERR3201952.fastq.gz,, +2612,ILLUMINA,ERR5766176,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_2.fastq.gz, +2612,ILLUMINA,ERR5766176_B,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_B_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_B_2.fastq.gz, +2612,ILLUMINA,ERR5766180,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474936_ERR5766180_1.fastq.gz,, diff --git a/tests/data/example_peps-master/example_nextflow_config/samplesheet.csv b/tests/data/eidodata/peps/pep_nextflow_taxprofiler/samplesheet.csv similarity index 100% rename from tests/data/example_peps-master/example_nextflow_config/samplesheet.csv rename to tests/data/eidodata/peps/pep_nextflow_taxprofiler/samplesheet.csv diff --git a/tests/data/eidodata/peps/pep_schema_rel_path/config.yaml b/tests/data/eidodata/peps/pep_schema_rel_path/config.yaml new file mode 100644 index 00000000..2783c73d --- /dev/null +++ 
b/tests/data/eidodata/peps/pep_schema_rel_path/config.yaml @@ -0,0 +1,3 @@ +description: "Example PEP for this particular pipeline." +pep_version: 2.0.0 +sample_table: sample_sheet.csv diff --git a/tests/data/eidodata/peps/pep_schema_rel_path/sample_sheet.csv b/tests/data/eidodata/peps/pep_schema_rel_path/sample_sheet.csv new file mode 100644 index 00000000..18840dd6 --- /dev/null +++ b/tests/data/eidodata/peps/pep_schema_rel_path/sample_sheet.csv @@ -0,0 +1,3 @@ +"sample_name","patient" +"a","Test" +"b", "Also Test" diff --git a/tests/data/eidodata/peps/pep_with_fasta_column/config.yaml b/tests/data/eidodata/peps/pep_with_fasta_column/config.yaml new file mode 100644 index 00000000..a56d90b3 --- /dev/null +++ b/tests/data/eidodata/peps/pep_with_fasta_column/config.yaml @@ -0,0 +1,3 @@ +pep_version: "2.0.0" +sample_table: "samplesheet.csv" +subsample_table: "subsamplesheet.csv" diff --git a/tests/data/eidodata/peps/pep_with_fasta_column/output.csv b/tests/data/eidodata/peps/pep_with_fasta_column/output.csv new file mode 100644 index 00000000..ac7dc5b2 --- /dev/null +++ b/tests/data/eidodata/peps/pep_with_fasta_column/output.csv @@ -0,0 +1,8 @@ +sample,strandedness,instrument_platform,run_accession,fastq_1,fastq_2,fasta +WT_REP1,reverse,ABI_SOLID,runaccession1,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_2.fastq.gz, +WT_REP1,reverse,ABI_SOLID,runaccession2,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_2.fastq.gz, +WT_REP2,reverse,BGISEQ,123123123,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_2.fastq.gz, 
+RAP1_UNINDUCED_REP1,reverse,CAPILLARY,somerunaccesion,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357073_1.fastq.gz,, +RAP1_UNINDUCED_REP2,reverse,COMPLETE_GENOMICS,ERR2412421,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357074_1.fastq.gz,, +RAP1_UNINDUCED_REP2,reverse,COMPLETE_GENOMICS,xxxxxxxxxx,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357075_1.fastq.gz,, +RAP1_IAA_30M_REP1,reverse,DNBSEQ,None,,,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fasta/ERX5474930_ERR5766174_1.fa.gz diff --git a/tests/data/eidodata/peps/pep_with_fasta_column/samplesheet.csv b/tests/data/eidodata/peps/pep_with_fasta_column/samplesheet.csv new file mode 100644 index 00000000..6d9956d8 --- /dev/null +++ b/tests/data/eidodata/peps/pep_with_fasta_column/samplesheet.csv @@ -0,0 +1,6 @@ +sample,strandedness,instrument_platform +WT_REP1,reverse,ABI_SOLID +WT_REP2,reverse,BGISEQ +RAP1_UNINDUCED_REP1,reverse,CAPILLARY +RAP1_UNINDUCED_REP2,reverse,COMPLETE_GENOMICS +RAP1_IAA_30M_REP1,reverse,DNBSEQ diff --git a/tests/data/eidodata/peps/pep_with_fasta_column/subsamplesheet.csv b/tests/data/eidodata/peps/pep_with_fasta_column/subsamplesheet.csv new file mode 100644 index 00000000..446f9a91 --- /dev/null +++ b/tests/data/eidodata/peps/pep_with_fasta_column/subsamplesheet.csv @@ -0,0 +1,8 @@ +sample,run_accession,fastq_1,fastq_2,fasta,strandedness +WT_REP1,runaccession1,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_2.fastq.gz,,reverse +WT_REP1,runaccession2,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_2.fastq.gz,,reverse 
+WT_REP2,123123123,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_2.fastq.gz,,reverse +RAP1_UNINDUCED_REP1,somerunaccesion,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357073_1.fastq.gz,,,reverse +RAP1_UNINDUCED_REP2,ERR2412421,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357074_1.fastq.gz,,,reverse +RAP1_UNINDUCED_REP2,xxxxxxxxxx,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357075_1.fastq.gz,,,reverse +RAP1_IAA_30M_REP1,None,,,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fasta/ERX5474930_ERR5766174_1.fa.gz,reverse diff --git a/tests/data/eidodata/peps/test_file_existing/project_config.yaml b/tests/data/eidodata/peps/test_file_existing/project_config.yaml new file mode 100644 index 00000000..23ebfee5 --- /dev/null +++ b/tests/data/eidodata/peps/test_file_existing/project_config.yaml @@ -0,0 +1,12 @@ +pep_version: "2.1.0" +sample_table: sample_table.csv +subsample_table: subsample_table.csv + + +sample_modifiers: + append: + local_files: LOCAL + derive: + attributes: [local_files] + sources: + LOCAL: "../data/{file_path}" diff --git a/tests/data/eidodata/peps/test_file_existing/sample_table.csv b/tests/data/eidodata/peps/test_file_existing/sample_table.csv new file mode 100644 index 00000000..1137443a --- /dev/null +++ b/tests/data/eidodata/peps/test_file_existing/sample_table.csv @@ -0,0 +1,5 @@ +sample_name,protocol,identifier +frog_1,anySampleType,frog1 +frog_2,anySampleType,frog2 +frog_3,anySampleType,frog3 +frog_4,anySampleType,frog4 diff --git a/tests/data/eidodata/peps/test_file_existing/subsample_table.csv b/tests/data/eidodata/peps/test_file_existing/subsample_table.csv new file mode 100644 index 00000000..1d4f9553 --- /dev/null +++ 
b/tests/data/eidodata/peps/test_file_existing/subsample_table.csv @@ -0,0 +1,6 @@ +sample_name,file_path,subsample_name +frog_1,file/a.txt,a +frog_1,file/b.txt,b +frog_1,file/c.txt,c +frog_2,file/a.txt,a +frog_2,file/b.txt,b diff --git a/tests/data/eidodata/peps/test_pep/test_cfg.yaml b/tests/data/eidodata/peps/test_pep/test_cfg.yaml new file mode 100644 index 00000000..32f028d7 --- /dev/null +++ b/tests/data/eidodata/peps/test_pep/test_cfg.yaml @@ -0,0 +1,10 @@ +name: test +pep_version: 2.0.0 +sample_table: test_sample_table.csv + +sample_modifiers: + imply: + - if: + organism: "Homo sapiens" + then: + genome: hg38 diff --git a/tests/data/eidodata/peps/test_pep/test_sample_table.csv b/tests/data/eidodata/peps/test_pep/test_sample_table.csv new file mode 100644 index 00000000..d2881012 --- /dev/null +++ b/tests/data/eidodata/peps/test_pep/test_sample_table.csv @@ -0,0 +1,3 @@ +sample_name,protocol,genome +GSM1558746,GRO,hg38 +GSM1480327,PRO,hg38 diff --git a/tests/data/eidodata/peps/value_check_pep/project_config.yaml b/tests/data/eidodata/peps/value_check_pep/project_config.yaml new file mode 100644 index 00000000..66c4380c --- /dev/null +++ b/tests/data/eidodata/peps/value_check_pep/project_config.yaml @@ -0,0 +1,6 @@ +description: None +name: encode_prj +pep_version: 2.0.0 +project_name: value_check_pep +sample_table: sample_table.csv +subsample_table: [] diff --git a/tests/data/eidodata/peps/value_check_pep/sample_table.csv b/tests/data/eidodata/peps/value_check_pep/sample_table.csv new file mode 100644 index 00000000..cefc2aa3 --- /dev/null +++ b/tests/data/eidodata/peps/value_check_pep/sample_table.csv @@ -0,0 +1,7 @@ +sample_name,file_name,genome,assay,cell_line,target,format_type +encode_4,ENCFF452DAM.bed.gz,hg38,Histone ChIP-seq,skeletal muscle myoblast,H3K36me3,narrowPeak +encode_20,ENCFF121AXG.bed.gz,hg38,DNase-seq,RPMI7951,,tssPeak +encode_21,ENCFF710ECJ.bed.gz,hg38,DNase-seq,RPMI7951,,broadPeak 
+encode_22,ENCFF945FZN.bed.gz,hg38,DNase-seq,RPMI7951,,narrowPeak +encode_23,ENCFF322PQO.bed.gz,hg38,DNase-seq,RPMI7951,,tssPeak +encode_24,ENCFF322PQO.bed.gz,hg38,DNase-seq,RPMI7951,,tssPeak1 diff --git a/tests/data/eidodata/schemas/schema_test_file_exist.yaml b/tests/data/eidodata/schemas/schema_test_file_exist.yaml new file mode 100644 index 00000000..e1814b8d --- /dev/null +++ b/tests/data/eidodata/schemas/schema_test_file_exist.yaml @@ -0,0 +1,35 @@ +description: test existing files in subsamples + +properties: + dcc: + type: object + properties: + compute_packages: + type: object + samples: + type: array + items: + type: object + properties: + sample_name: + type: string + protocol: + type: string + local_files: + anyOf: + - type: string + - type: array + items: + type: string + sizing: + - local_files + + tangible: + - local_files + + required: + - sample_name + - local_files + +required: + - samples diff --git a/tests/data/eidodata/schemas/test_schema.yaml b/tests/data/eidodata/schemas/test_schema.yaml new file mode 100644 index 00000000..13cdd1d6 --- /dev/null +++ b/tests/data/eidodata/schemas/test_schema.yaml @@ -0,0 +1,22 @@ +description: test PEP schema + +properties: + dcc: + type: object + properties: + compute_packages: + type: object + samples: + type: array + items: + type: object + properties: + sample_name: + type: string + protocol: + type: string + genome: + type: string + +required: + - samples diff --git a/tests/data/eidodata/schemas/test_schema_imports.yaml b/tests/data/eidodata/schemas/test_schema_imports.yaml new file mode 100644 index 00000000..e48db5df --- /dev/null +++ b/tests/data/eidodata/schemas/test_schema_imports.yaml @@ -0,0 +1,17 @@ +imports: + - http://schema.databio.org/pep/2.0.0.yaml +description: Schema for a more restrictive PEP +properties: + samples: + type: array + items: + type: object + properties: + my_numeric_attribute: + type: integer + minimum: 0 + maximum: 1 + required: + - my_numeric_attribute +required: + - 
samples diff --git a/tests/data/eidodata/schemas/test_schema_imports_rel_path.yaml b/tests/data/eidodata/schemas/test_schema_imports_rel_path.yaml new file mode 100644 index 00000000..527fb341 --- /dev/null +++ b/tests/data/eidodata/schemas/test_schema_imports_rel_path.yaml @@ -0,0 +1,13 @@ +description: "PEP validation schema for this particular pipeline." +version: "2.0.0" +imports: + - "../common/schemas/common_pep_validation.yaml" +properties: + samples: + items: + properties: + patient: + type: string + pattern: "\\S+" + description: >- + Unique identifier of the patient a sample has been taken from. \ No newline at end of file diff --git a/tests/data/eidodata/schemas/test_schema_invalid.yaml b/tests/data/eidodata/schemas/test_schema_invalid.yaml new file mode 100644 index 00000000..715e8243 --- /dev/null +++ b/tests/data/eidodata/schemas/test_schema_invalid.yaml @@ -0,0 +1,25 @@ +description: test PEP schema + +properties: + dcc: + type: object + properties: + compute_packages: + type: object + samples: + type: array + items: + type: object + properties: + sample_name: + type: string + protocol: + type: string + genome: + type: string + invalid: + type: string + +required: + - samples + - invalid diff --git a/tests/data/eidodata/schemas/test_schema_invalid_with_type.yaml b/tests/data/eidodata/schemas/test_schema_invalid_with_type.yaml new file mode 100644 index 00000000..9815bdae --- /dev/null +++ b/tests/data/eidodata/schemas/test_schema_invalid_with_type.yaml @@ -0,0 +1,25 @@ +description: test PEP schema +type: object +properties: + dcc: + type: object + properties: + compute_packages: + type: object + samples: + type: array + items: + type: object + properties: + sample_name: + type: string + protocol: + type: string + genome: + type: string + invalid: + type: string + +required: + - samples + - invalid diff --git a/tests/data/eidodata/schemas/test_schema_sample_invalid.yaml b/tests/data/eidodata/schemas/test_schema_sample_invalid.yaml new file mode 
100644 index 00000000..7b429c1e --- /dev/null +++ b/tests/data/eidodata/schemas/test_schema_sample_invalid.yaml @@ -0,0 +1,26 @@ +description: test PEP schema + +properties: + dcc: + type: object + properties: + compute_packages: + type: object + samples: + type: array + items: + type: object + properties: + sample_name: + type: string + protocol: + type: string + genome: + type: string + newattr: + type: string + required: + - newattr + +required: + - samples diff --git a/tests/data/eidodata/schemas/test_schema_samples.yaml b/tests/data/eidodata/schemas/test_schema_samples.yaml new file mode 100644 index 00000000..13cdd1d6 --- /dev/null +++ b/tests/data/eidodata/schemas/test_schema_samples.yaml @@ -0,0 +1,22 @@ +description: test PEP schema + +properties: + dcc: + type: object + properties: + compute_packages: + type: object + samples: + type: array + items: + type: object + properties: + sample_name: + type: string + protocol: + type: string + genome: + type: string + +required: + - samples diff --git a/tests/data/eidodata/schemas/value_check_schema.yaml b/tests/data/eidodata/schemas/value_check_schema.yaml new file mode 100644 index 00000000..fb2352dc --- /dev/null +++ b/tests/data/eidodata/schemas/value_check_schema.yaml @@ -0,0 +1,16 @@ +description: bedboss run-all pep schema +properties: + samples: + items: + properties: + format_type: + description: whether the regions are narrow (transcription factor implies + narrow, histone mark implies broad peaks) + enum: + - narrowPeak + - broadPeak + type: string + type: object + type: array +required: +- samples diff --git a/tests/data/example_peps-master/.gitignore b/tests/data/peppydata/example_peps-master/.gitignore similarity index 100% rename from tests/data/example_peps-master/.gitignore rename to tests/data/peppydata/example_peps-master/.gitignore diff --git a/tests/data/example_peps-master/.pre-commit-config.yaml b/tests/data/peppydata/example_peps-master/.pre-commit-config.yaml similarity index 100% rename 
from tests/data/example_peps-master/.pre-commit-config.yaml rename to tests/data/peppydata/example_peps-master/.pre-commit-config.yaml diff --git a/tests/data/example_peps-master/README.md b/tests/data/peppydata/example_peps-master/README.md similarity index 100% rename from tests/data/example_peps-master/README.md rename to tests/data/peppydata/example_peps-master/README.md diff --git a/tests/data/example_peps-master/data/frog1_data.txt b/tests/data/peppydata/example_peps-master/data/frog1_data.txt similarity index 100% rename from tests/data/example_peps-master/data/frog1_data.txt rename to tests/data/peppydata/example_peps-master/data/frog1_data.txt diff --git a/tests/data/example_peps-master/data/frog1a_data.txt b/tests/data/peppydata/example_peps-master/data/frog1a_data.txt similarity index 100% rename from tests/data/example_peps-master/data/frog1a_data.txt rename to tests/data/peppydata/example_peps-master/data/frog1a_data.txt diff --git a/tests/data/example_peps-master/data/frog1a_data2.txt b/tests/data/peppydata/example_peps-master/data/frog1a_data2.txt similarity index 100% rename from tests/data/example_peps-master/data/frog1a_data2.txt rename to tests/data/peppydata/example_peps-master/data/frog1a_data2.txt diff --git a/tests/data/example_peps-master/data/frog1b_data.txt b/tests/data/peppydata/example_peps-master/data/frog1b_data.txt similarity index 100% rename from tests/data/example_peps-master/data/frog1b_data.txt rename to tests/data/peppydata/example_peps-master/data/frog1b_data.txt diff --git a/tests/data/example_peps-master/data/frog1b_data2.txt b/tests/data/peppydata/example_peps-master/data/frog1b_data2.txt similarity index 100% rename from tests/data/example_peps-master/data/frog1b_data2.txt rename to tests/data/peppydata/example_peps-master/data/frog1b_data2.txt diff --git a/tests/data/example_peps-master/data/frog1c_data.txt b/tests/data/peppydata/example_peps-master/data/frog1c_data.txt similarity index 100% rename from 
tests/data/example_peps-master/data/frog1c_data.txt rename to tests/data/peppydata/example_peps-master/data/frog1c_data.txt diff --git a/tests/data/example_peps-master/data/frog1c_data2.txt b/tests/data/peppydata/example_peps-master/data/frog1c_data2.txt similarity index 100% rename from tests/data/example_peps-master/data/frog1c_data2.txt rename to tests/data/peppydata/example_peps-master/data/frog1c_data2.txt diff --git a/tests/data/example_peps-master/data/frog2_data.txt b/tests/data/peppydata/example_peps-master/data/frog2_data.txt similarity index 100% rename from tests/data/example_peps-master/data/frog2_data.txt rename to tests/data/peppydata/example_peps-master/data/frog2_data.txt diff --git a/tests/data/example_peps-master/data/frog2a_data.txt b/tests/data/peppydata/example_peps-master/data/frog2a_data.txt similarity index 100% rename from tests/data/example_peps-master/data/frog2a_data.txt rename to tests/data/peppydata/example_peps-master/data/frog2a_data.txt diff --git a/tests/data/example_peps-master/data/frog2b_data.txt b/tests/data/peppydata/example_peps-master/data/frog2b_data.txt similarity index 100% rename from tests/data/example_peps-master/data/frog2b_data.txt rename to tests/data/peppydata/example_peps-master/data/frog2b_data.txt diff --git a/tests/data/example_peps-master/data/frog3_data.txt b/tests/data/peppydata/example_peps-master/data/frog3_data.txt similarity index 100% rename from tests/data/example_peps-master/data/frog3_data.txt rename to tests/data/peppydata/example_peps-master/data/frog3_data.txt diff --git a/tests/data/example_peps-master/data/frog4_data.txt b/tests/data/peppydata/example_peps-master/data/frog4_data.txt similarity index 100% rename from tests/data/example_peps-master/data/frog4_data.txt rename to tests/data/peppydata/example_peps-master/data/frog4_data.txt diff --git a/tests/data/example_peps-master/example_BiocProject/data/laminB1Lads.bed 
b/tests/data/peppydata/example_peps-master/example_BiocProject/data/laminB1Lads.bed similarity index 100% rename from tests/data/example_peps-master/example_BiocProject/data/laminB1Lads.bed rename to tests/data/peppydata/example_peps-master/example_BiocProject/data/laminB1Lads.bed diff --git a/tests/data/example_peps-master/example_BiocProject/data/vistaEnhancers.bed b/tests/data/peppydata/example_peps-master/example_BiocProject/data/vistaEnhancers.bed similarity index 100% rename from tests/data/example_peps-master/example_BiocProject/data/vistaEnhancers.bed rename to tests/data/peppydata/example_peps-master/example_BiocProject/data/vistaEnhancers.bed diff --git a/tests/data/example_peps-master/example_BiocProject/project_config.yaml b/tests/data/peppydata/example_peps-master/example_BiocProject/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_BiocProject/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_BiocProject/project_config.yaml diff --git a/tests/data/example_peps-master/example_BiocProject/project_config_resize.yaml b/tests/data/peppydata/example_peps-master/example_BiocProject/project_config_resize.yaml similarity index 100% rename from tests/data/example_peps-master/example_BiocProject/project_config_resize.yaml rename to tests/data/peppydata/example_peps-master/example_BiocProject/project_config_resize.yaml diff --git a/tests/data/example_peps-master/example_BiocProject/readBedFiles.R b/tests/data/peppydata/example_peps-master/example_BiocProject/readBedFiles.R similarity index 100% rename from tests/data/example_peps-master/example_BiocProject/readBedFiles.R rename to tests/data/peppydata/example_peps-master/example_BiocProject/readBedFiles.R diff --git a/tests/data/example_peps-master/example_BiocProject/readBedFiles_resize.R b/tests/data/peppydata/example_peps-master/example_BiocProject/readBedFiles_resize.R similarity index 100% rename from 
tests/data/example_peps-master/example_BiocProject/readBedFiles_resize.R rename to tests/data/peppydata/example_peps-master/example_BiocProject/readBedFiles_resize.R diff --git a/tests/data/example_peps-master/example_BiocProject/sample_table.csv b/tests/data/peppydata/example_peps-master/example_BiocProject/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_BiocProject/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_BiocProject/sample_table.csv diff --git a/tests/data/example_peps-master/example_BiocProject_exceptions/project_config.yaml b/tests/data/peppydata/example_peps-master/example_BiocProject_exceptions/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_BiocProject_exceptions/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_BiocProject_exceptions/project_config.yaml diff --git a/tests/data/example_peps-master/example_BiocProject_exceptions/readBedFilesExceptions.R b/tests/data/peppydata/example_peps-master/example_BiocProject_exceptions/readBedFilesExceptions.R similarity index 100% rename from tests/data/example_peps-master/example_BiocProject_exceptions/readBedFilesExceptions.R rename to tests/data/peppydata/example_peps-master/example_BiocProject_exceptions/readBedFilesExceptions.R diff --git a/tests/data/example_peps-master/example_BiocProject_exceptions/sample_table.csv b/tests/data/peppydata/example_peps-master/example_BiocProject_exceptions/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_BiocProject_exceptions/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_BiocProject_exceptions/sample_table.csv diff --git a/tests/data/example_peps-master/example_BiocProject_remote/project_config.yaml b/tests/data/peppydata/example_peps-master/example_BiocProject_remote/project_config.yaml similarity index 100% rename from 
tests/data/example_peps-master/example_BiocProject_remote/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_BiocProject_remote/project_config.yaml diff --git a/tests/data/example_peps-master/example_BiocProject_remote/project_config_resize.yaml b/tests/data/peppydata/example_peps-master/example_BiocProject_remote/project_config_resize.yaml similarity index 100% rename from tests/data/example_peps-master/example_BiocProject_remote/project_config_resize.yaml rename to tests/data/peppydata/example_peps-master/example_BiocProject_remote/project_config_resize.yaml diff --git a/tests/data/example_peps-master/example_BiocProject_remote/readRemoteData.R b/tests/data/peppydata/example_peps-master/example_BiocProject_remote/readRemoteData.R similarity index 100% rename from tests/data/example_peps-master/example_BiocProject_remote/readRemoteData.R rename to tests/data/peppydata/example_peps-master/example_BiocProject_remote/readRemoteData.R diff --git a/tests/data/example_peps-master/example_BiocProject_remote/readRemoteData_resize.R b/tests/data/peppydata/example_peps-master/example_BiocProject_remote/readRemoteData_resize.R similarity index 100% rename from tests/data/example_peps-master/example_BiocProject_remote/readRemoteData_resize.R rename to tests/data/peppydata/example_peps-master/example_BiocProject_remote/readRemoteData_resize.R diff --git a/tests/data/example_peps-master/example_BiocProject_remote/sample_table.csv b/tests/data/peppydata/example_peps-master/example_BiocProject_remote/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_BiocProject_remote/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_BiocProject_remote/sample_table.csv diff --git a/tests/data/example_peps-master/example_amendments1/project_config.yaml b/tests/data/peppydata/example_peps-master/example_amendments1/project_config.yaml similarity index 100% rename from 
tests/data/example_peps-master/example_amendments1/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_amendments1/project_config.yaml diff --git a/tests/data/example_peps-master/example_amendments1/sample_table.csv b/tests/data/peppydata/example_peps-master/example_amendments1/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_amendments1/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_amendments1/sample_table.csv diff --git a/tests/data/example_peps-master/example_amendments1/sample_table_newLib.csv b/tests/data/peppydata/example_peps-master/example_amendments1/sample_table_newLib.csv similarity index 100% rename from tests/data/example_peps-master/example_amendments1/sample_table_newLib.csv rename to tests/data/peppydata/example_peps-master/example_amendments1/sample_table_newLib.csv diff --git a/tests/data/example_peps-master/example_amendments1/sample_table_newLib2.csv b/tests/data/peppydata/example_peps-master/example_amendments1/sample_table_newLib2.csv similarity index 100% rename from tests/data/example_peps-master/example_amendments1/sample_table_newLib2.csv rename to tests/data/peppydata/example_peps-master/example_amendments1/sample_table_newLib2.csv diff --git a/tests/data/example_peps-master/example_amendments1/sample_table_pre.csv b/tests/data/peppydata/example_peps-master/example_amendments1/sample_table_pre.csv similarity index 100% rename from tests/data/example_peps-master/example_amendments1/sample_table_pre.csv rename to tests/data/peppydata/example_peps-master/example_amendments1/sample_table_pre.csv diff --git a/tests/data/peppydata/example_peps-master/example_amendments1/win_project_config.yaml b/tests/data/peppydata/example_peps-master/example_amendments1/win_project_config.yaml new file mode 100644 index 00000000..03347a84 --- /dev/null +++ b/tests/data/peppydata/example_peps-master/example_amendments1/win_project_config.yaml @@ -0,0 +1,16 @@ 
+pep_version: "2.0.0" +sample_table: sample_table.csv +output_dir: "%USERPROFILE%/hello_looper_results" + +sample_modifiers: + derive: + attributes: [file_path] + sources: + source1: /data/lab/project/{organism}_{time}h.fastq + source2: /path/from/collaborator/weirdNamingScheme_{external_id}.fastq +project_modifiers: + amend: + newLib: + sample_table: sample_table_newLib.csv + newLib2: + sample_table: sample_table_newLib2.csv diff --git a/tests/data/example_peps-master/example_amendments2/project_config.yaml b/tests/data/peppydata/example_peps-master/example_amendments2/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_amendments2/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_amendments2/project_config.yaml diff --git a/tests/data/example_peps-master/example_amendments2/sample_table.csv b/tests/data/peppydata/example_peps-master/example_amendments2/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_amendments2/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_amendments2/sample_table.csv diff --git a/tests/data/example_peps-master/example_amendments2/sample_table_noFrog.csv b/tests/data/peppydata/example_peps-master/example_amendments2/sample_table_noFrog.csv similarity index 100% rename from tests/data/example_peps-master/example_amendments2/sample_table_noFrog.csv rename to tests/data/peppydata/example_peps-master/example_amendments2/sample_table_noFrog.csv diff --git a/tests/data/example_peps-master/example_amendments2/sample_table_pre.csv b/tests/data/peppydata/example_peps-master/example_amendments2/sample_table_pre.csv similarity index 100% rename from tests/data/example_peps-master/example_amendments2/sample_table_pre.csv rename to tests/data/peppydata/example_peps-master/example_amendments2/sample_table_pre.csv diff --git a/tests/data/example_peps-master/example_append/project_config.yaml 
b/tests/data/peppydata/example_peps-master/example_append/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_append/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_append/project_config.yaml diff --git a/tests/data/example_peps-master/example_append/sample_table.csv b/tests/data/peppydata/example_peps-master/example_append/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_append/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_append/sample_table.csv diff --git a/tests/data/example_peps-master/example_append/sample_table_pre.csv b/tests/data/peppydata/example_peps-master/example_append/sample_table_pre.csv similarity index 100% rename from tests/data/example_peps-master/example_append/sample_table_pre.csv rename to tests/data/peppydata/example_peps-master/example_append/sample_table_pre.csv diff --git a/tests/data/example_peps-master/example_automerge/project_config.yaml b/tests/data/peppydata/example_peps-master/example_automerge/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_automerge/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_automerge/project_config.yaml diff --git a/tests/data/example_peps-master/example_automerge/sample_table.csv b/tests/data/peppydata/example_peps-master/example_automerge/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_automerge/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_automerge/sample_table.csv diff --git a/tests/data/example_peps-master/example_basic/project_config.yaml b/tests/data/peppydata/example_peps-master/example_basic/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_basic/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_basic/project_config.yaml diff --git 
a/tests/data/example_peps-master/example_basic/sample_table.csv b/tests/data/peppydata/example_peps-master/example_basic/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_basic/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_basic/sample_table.csv diff --git a/tests/data/example_peps-master/example_basic_sample_yaml/sample.yaml b/tests/data/peppydata/example_peps-master/example_basic_sample_yaml/sample.yaml similarity index 100% rename from tests/data/example_peps-master/example_basic_sample_yaml/sample.yaml rename to tests/data/peppydata/example_peps-master/example_basic_sample_yaml/sample.yaml diff --git a/tests/data/example_peps-master/example_custom_index/project_config.yaml b/tests/data/peppydata/example_peps-master/example_custom_index/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_custom_index/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_custom_index/project_config.yaml diff --git a/tests/data/example_peps-master/example_custom_index/sample_table.csv b/tests/data/peppydata/example_peps-master/example_custom_index/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_custom_index/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_custom_index/sample_table.csv diff --git a/tests/data/example_peps-master/example_derive/project_config.yaml b/tests/data/peppydata/example_peps-master/example_derive/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_derive/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_derive/project_config.yaml diff --git a/tests/data/example_peps-master/example_derive/sample_table.csv b/tests/data/peppydata/example_peps-master/example_derive/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_derive/sample_table.csv rename to 
tests/data/peppydata/example_peps-master/example_derive/sample_table.csv diff --git a/tests/data/example_peps-master/example_derive/sample_table_pre.csv b/tests/data/peppydata/example_peps-master/example_derive/sample_table_pre.csv similarity index 100% rename from tests/data/example_peps-master/example_derive/sample_table_pre.csv rename to tests/data/peppydata/example_peps-master/example_derive/sample_table_pre.csv diff --git a/tests/data/example_peps-master/example_derive_imply/project_config.yaml b/tests/data/peppydata/example_peps-master/example_derive_imply/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_derive_imply/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_derive_imply/project_config.yaml diff --git a/tests/data/example_peps-master/example_derive_imply/sample_table.csv b/tests/data/peppydata/example_peps-master/example_derive_imply/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_derive_imply/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_derive_imply/sample_table.csv diff --git a/tests/data/example_peps-master/example_derive_imply/sample_table_pre.csv b/tests/data/peppydata/example_peps-master/example_derive_imply/sample_table_pre.csv similarity index 100% rename from tests/data/example_peps-master/example_derive_imply/sample_table_pre.csv rename to tests/data/peppydata/example_peps-master/example_derive_imply/sample_table_pre.csv diff --git a/tests/data/example_peps-master/example_duplicate/project_config.yaml b/tests/data/peppydata/example_peps-master/example_duplicate/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_duplicate/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_duplicate/project_config.yaml diff --git a/tests/data/example_peps-master/example_duplicate/sample_table.csv 
b/tests/data/peppydata/example_peps-master/example_duplicate/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_duplicate/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_duplicate/sample_table.csv diff --git a/tests/data/example_peps-master/example_imply/project_config.yaml b/tests/data/peppydata/example_peps-master/example_imply/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_imply/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_imply/project_config.yaml diff --git a/tests/data/example_peps-master/example_imply/sample_table.csv b/tests/data/peppydata/example_peps-master/example_imply/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_imply/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_imply/sample_table.csv diff --git a/tests/data/example_peps-master/example_imply/sample_table_pre.csv b/tests/data/peppydata/example_peps-master/example_imply/sample_table_pre.csv similarity index 100% rename from tests/data/example_peps-master/example_imply/sample_table_pre.csv rename to tests/data/peppydata/example_peps-master/example_imply/sample_table_pre.csv diff --git a/tests/data/example_peps-master/example_imports/project_config.yaml b/tests/data/peppydata/example_peps-master/example_imports/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_imports/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_imports/project_config.yaml diff --git a/tests/data/example_peps-master/example_imports/project_config1.yaml b/tests/data/peppydata/example_peps-master/example_imports/project_config1.yaml similarity index 100% rename from tests/data/example_peps-master/example_imports/project_config1.yaml rename to tests/data/peppydata/example_peps-master/example_imports/project_config1.yaml diff --git 
a/tests/data/example_peps-master/example_imports/sample_table.csv b/tests/data/peppydata/example_peps-master/example_imports/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_imports/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_imports/sample_table.csv diff --git a/tests/data/example_peps-master/example_incorrect_index/project_config.yaml b/tests/data/peppydata/example_peps-master/example_incorrect_index/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_incorrect_index/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_incorrect_index/project_config.yaml diff --git a/tests/data/example_peps-master/example_incorrect_index/sample_table.csv b/tests/data/peppydata/example_peps-master/example_incorrect_index/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_incorrect_index/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_incorrect_index/sample_table.csv diff --git a/tests/data/peppydata/example_peps-master/example_issue499/project_config.yaml b/tests/data/peppydata/example_peps-master/example_issue499/project_config.yaml new file mode 100644 index 00000000..ee846cb7 --- /dev/null +++ b/tests/data/peppydata/example_peps-master/example_issue499/project_config.yaml @@ -0,0 +1,10 @@ +pep_version: "2.0.0" +sample_table: sample_table.csv +output_dir: "$HOME/hello_looper_results" + +sample_modifiers: + derive: + attributes: [file_path] + sources: + source1: $PROJECT/{organism}_{time}h.fastq + source2: $COLLABORATOR/weirdNamingScheme_{external_id}.fastq diff --git a/tests/data/example_peps-master/example_remove/sample_table.csv b/tests/data/peppydata/example_peps-master/example_issue499/sample_table.csv old mode 100644 new mode 100755 similarity index 100% rename from tests/data/example_peps-master/example_remove/sample_table.csv rename to 
tests/data/peppydata/example_peps-master/example_issue499/sample_table.csv diff --git a/tests/data/peppydata/example_peps-master/example_issue499/sample_table_pre.csv b/tests/data/peppydata/example_peps-master/example_issue499/sample_table_pre.csv new file mode 100755 index 00000000..159fc341 --- /dev/null +++ b/tests/data/peppydata/example_peps-master/example_issue499/sample_table_pre.csv @@ -0,0 +1,5 @@ +sample_name,protocol,organism,time,file_path +pig_0h,RRBS,pig,0,data/lab/project/pig_0h.fastq +pig_1h,RRBS,pig,1,data/lab/project/pig_1h.fastq +frog_0h,RRBS,frog,0,data/lab/project/frog_0h.fastq +frog_1h,RRBS,frog,1,data/lab/project/frog_1h.fastq diff --git a/tests/data/example_peps-master/example_missing_version/project_config.yaml b/tests/data/peppydata/example_peps-master/example_missing_version/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_missing_version/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_missing_version/project_config.yaml diff --git a/tests/data/example_peps-master/example_missing_version/sample_table.csv b/tests/data/peppydata/example_peps-master/example_missing_version/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_missing_version/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_missing_version/sample_table.csv diff --git a/tests/data/example_peps-master/example_multiple_subsamples/project_config.yaml b/tests/data/peppydata/example_peps-master/example_multiple_subsamples/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_multiple_subsamples/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_multiple_subsamples/project_config.yaml diff --git a/tests/data/peppydata/example_peps-master/example_multiple_subsamples/sample_table.csv b/tests/data/peppydata/example_peps-master/example_multiple_subsamples/sample_table.csv new file mode 
100644 index 00000000..7c06204c --- /dev/null +++ b/tests/data/peppydata/example_peps-master/example_multiple_subsamples/sample_table.csv @@ -0,0 +1,5 @@ +sample_id,protocol,identifier +frog_1,anySampleType,frog1 +frog_2,anySampleType,frog2 +frog_3,anySampleType,frog3 +frog_4,anySampleType,frog4 diff --git a/tests/data/peppydata/example_peps-master/example_multiple_subsamples/subsample_table1.csv b/tests/data/peppydata/example_peps-master/example_multiple_subsamples/subsample_table1.csv new file mode 100644 index 00000000..f1b3c2f1 --- /dev/null +++ b/tests/data/peppydata/example_peps-master/example_multiple_subsamples/subsample_table1.csv @@ -0,0 +1,6 @@ +sample_id,file_path,subsample_name +frog_1,file/a.txt,a +frog_1,file/b.txt,b +frog_1,file/c.txt,c +frog_2,file/a.txt,a +frog_2,file/b.txt,b diff --git a/tests/data/peppydata/example_peps-master/example_multiple_subsamples/subsample_table2.csv b/tests/data/peppydata/example_peps-master/example_multiple_subsamples/subsample_table2.csv new file mode 100644 index 00000000..5e6d2981 --- /dev/null +++ b/tests/data/peppydata/example_peps-master/example_multiple_subsamples/subsample_table2.csv @@ -0,0 +1,6 @@ +sample_id,random_string,subsample_name +frog_1,x_x,x +frog_1,y_y,y +frog_1,z_z,z +frog_2,xy_yx,xy +frog_2,xx_xx,xx diff --git a/tests/data/peppydata/example_peps-master/example_nextflow_config/project_config.yaml b/tests/data/peppydata/example_peps-master/example_nextflow_config/project_config.yaml new file mode 100644 index 00000000..51cc3784 --- /dev/null +++ b/tests/data/peppydata/example_peps-master/example_nextflow_config/project_config.yaml @@ -0,0 +1,3 @@ +pep_version: "2.1.0" +sample_table: "samplesheet.csv" + diff --git a/tests/data/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet.csv b/tests/data/peppydata/example_peps-master/example_nextflow_config/samplesheet.csv similarity index 100% rename from tests/data/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet.csv rename to 
tests/data/peppydata/example_peps-master/example_nextflow_config/samplesheet.csv diff --git a/tests/data/example_peps-master/example_nextflow_samplesheet/sample_table.csv b/tests/data/peppydata/example_peps-master/example_nextflow_samplesheet/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_nextflow_samplesheet/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_nextflow_samplesheet/sample_table.csv diff --git a/tests/data/example_peps-master/example_nextflow_subsamples/project_config.yaml b/tests/data/peppydata/example_peps-master/example_nextflow_subsamples/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_nextflow_subsamples/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_nextflow_subsamples/project_config.yaml diff --git a/tests/data/peppydata/example_peps-master/example_nextflow_subsamples/samplesheet.csv b/tests/data/peppydata/example_peps-master/example_nextflow_subsamples/samplesheet.csv new file mode 100644 index 00000000..a26933ef --- /dev/null +++ b/tests/data/peppydata/example_peps-master/example_nextflow_subsamples/samplesheet.csv @@ -0,0 +1,6 @@ +sample,strandedness +WT_REP1,reverse +WT_REP2,reverse +RAP1_UNINDUCED_REP1,reverse +RAP1_UNINDUCED_REP2,reverse +RAP1_IAA_30M_REP1,reverse diff --git a/tests/data/peppydata/example_peps-master/example_nextflow_subsamples/subsamplesheet.csv b/tests/data/peppydata/example_peps-master/example_nextflow_subsamples/subsamplesheet.csv new file mode 100644 index 00000000..1e56c363 --- /dev/null +++ b/tests/data/peppydata/example_peps-master/example_nextflow_subsamples/subsamplesheet.csv @@ -0,0 +1,8 @@ +sample,instrument_platform,run_accession,fastq_1,fastq_2 
+WT_REP1,ABI_SOLID,runaccession1,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357070_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357070_2.fastq.gz +WT_REP1,BGISEQ,runaccession2,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357071_2.fastq.gz +WT_REP2,CAPILLARY,123123123,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357072_2.fastq.gz +RAP1_UNINDUCED_REP1,COMPLETE_GENOMICS,somerunaccesion,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357073_1.fastq.gz, +RAP1_UNINDUCED_REP2,DNBSEQ,ERR2412421,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357074_1.fastq.gz, +RAP1_UNINDUCED_REP2,HELICOS,xxxxxxxxxx,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357075_1.fastq.gz, +RAP1_IAA_30M_REP1,ILLUMINA,None,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/testdata/GSE110004/SRR6357076_2.fastq.gz diff --git a/tests/data/example_peps-master/example_nextflow_taxprofiler_pep/config.yaml b/tests/data/peppydata/example_peps-master/example_nextflow_taxprofiler_pep/config.yaml similarity index 100% rename from tests/data/example_peps-master/example_nextflow_taxprofiler_pep/config.yaml rename to tests/data/peppydata/example_peps-master/example_nextflow_taxprofiler_pep/config.yaml diff --git a/tests/data/peppydata/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet.csv b/tests/data/peppydata/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet.csv new file mode 
100644 index 00000000..1b17b767 --- /dev/null +++ b/tests/data/peppydata/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet.csv @@ -0,0 +1,7 @@ +sample,instrument_platform,run_accession,fastq_1,fastq_2,fasta +2611,ILLUMINA,ERR5766174,,,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fasta/ERX5474930_ERR5766174_1.fa.gz +2612,ILLUMINA,ERR5766176,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_2.fastq.gz, +2612,ILLUMINA,ERR5766176_B,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_B_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474932_ERR5766176_B_2.fastq.gz, +2612,ILLUMINA,ERR5766180,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474936_ERR5766180_1.fastq.gz,, +2613,ILLUMINA,ERR5766181,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474937_ERR5766181_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERX5474937_ERR5766181_2.fastq.gz, +ERR3201952,OXFORD_NANOPORE,ERR3201952,https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/data/fastq/ERR3201952.fastq.gz,, diff --git a/tests/data/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet_schema.yaml b/tests/data/peppydata/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet_schema.yaml similarity index 100% rename from tests/data/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet_schema.yaml rename to tests/data/peppydata/example_peps-master/example_nextflow_taxprofiler_pep/samplesheet_schema.yaml diff --git a/tests/data/example_peps-master/example_nextflow_taxprofiler_pep/test_nextflow_original_samplesheet.csv 
b/tests/data/peppydata/example_peps-master/example_nextflow_taxprofiler_pep/test_nextflow_original_samplesheet.csv similarity index 100% rename from tests/data/example_peps-master/example_nextflow_taxprofiler_pep/test_nextflow_original_samplesheet.csv rename to tests/data/peppydata/example_peps-master/example_nextflow_taxprofiler_pep/test_nextflow_original_samplesheet.csv diff --git a/tests/data/example_peps-master/example_node_alias/README.md b/tests/data/peppydata/example_peps-master/example_node_alias/README.md similarity index 100% rename from tests/data/example_peps-master/example_node_alias/README.md rename to tests/data/peppydata/example_peps-master/example_node_alias/README.md diff --git a/tests/data/example_peps-master/example_node_alias/project_config.yaml b/tests/data/peppydata/example_peps-master/example_node_alias/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_node_alias/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_node_alias/project_config.yaml diff --git a/tests/data/example_peps-master/example_node_alias/project_config1.yaml b/tests/data/peppydata/example_peps-master/example_node_alias/project_config1.yaml similarity index 100% rename from tests/data/example_peps-master/example_node_alias/project_config1.yaml rename to tests/data/peppydata/example_peps-master/example_node_alias/project_config1.yaml diff --git a/tests/data/example_peps-master/example_node_alias/sample_table.csv b/tests/data/peppydata/example_peps-master/example_node_alias/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_node_alias/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_node_alias/sample_table.csv diff --git a/tests/data/example_peps-master/example_noname/project_config.yaml b/tests/data/peppydata/example_peps-master/example_noname/project_config.yaml similarity index 100% rename from 
tests/data/example_peps-master/example_noname/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_noname/project_config.yaml diff --git a/tests/data/example_peps-master/example_noname/project_config_noname.yaml b/tests/data/peppydata/example_peps-master/example_noname/project_config_noname.yaml similarity index 100% rename from tests/data/example_peps-master/example_noname/project_config_noname.yaml rename to tests/data/peppydata/example_peps-master/example_noname/project_config_noname.yaml diff --git a/tests/data/example_peps-master/example_noname/sample_table.csv b/tests/data/peppydata/example_peps-master/example_noname/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_noname/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_noname/sample_table.csv diff --git a/tests/data/example_peps-master/example_old/project_config.yaml b/tests/data/peppydata/example_peps-master/example_old/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_old/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_old/project_config.yaml diff --git a/tests/data/example_peps-master/example_old/sample_table.csv b/tests/data/peppydata/example_peps-master/example_old/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_old/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_old/sample_table.csv diff --git a/tests/data/example_peps-master/example_piface/annotation_sheet.csv b/tests/data/peppydata/example_peps-master/example_piface/annotation_sheet.csv similarity index 100% rename from tests/data/example_peps-master/example_piface/annotation_sheet.csv rename to tests/data/peppydata/example_peps-master/example_piface/annotation_sheet.csv diff --git a/tests/data/example_peps-master/example_piface/output_schema.yaml 
b/tests/data/peppydata/example_peps-master/example_piface/output_schema.yaml similarity index 100% rename from tests/data/example_peps-master/example_piface/output_schema.yaml rename to tests/data/peppydata/example_peps-master/example_piface/output_schema.yaml diff --git a/tests/data/example_peps-master/example_piface/output_schema_project.yaml b/tests/data/peppydata/example_peps-master/example_piface/output_schema_project.yaml similarity index 100% rename from tests/data/example_peps-master/example_piface/output_schema_project.yaml rename to tests/data/peppydata/example_peps-master/example_piface/output_schema_project.yaml diff --git a/tests/data/example_peps-master/example_piface/output_schema_sample.yaml b/tests/data/peppydata/example_peps-master/example_piface/output_schema_sample.yaml similarity index 100% rename from tests/data/example_peps-master/example_piface/output_schema_sample.yaml rename to tests/data/peppydata/example_peps-master/example_piface/output_schema_sample.yaml diff --git a/tests/data/example_peps-master/example_piface/pipeline_interface1_project.yaml b/tests/data/peppydata/example_peps-master/example_piface/pipeline_interface1_project.yaml similarity index 100% rename from tests/data/example_peps-master/example_piface/pipeline_interface1_project.yaml rename to tests/data/peppydata/example_peps-master/example_piface/pipeline_interface1_project.yaml diff --git a/tests/data/example_peps-master/example_piface/pipeline_interface1_sample.yaml b/tests/data/peppydata/example_peps-master/example_piface/pipeline_interface1_sample.yaml similarity index 100% rename from tests/data/example_peps-master/example_piface/pipeline_interface1_sample.yaml rename to tests/data/peppydata/example_peps-master/example_piface/pipeline_interface1_sample.yaml diff --git a/tests/data/example_peps-master/example_piface/pipeline_interface2_project.yaml b/tests/data/peppydata/example_peps-master/example_piface/pipeline_interface2_project.yaml similarity index 100% rename 
from tests/data/example_peps-master/example_piface/pipeline_interface2_project.yaml rename to tests/data/peppydata/example_peps-master/example_piface/pipeline_interface2_project.yaml diff --git a/tests/data/example_peps-master/example_piface/pipeline_interface2_sample.yaml b/tests/data/peppydata/example_peps-master/example_piface/pipeline_interface2_sample.yaml similarity index 100% rename from tests/data/example_peps-master/example_piface/pipeline_interface2_sample.yaml rename to tests/data/peppydata/example_peps-master/example_piface/pipeline_interface2_sample.yaml diff --git a/tests/data/example_peps-master/example_piface/project_config.yaml b/tests/data/peppydata/example_peps-master/example_piface/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_piface/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_piface/project_config.yaml diff --git a/tests/data/example_peps-master/example_piface/readData.R b/tests/data/peppydata/example_peps-master/example_piface/readData.R similarity index 100% rename from tests/data/example_peps-master/example_piface/readData.R rename to tests/data/peppydata/example_peps-master/example_piface/readData.R diff --git a/tests/data/example_peps-master/example_project_as_dictionary/project.json b/tests/data/peppydata/example_peps-master/example_project_as_dictionary/project.json similarity index 100% rename from tests/data/example_peps-master/example_project_as_dictionary/project.json rename to tests/data/peppydata/example_peps-master/example_project_as_dictionary/project.json diff --git a/tests/data/example_peps-master/example_remove/project_config.yaml b/tests/data/peppydata/example_peps-master/example_remove/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_remove/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_remove/project_config.yaml diff --git 
a/tests/data/peppydata/example_peps-master/example_remove/sample_table.csv b/tests/data/peppydata/example_peps-master/example_remove/sample_table.csv new file mode 100644 index 00000000..bcfd9bde --- /dev/null +++ b/tests/data/peppydata/example_peps-master/example_remove/sample_table.csv @@ -0,0 +1,5 @@ +sample_name,protocol,organism,time,file_path +pig_0h,RRBS,pig,0,source1 +pig_1h,RRBS,pig,1,source1 +frog_0h,RRBS,frog,0,source1 +frog_1h,RRBS,frog,1,source1 diff --git a/tests/data/example_peps-master/example_subsamples_none/project_config.yaml b/tests/data/peppydata/example_peps-master/example_subsamples_none/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_subsamples_none/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_subsamples_none/project_config.yaml diff --git a/tests/data/example_peps-master/example_subsamples_none/sample_table.csv b/tests/data/peppydata/example_peps-master/example_subsamples_none/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subsamples_none/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_subsamples_none/sample_table.csv diff --git a/tests/data/example_peps-master/example_subtable1/project_config.yaml b/tests/data/peppydata/example_peps-master/example_subtable1/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_subtable1/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_subtable1/project_config.yaml diff --git a/tests/data/example_peps-master/example_subtable1/sample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable1/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable1/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable1/sample_table.csv diff --git a/tests/data/example_peps-master/example_subtable1/subsample_table.csv 
b/tests/data/peppydata/example_peps-master/example_subtable1/subsample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable1/subsample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable1/subsample_table.csv diff --git a/tests/data/example_peps-master/example_subtable2/project_config.yaml b/tests/data/peppydata/example_peps-master/example_subtable2/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_subtable2/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_subtable2/project_config.yaml diff --git a/tests/data/example_peps-master/example_subtable2/sample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable2/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable2/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable2/sample_table.csv diff --git a/tests/data/example_peps-master/example_subtable2/subsample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable2/subsample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable2/subsample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable2/subsample_table.csv diff --git a/tests/data/example_peps-master/example_subtable3/project_config.yaml b/tests/data/peppydata/example_peps-master/example_subtable3/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_subtable3/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_subtable3/project_config.yaml diff --git a/tests/data/example_peps-master/example_subtable3/sample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable3/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable3/sample_table.csv rename to 
tests/data/peppydata/example_peps-master/example_subtable3/sample_table.csv diff --git a/tests/data/example_peps-master/example_subtable3/subsample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable3/subsample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable3/subsample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable3/subsample_table.csv diff --git a/tests/data/example_peps-master/example_subtable4/project_config.yaml b/tests/data/peppydata/example_peps-master/example_subtable4/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_subtable4/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_subtable4/project_config.yaml diff --git a/tests/data/example_peps-master/example_subtable4/sample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable4/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable4/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable4/sample_table.csv diff --git a/tests/data/example_peps-master/example_subtable4/subsample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable4/subsample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable4/subsample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable4/subsample_table.csv diff --git a/tests/data/example_peps-master/example_subtable5/project_config.yaml b/tests/data/peppydata/example_peps-master/example_subtable5/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_subtable5/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_subtable5/project_config.yaml diff --git a/tests/data/example_peps-master/example_subtable5/sample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable5/sample_table.csv 
similarity index 100% rename from tests/data/example_peps-master/example_subtable5/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable5/sample_table.csv diff --git a/tests/data/example_peps-master/example_subtable5/subsample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable5/subsample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable5/subsample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable5/subsample_table.csv diff --git a/tests/data/example_peps-master/example_subtable_automerge/project_config.yaml b/tests/data/peppydata/example_peps-master/example_subtable_automerge/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_subtable_automerge/project_config.yaml rename to tests/data/peppydata/example_peps-master/example_subtable_automerge/project_config.yaml diff --git a/tests/data/example_peps-master/example_subtable_automerge/sample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable_automerge/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable_automerge/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable_automerge/sample_table.csv diff --git a/tests/data/example_peps-master/example_subtable_automerge/subsample_table.csv b/tests/data/peppydata/example_peps-master/example_subtable_automerge/subsample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtable_automerge/subsample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtable_automerge/subsample_table.csv diff --git a/tests/data/example_peps-master/example_subtables/project_config.yaml b/tests/data/peppydata/example_peps-master/example_subtables/project_config.yaml similarity index 100% rename from tests/data/example_peps-master/example_subtables/project_config.yaml rename to 
tests/data/peppydata/example_peps-master/example_subtables/project_config.yaml diff --git a/tests/data/example_peps-master/example_subtables/sample_table.csv b/tests/data/peppydata/example_peps-master/example_subtables/sample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtables/sample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtables/sample_table.csv diff --git a/tests/data/example_peps-master/example_subtables/subsample_table.csv b/tests/data/peppydata/example_peps-master/example_subtables/subsample_table.csv similarity index 100% rename from tests/data/example_peps-master/example_subtables/subsample_table.csv rename to tests/data/peppydata/example_peps-master/example_subtables/subsample_table.csv diff --git a/tests/data/example_peps-master/example_subtables/subsample_table1.csv b/tests/data/peppydata/example_peps-master/example_subtables/subsample_table1.csv similarity index 100% rename from tests/data/example_peps-master/example_subtables/subsample_table1.csv rename to tests/data/peppydata/example_peps-master/example_subtables/subsample_table1.csv diff --git a/tests/data/example_peps-master/output/results_pipeline/sample1/other_pipeline1/sample1_GSM2471255_1.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample1/other_pipeline1/sample1_GSM2471255_1.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample1/other_pipeline1/sample1_GSM2471255_1.bw rename to tests/data/peppydata/example_peps-master/output/results_pipeline/sample1/other_pipeline1/sample1_GSM2471255_1.bw diff --git a/tests/data/example_peps-master/output/results_pipeline/sample1/other_pipeline1/sample1_GSM2471255_2.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample1/other_pipeline1/sample1_GSM2471255_2.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample1/other_pipeline1/sample1_GSM2471255_2.bw 
rename to tests/data/peppydata/example_peps-master/output/results_pipeline/sample1/other_pipeline1/sample1_GSM2471255_2.bw diff --git a/tests/data/example_peps-master/output/results_pipeline/sample1/pipeline1/sample1_GSM2471255_1.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample1/pipeline1/sample1_GSM2471255_1.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample1/pipeline1/sample1_GSM2471255_1.bw rename to tests/data/peppydata/example_peps-master/output/results_pipeline/sample1/pipeline1/sample1_GSM2471255_1.bw diff --git a/tests/data/example_peps-master/output/results_pipeline/sample1/pipeline1/sample1_GSM2471255_2.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample1/pipeline1/sample1_GSM2471255_2.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample1/pipeline1/sample1_GSM2471255_2.bw rename to tests/data/peppydata/example_peps-master/output/results_pipeline/sample1/pipeline1/sample1_GSM2471255_2.bw diff --git a/tests/data/example_peps-master/output/results_pipeline/sample2/other_pipeline1/sample2_GSM2471300_1.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample2/other_pipeline1/sample2_GSM2471300_1.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample2/other_pipeline1/sample2_GSM2471300_1.bw rename to tests/data/peppydata/example_peps-master/output/results_pipeline/sample2/other_pipeline1/sample2_GSM2471300_1.bw diff --git a/tests/data/example_peps-master/output/results_pipeline/sample2/other_pipeline1/sample2_GSM2471300_2.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample2/other_pipeline1/sample2_GSM2471300_2.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample2/other_pipeline1/sample2_GSM2471300_2.bw rename to 
tests/data/peppydata/example_peps-master/output/results_pipeline/sample2/other_pipeline1/sample2_GSM2471300_2.bw diff --git a/tests/data/example_peps-master/output/results_pipeline/sample2/pipeline1/sample2_GSM2471300_1.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample2/pipeline1/sample2_GSM2471300_1.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample2/pipeline1/sample2_GSM2471300_1.bw rename to tests/data/peppydata/example_peps-master/output/results_pipeline/sample2/pipeline1/sample2_GSM2471300_1.bw diff --git a/tests/data/example_peps-master/output/results_pipeline/sample2/pipeline1/sample2_GSM2471300_2.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample2/pipeline1/sample2_GSM2471300_2.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample2/pipeline1/sample2_GSM2471300_2.bw rename to tests/data/peppydata/example_peps-master/output/results_pipeline/sample2/pipeline1/sample2_GSM2471300_2.bw diff --git a/tests/data/example_peps-master/output/results_pipeline/sample3/other_pipeline2/sample3_GSM2471249_1.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample3/other_pipeline2/sample3_GSM2471249_1.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample3/other_pipeline2/sample3_GSM2471249_1.bw rename to tests/data/peppydata/example_peps-master/output/results_pipeline/sample3/other_pipeline2/sample3_GSM2471249_1.bw diff --git a/tests/data/example_peps-master/output/results_pipeline/sample3/other_pipeline2/sample3_GSM2471249_2.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample3/other_pipeline2/sample3_GSM2471249_2.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample3/other_pipeline2/sample3_GSM2471249_2.bw rename to 
tests/data/peppydata/example_peps-master/output/results_pipeline/sample3/other_pipeline2/sample3_GSM2471249_2.bw diff --git a/tests/data/example_peps-master/output/results_pipeline/sample3/pipeline2/sample3_GSM2471249_1.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample3/pipeline2/sample3_GSM2471249_1.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample3/pipeline2/sample3_GSM2471249_1.bw rename to tests/data/peppydata/example_peps-master/output/results_pipeline/sample3/pipeline2/sample3_GSM2471249_1.bw diff --git a/tests/data/example_peps-master/output/results_pipeline/sample3/pipeline2/sample3_GSM2471249_2.bw b/tests/data/peppydata/example_peps-master/output/results_pipeline/sample3/pipeline2/sample3_GSM2471249_2.bw similarity index 100% rename from tests/data/example_peps-master/output/results_pipeline/sample3/pipeline2/sample3_GSM2471249_2.bw rename to tests/data/peppydata/example_peps-master/output/results_pipeline/sample3/pipeline2/sample3_GSM2471249_2.bw diff --git a/tests/data/example_peps-master/subannotation.ipynb b/tests/data/peppydata/example_peps-master/subannotation.ipynb similarity index 100% rename from tests/data/example_peps-master/subannotation.ipynb rename to tests/data/peppydata/example_peps-master/subannotation.ipynb diff --git a/tests/data/phcdata/sample_pep/sample_table.csv b/tests/data/phcdata/sample_pep/sample_table.csv new file mode 100644 index 00000000..00039113 --- /dev/null +++ b/tests/data/phcdata/sample_pep/sample_table.csv @@ -0,0 +1,5 @@ +file,file_id,protocol,identifier,sample_name +local_files,,anySampleType,frog1,frog_1 +local_files_unmerged,,anySampleType,frog2,frog_2 +local_files_unmerged,,anySampleType,frog3,frog_3 +local_files_unmerged,,anySampleType,frog4,frog_4 diff --git a/tests/data/phcdata/sample_pep/subsamp_config.yaml b/tests/data/phcdata/sample_pep/subsamp_config.yaml new file mode 100644 index 00000000..06fae282 --- /dev/null +++ 
b/tests/data/phcdata/sample_pep/subsamp_config.yaml @@ -0,0 +1,16 @@ +description: This project contains subsamples! +looper: + output_dir: $HOME/hello_looper_results + pipeline_interfaces: + - ../pipeline/pipeline_interface.yaml +name: subsamp +pep_version: 2.0.0 +sample_modifiers: + derive: + attributes: + - file + sources: + local_files: ../data/{identifier}{file_id}_data.txt + local_files_unmerged: ../data/{identifier}*_data.txt +sample_table: sample_table.csv +subsample_table: subsample_table.csv diff --git a/tests/data/phcdata/sample_pep/subsample_table.csv b/tests/data/phcdata/sample_pep/subsample_table.csv new file mode 100644 index 00000000..e7cd6400 --- /dev/null +++ b/tests/data/phcdata/sample_pep/subsample_table.csv @@ -0,0 +1,4 @@ +file_id,sample_name +a,frog_1 +b,frog_1 +c,frog_1 diff --git a/tests/eidotests/conftest.py b/tests/eidotests/conftest.py new file mode 100644 index 00000000..bb46a5b8 --- /dev/null +++ b/tests/eidotests/conftest.py @@ -0,0 +1,164 @@ +import os + +import pytest +from peppy import Project + + +@pytest.fixture +def data_path(): + return os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data", "eidodata" + ) + + +@pytest.fixture +def schemas_path(data_path): + return os.path.join(data_path, "schemas") + + +@pytest.fixture +def peps_path(data_path): + return os.path.join(data_path, "peps") + + +@pytest.fixture +def project_file_path(peps_path): + return os.path.join(peps_path, "test_pep", "test_cfg.yaml") + + +@pytest.fixture +def project_table_path(peps_path): + return os.path.join(peps_path, "test_pep", "test_sample_table.csv") + + +@pytest.fixture +def project_object(project_file_path): + return Project(project_file_path) + + +@pytest.fixture +def schema_file_path(schemas_path): + return os.path.join(schemas_path, "test_schema.yaml") + + +@pytest.fixture +def schema_samples_file_path(schemas_path): + return os.path.join(schemas_path, "test_schema_samples.yaml") + + +@pytest.fixture +def 
schema_invalid_file_path(schemas_path): + return os.path.join(schemas_path, "test_schema_invalid.yaml") + + +@pytest.fixture +def schema_sample_invalid_file_path(schemas_path): + return os.path.join(schemas_path, "test_schema_sample_invalid.yaml") + + +@pytest.fixture +def schema_imports_file_path(schemas_path): + return os.path.join(schemas_path, "test_schema_imports.yaml") + + +@pytest.fixture +def schema_rel_path_imports_file_path(schemas_path): + return os.path.join(schemas_path, "test_schema_imports_rel_path.yaml") + + +@pytest.fixture +def taxprofiler_project_path(peps_path): + return os.path.join(peps_path, "multiline_output", "config.yaml") + + +@pytest.fixture +def taxprofiler_project(taxprofiler_project_path): + return Project(taxprofiler_project_path) + + +@pytest.fixture +def path_to_taxprofiler_csv_multiline_output(peps_path): + return os.path.join(peps_path, "multiline_output", "multiline_output.csv") + + +@pytest.fixture +def path_pep_with_fasta_column(peps_path): + return os.path.join(peps_path, "pep_with_fasta_column", "config.yaml") + + +@pytest.fixture +def project_pep_with_fasta_column(path_pep_with_fasta_column): + return Project(path_pep_with_fasta_column, sample_table_index="sample") + + +@pytest.fixture +def output_pep_with_fasta_column(path_pep_with_fasta_column): + with open( + os.path.join(os.path.dirname(path_pep_with_fasta_column), "output.csv") + ) as f: + return f.read() + + +@pytest.fixture +def taxprofiler_csv_multiline_output(path_to_taxprofiler_csv_multiline_output): + with open(path_to_taxprofiler_csv_multiline_output, "r") as file: + data = file.read() + return data + # This is broken unless I add na_filter=False. But it's a bad idea anyway, since + # we're just using this for string comparison anyway... 
+ # return pd.read_csv( + # path_to_taxprofiler_csv_multiline_output, na_filter=False + # ).to_csv(path_or_buf=None, index=None) + + +@pytest.fixture +def path_pep_for_schema_with_rel_path(peps_path): + return os.path.join(peps_path, "pep_schema_rel_path", "config.yaml") + + +@pytest.fixture +def path_pep_nextflow_taxprofiler(peps_path): + return os.path.join(peps_path, "pep_nextflow_taxprofiler", "config.yaml") + + +@pytest.fixture +def project_pep_nextflow_taxprofiler(path_pep_nextflow_taxprofiler): + return Project(path_pep_nextflow_taxprofiler, sample_table_index="sample") + + +@pytest.fixture +def output_pep_nextflow_taxprofiler(path_pep_nextflow_taxprofiler): + with open( + os.path.join(os.path.dirname(path_pep_nextflow_taxprofiler), "output.csv") + ) as f: + return f.read() + + +@pytest.fixture +def save_result_mock(mocker): + return mocker.patch("peppy.eido.conversion.save_result") + + +@pytest.fixture +def test_file_existing_schema(schemas_path): + return os.path.join(schemas_path, "schema_test_file_exist.yaml") + + +@pytest.fixture +def test_file_existing_pep(peps_path): + return os.path.join(peps_path, "test_file_existing", "project_config.yaml") + + +@pytest.fixture +def test_schema_value_check(schemas_path): + return os.path.join(schemas_path, "value_check_schema.yaml") + + +@pytest.fixture +def test_file_value_check(peps_path): + return os.path.join(peps_path, "value_check_pep", "project_config.yaml") + + +@pytest.fixture +def test_multiple_subs(peps_path): + return os.path.join(peps_path, "multiple_subsamples", "project_config.yaml") diff --git a/tests/eidotests/test_conversions.py b/tests/eidotests/test_conversions.py new file mode 100644 index 00000000..e821030b --- /dev/null +++ b/tests/eidotests/test_conversions.py @@ -0,0 +1,106 @@ +from peppy.eido.conversion import ( + convert_project, + get_available_pep_filters, + pep_conversion_plugins, + run_filter, +) +from peppy.project import Project + + +class TestConversionInfrastructure: + def 
test_plugins_are_read(self): + avail_filters = get_available_pep_filters() + assert isinstance(avail_filters, list) + + def test_plugins_contents(self): + avail_plugins = pep_conversion_plugins() + avail_filters = get_available_pep_filters() + assert all( + [plugin_name in avail_filters for plugin_name in avail_plugins.keys()] + ) + + def test_plugins_are_callable(self): + avail_plugins = pep_conversion_plugins() + assert all( + [callable(plugin_fun) for plugin_name, plugin_fun in avail_plugins.items()] + ) + + def test_basic_filter(self, save_result_mock, project_object): + conv_result = run_filter( + project_object, + "basic", + verbose=False, + plugin_kwargs={"paths": {"project": "out/basic_prj.txt"}}, + ) + + assert save_result_mock.called + assert conv_result["project"] == str(project_object) + + def test_csv_filter( + self, save_result_mock, taxprofiler_project, taxprofiler_csv_multiline_output + ): + conv_result = run_filter( + taxprofiler_project, + "csv", + verbose=False, + plugin_kwargs={"paths": {"samples": "out/basic_prj.txt"}}, + ) + + assert save_result_mock.called + assert conv_result["samples"] == taxprofiler_csv_multiline_output + + def test_csv_filter_handles_empty_fasta_correctly( + self, + project_pep_with_fasta_column, + output_pep_with_fasta_column, + save_result_mock, + ): + conv_result = run_filter( + project_pep_with_fasta_column, + "csv", + verbose=False, + plugin_kwargs={"paths": {"samples": "out/basic_prj.txt"}}, + ) + + assert save_result_mock.called + assert conv_result == {"samples": output_pep_with_fasta_column} + + def test_eido_csv_filter_filters_nextflow_taxprofiler_input_correctly( + self, + project_pep_nextflow_taxprofiler, + output_pep_nextflow_taxprofiler, + save_result_mock, + ): + conv_result = run_filter( + project_pep_nextflow_taxprofiler, + "csv", + verbose=False, + plugin_kwargs={"paths": {"samples": "out/basic_prj.txt"}}, + ) + + assert save_result_mock.called + assert conv_result == {"samples": 
output_pep_nextflow_taxprofiler} + + def test_multiple_subsamples(self, test_multiple_subs): + project = Project(test_multiple_subs, sample_table_index="sample_id") + + conversion = convert_project( + project, + "csv", + ) + assert isinstance(conversion["samples"], str) + conversion = convert_project( + project, + "basic", + ) + assert isinstance(conversion["project"], str) + conversion = convert_project( + project, + "yaml", + ) + assert isinstance(conversion["project"], str) + conversion = convert_project( + project, + "yaml-samples", + ) + assert isinstance(conversion["samples"], str) diff --git a/tests/eidotests/test_schema_operations.py b/tests/eidotests/test_schema_operations.py new file mode 100644 index 00000000..af0beb87 --- /dev/null +++ b/tests/eidotests/test_schema_operations.py @@ -0,0 +1,15 @@ +from peppy.eido.schema import read_schema +from yaml import safe_load + + +class TestSchemaReading: + def test_imports_file_schema(self, schema_imports_file_path): + s = read_schema(schema_imports_file_path) + assert isinstance(s, list) + assert len(s) == 2 + + def test_imports_dict_schema(self, schema_imports_file_path): + with open(schema_imports_file_path, "r") as f: + s = read_schema(safe_load(f)) + assert isinstance(s, list) + assert len(s) == 2 diff --git a/tests/eidotests/test_validations.py b/tests/eidotests/test_validations.py new file mode 100644 index 00000000..c5ff3c92 --- /dev/null +++ b/tests/eidotests/test_validations.py @@ -0,0 +1,174 @@ +import urllib + +import pytest +from peppy import Project +from peppy.eido.exceptions import EidoValidationError, PathAttrNotFoundError +from peppy.eido.validation import ( + validate_config, + validate_input_files, + validate_original_samples, + validate_project, + validate_sample, +) +from peppy.utils import load_yaml + + +def _check_remote_file_accessible(url): + try: + code = urllib.request.urlopen(url).getcode() + except (urllib.error.URLError, OSError): + pytest.skip(f"Remote file not found: {url}") + 
else: + if code != 200: + pytest.skip(f"Return code: {code}. Remote file not found: {url}") + + +class TestProjectValidation: + def test_validate_works(self, project_object, schema_file_path): + validate_project(project=project_object, schema=schema_file_path) + + def test_validate_detects_invalid(self, project_object, schema_invalid_file_path): + with pytest.raises(EidoValidationError): + validate_project(project=project_object, schema=schema_invalid_file_path) + + def test_validate_detects_invalid_imports( + self, project_object, schema_imports_file_path + ): + with pytest.raises(EidoValidationError): + validate_project(project=project_object, schema=schema_imports_file_path) + + def test_validate_imports_with_rel_path( + self, path_pep_for_schema_with_rel_path, schema_rel_path_imports_file_path + ): + pep_project = Project(path_pep_for_schema_with_rel_path) + validate_project(project=pep_project, schema=schema_rel_path_imports_file_path) + + def test_validate_converts_samples_to_private_attr( + self, project_object, schema_samples_file_path + ): + """ + In peppy.Project the list of peppy.Sample objects is + accessible via _samples attr. 
+ To make the schema creation more accessible for eido users + samples->_samples key conversion has been implemented + """ + validate_project(project=project_object, schema=schema_samples_file_path) + + def test_validate_works_with_dict_schema(self, project_object, schema_file_path): + validate_project(project=project_object, schema=load_yaml(schema_file_path)) + + @pytest.mark.parametrize("schema_arg", [1, None, [1, 2, 3]]) + def test_validate_raises_error_for_incorrect_schema_type( + self, project_object, schema_arg + ): + with pytest.raises(TypeError): + validate_project(project=project_object, schema=schema_arg) + + +class TestSampleValidation: + @pytest.mark.parametrize("sample_name", [0, 1, "GSM1558746"]) + def test_validate_works( + self, project_object, sample_name, schema_samples_file_path + ): + validate_sample( + project=project_object, + sample_name=sample_name, + schema=schema_samples_file_path, + ) + + @pytest.mark.parametrize("sample_name", [22, "bogus_sample_name"]) + def test_validate_raises_error_for_incorrect_sample_name( + self, project_object, sample_name, schema_samples_file_path + ): + with pytest.raises((ValueError, IndexError)): + validate_sample( + project=project_object, + sample_name=sample_name, + schema=schema_samples_file_path, + ) + + @pytest.mark.parametrize("sample_name", [0, 1, "GSM1558746"]) + def test_validate_detects_invalid( + self, project_object, sample_name, schema_sample_invalid_file_path + ): + with pytest.raises(EidoValidationError): + validate_sample( + project=project_object, + sample_name=sample_name, + schema=schema_sample_invalid_file_path, + ) + + def test_original_sample(self, project_table_path, schema_samples_file_path): + validate_original_samples(project_table_path, schema_samples_file_path) + + +class TestConfigValidation: + def test_validate_succeeds_on_invalid_sample( + self, project_object, schema_sample_invalid_file_path + ): + validate_config(project=project_object, 
schema=schema_sample_invalid_file_path) + + def test_validate_on_yaml_dict( + self, project_file_path, schema_sample_invalid_file_path + ): + validate_config( + project=project_file_path, schema=schema_sample_invalid_file_path + ) + + +class TestRemoteValidation: + @pytest.mark.parametrize("schema_url", ["http://schema.databio.org/pep/2.0.0.yaml"]) + def test_validate_works_with_remote_schemas(self, project_object, schema_url): + _check_remote_file_accessible(schema_url) + validate_project(project=project_object, schema=schema_url) + validate_config(project=project_object, schema=schema_url) + validate_sample(project=project_object, schema=schema_url, sample_name=0) + + +class TestImportsValidation: + def test_validate(self, project_object, schema_file_path): + validate_project(project=project_object, schema=schema_file_path) + + +class TestProjectWithoutConfigValidation: + @pytest.mark.parametrize( + "remote_pep_cfg", + [ + "https://raw.githubusercontent.com/pepkit/example_peps/master/example_basic/sample_table.csv" + ], + ) + def test_validate_works(self, schema_file_path, remote_pep_cfg): + _check_remote_file_accessible(remote_pep_cfg) + validate_project( + project=Project( + remote_pep_cfg + ), # create Project object from a remote sample table + schema=schema_file_path, + ) + + @pytest.mark.parametrize( + "remote_pep_cfg", + [ + "https://raw.githubusercontent.com/pepkit/example_peps/master/example_basic/sample_table.csv" + ], + ) + def test_validate_detects_invalid(self, schema_invalid_file_path, remote_pep_cfg): + _check_remote_file_accessible(remote_pep_cfg) + with pytest.raises(EidoValidationError): + validate_project( + project=Project(remote_pep_cfg), schema=schema_invalid_file_path + ) + + def test_validate_file_existence( + self, test_file_existing_pep, test_file_existing_schema + ): + schema_path = test_file_existing_schema + prj = Project(test_file_existing_pep) + with pytest.raises(PathAttrNotFoundError): + validate_input_files(prj, schema_path) + + 
def test_validation_values(self, test_schema_value_check, test_file_value_check): + schema_path = test_schema_value_check + prj = Project(test_file_value_check) + with pytest.raises(EidoValidationError): + validate_project(project=prj, schema=schema_path) diff --git a/tests/conftest.py b/tests/peppytests/conftest.py similarity index 98% rename from tests/conftest.py rename to tests/peppytests/conftest.py index 0d43a209..2d9260f8 100644 --- a/tests/conftest.py +++ b/tests/peppytests/conftest.py @@ -15,8 +15,8 @@ def merge_paths(pep_branch, directory_name): return os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))), - "tests", "data", + "peppydata", "example_peps-{}".format(pep_branch), "example_{}".format(directory_name), ) diff --git a/tests/test_Project.py b/tests/peppytests/test_Project.py similarity index 88% rename from tests/test_Project.py rename to tests/peppytests/test_Project.py index 58ff97bd..03775b92 100644 --- a/tests/test_Project.py +++ b/tests/peppytests/test_Project.py @@ -1,15 +1,12 @@ -"""Classes for peppy.Project smoketesting""" - import os +import pickle import socket import tempfile +from pathlib import Path import numpy as np import pytest from pandas import DataFrame -from yaml import dump, safe_load -import pickle - from peppy import Project from peppy.const import SAMPLE_NAME_ATTR, SAMPLE_TABLE_FILE_KEY from peppy.exceptions import ( @@ -18,6 +15,7 @@ MissingAmendmentError, RemoteYAMLError, ) +from yaml import dump, safe_load __author__ = "Michal Stolarczyk" __email__ = "michal.stolarczyk@nih.gov" @@ -38,6 +36,7 @@ "subtable4", "subtable5", "remove", + "issue499", ] @@ -98,12 +97,21 @@ def test_expand_path(self, example_pep_cfg_path, defer): """ Verify output_path is expanded """ + if os.name == "nt": + example_pep_cfg_path = os.path.join( + *os.path.split(example_pep_cfg_path)[:1], + f"win_{os.path.split(example_pep_cfg_path)[1]}", + ) p = Project( cfg=example_pep_cfg_path, amendments="newLib", 
defer_samples_creation=defer, ) - assert not p.config["output_dir"].startswith("$") + assert not ( + p.config["output_dir"].startswith("$") + or p.config["output_dir"].startswith("%") + ) + # assert p.config["output_dir"] == str(Path.home() / "hello_looper_results") @pytest.mark.parametrize( "config_path", @@ -577,6 +585,15 @@ def test_derive(self, example_pep_cfg_path): p, pd = _get_pair_to_post_init_test(example_pep_cfg_path) _cmp_all_samples_attr(p, pd, "file_path") + @pytest.mark.parametrize("example_pep_cfg_path", ["issue499"], indirect=True) + def test_issue499(self, example_pep_cfg_path): + """ + Verify that the derivation the same way in a post init + sample creation scenario + """ + p, pd = _get_pair_to_post_init_test(example_pep_cfg_path) + _cmp_all_samples_attr(p, pd, "file_path") + @pytest.mark.parametrize("example_pep_cfg_path", ["append"], indirect=True) def test_equality(self, example_pep_cfg_path): """ @@ -744,3 +761,69 @@ def test_nextflow_subsamples(self, example_pep_cfg_path): """ p = Project(cfg=example_pep_cfg_path) assert isinstance(p, Project) + + +class TestSampleModifiers: + @pytest.mark.parametrize("example_pep_cfg_path", ["append"], indirect=True) + def test_append(self, example_pep_cfg_path): + """Verify that the appended attribute is added to the samples""" + p = Project(cfg=example_pep_cfg_path) + assert all([s["read_type"] == "SINGLE" for s in p.samples]) + + @pytest.mark.parametrize("example_pep_cfg_path", ["imports"], indirect=True) + def test_imports(self, example_pep_cfg_path): + """Verify that the imported attribute is added to the samples""" + p = Project(cfg=example_pep_cfg_path) + assert all([s["imported_attr"] == "imported_val" for s in p.samples]) + + @pytest.mark.parametrize("example_pep_cfg_path", ["imply"], indirect=True) + def test_imply(self, example_pep_cfg_path): + """ + Verify that the implied attribute is added to the correct samples + """ + p = Project(cfg=example_pep_cfg_path) + assert all( + [s["genome"] == 
"hg38" for s in p.samples if s["organism"] == "human"] + ) + assert all( + [s["genome"] == "mm10" for s in p.samples if s["organism"] == "mouse"] + ) + + @pytest.mark.parametrize("example_pep_cfg_path", ["duplicate"], indirect=True) + def test_duplicate(self, example_pep_cfg_path): + """ + Verify that the duplicated attribute is identical to the original + """ + p = Project(cfg=example_pep_cfg_path) + assert all([s["organism"] == s["animal"] for s in p.samples]) + + @pytest.mark.parametrize("example_pep_cfg_path", ["derive"], indirect=True) + def test_derive(self, example_pep_cfg_path): + """ + Verify that the declared attr derivation happened + """ + p = Project(cfg=example_pep_cfg_path) + assert all(["file_path" in s for s in p.samples]) + assert all(["file_path" in s._derived_cols_done for s in p.samples]) + + @pytest.mark.parametrize("example_pep_cfg_path", ["remove"], indirect=True) + def test_remove(self, example_pep_cfg_path): + """ + Verify that the declared attr was eliminated from every sample + """ + p = Project(cfg=example_pep_cfg_path) + assert all(["protocol" not in s for s in p.samples]) + + @pytest.mark.parametrize("example_pep_cfg_path", ["subtable2"], indirect=True) + def test_subtable(self, example_pep_cfg_path): + """ + Verify that the sample merging takes place + """ + p = Project(cfg=example_pep_cfg_path) + assert all( + [ + isinstance(s["file"], list) + for s in p.samples + if s["sample_name"] in ["frog_1", "frog2"] + ] + ) diff --git a/tests/smoketests/test_Sample.py b/tests/peppytests/test_Sample.py similarity index 99% rename from tests/smoketests/test_Sample.py rename to tests/peppytests/test_Sample.py index ba41b003..62b9bf0c 100644 --- a/tests/smoketests/test_Sample.py +++ b/tests/peppytests/test_Sample.py @@ -2,7 +2,6 @@ import tempfile import pytest - from peppy import Project __author__ = "Michal Stolarczyk" diff --git a/tests/phctests/__init__.py b/tests/phctests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git 
a/tests/phctests/conftest.py b/tests/phctests/conftest.py new file mode 100644 index 00000000..39ba8caf --- /dev/null +++ b/tests/phctests/conftest.py @@ -0,0 +1,51 @@ +import os + +import pytest + + +@pytest.fixture +def SAMPLE_PEP(): + return os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), + "data", + "phcdata", + "sample_pep", + "subsamp_config.yaml", + ) + + +@pytest.fixture() +def test_raw_pep_return(): + sample_prj = { + "config": { + "This": "is config", + "description": "desc", + "name": "sample name", + }, + "subsample_list": [], + "sample_list": [ + {"time": "0", "file_path": "source1", "sample_name": "pig_0h"}, + {"time": "1", "file_path": "source1", "sample_name": "pig_1h"}, + {"time": "0", "file_path": "source1", "sample_name": "frog_0h"}, + ], + } + return sample_prj + + +@pytest.fixture +def requests_get_mock(mocker): + return mocker.patch("requests.get") + + +@pytest.fixture +def input_return_mock(monkeypatch): + return monkeypatch.setattr("builtins.input", lambda: None) + + +@pytest.fixture +def test_jwt(): + return ( + "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9." + "eyJsb2dpbiI6InJhZmFsc3RlcGllbiIsImlkIjo0MzkyNjUyMiwib3JnYW5pemF0aW9ucyI6bnVsbH0." 
+ "mgBP-7x5l9cqufhzdVi0OFA78pkYDEymwPFwud02BAc" + ) diff --git a/tests/phctests/test_manual.py b/tests/phctests/test_manual.py new file mode 100644 index 00000000..4ff5162b --- /dev/null +++ b/tests/phctests/test_manual.py @@ -0,0 +1,101 @@ +import pytest + +from peppy.pephubclient.pephubclient import PEPHubClient + + +@pytest.mark.skip(reason="Manual test") +class TestViewsManual: + def test_get(self): + ff = PEPHubClient().view.get( + "databio", + "bedset1", + "default", + "test_view", + ) + print(ff) + + def test_create(self): + PEPHubClient().view.create( + "databio", + "bedset1", + "default", + "test_view", + sample_list=["orange", "grape1", "apple1"], + ) + + def test_delete(self): + PEPHubClient().view.delete( + "databio", + "bedset1", + "default", + "test_view", + ) + + def test_add_sample(self): + PEPHubClient().view.add_sample( + "databio", + "bedset1", + "default", + "test_view", + "name", + ) + + def test_delete_sample(self): + PEPHubClient().view.remove_sample( + "databio", + "bedset1", + "default", + "test_view", + "name", + ) + + +@pytest.mark.skip(reason="Manual test") +class TestSamplesManual: + def test_manual(self): + ff = PEPHubClient().sample.get( + "databio", + "bedset1", + "default", + "grape1", + ) + ff + + def test_update(self): + ff = PEPHubClient().sample.get( + "databio", + "bedset1", + "default", + "newf", + ) + ff.update({"shefflab": "test1"}) + ff["sample_type"] = "new_type" + PEPHubClient().sample.update( + "databio", + "bedset1", + "default", + "newf", + sample_dict=ff, + ) + + def test_add(self): + ff = { + "genome": "phc_test1", + "sample_type": "phc_test", + } + PEPHubClient().sample.create( + "databio", + "bedset1", + "default", + "new_2222", + overwrite=False, + sample_dict=ff, + ) + + def test_delete(self): + PEPHubClient().sample.remove( + "databio", + "bedset1", + "default", + "new_2222", + ) diff --git a/tests/phctests/test_pephubclient.py b/tests/phctests/test_pephubclient.py new file mode 100644 index 00000000..21c7c4e8 
--- /dev/null +++ b/tests/phctests/test_pephubclient.py @@ -0,0 +1,615 @@ +from unittest.mock import Mock + +import pytest + +from peppy.pephubclient.exceptions import ResponseError +from peppy.pephubclient.helpers import is_registry_path +from peppy.pephubclient.pephub_oauth.models import InitializeDeviceCodeResponse +from peppy.pephubclient.pephubclient import PEPHubClient + + +@pytest.fixture() +def device_code_return(): + device_code = "asdf2345" + return InitializeDeviceCodeResponse( + device_code=device_code, + auth_url=f"any_base_url/auth/device/login/{device_code}", + ) + + +class TestSmoke: + def test_login(self, mocker, device_code_return, test_jwt): + """ + Test if device login request was sent to pephub + """ + requests_mock = mocker.patch( + "requests.request", + return_value=Mock(content=device_code_return, status_code=200), + ) + pephub_response_mock = mocker.patch( + "peppy.pephubclient.pephub_oauth.pephub_oauth.PEPHubAuth._handle_pephub_response", + return_value=device_code_return, + ) + + pephub_exchange_code_mock = mocker.patch( + "peppy.pephubclient.pephub_oauth.pephub_oauth.PEPHubAuth._exchange_device_code_on_token", + return_value=test_jwt, + ) + + pathlib_mock = mocker.patch( + "peppy.pephubclient.files_manager.FilesManager.save_jwt_data_to_file" + ) + + PEPHubClient().login() + + assert requests_mock.called + assert pephub_response_mock.called + assert pephub_exchange_code_mock.called + assert pathlib_mock.called + + def test_logout(self, mocker): + os_remove_mock = mocker.patch("os.remove") + PEPHubClient().logout() + + assert os_remove_mock.called + + def test_pull(self, mocker, test_jwt, test_raw_pep_return): + jwt_mock = mocker.patch( + "peppy.pephubclient.files_manager.FilesManager.load_jwt_data_from_file", + return_value=test_jwt, + ) + requests_mock = mocker.patch( + "requests.request", + return_value=Mock(content="some return", status_code=200), + ) + mocker.patch( + "peppy.pephubclient.helpers.RequestManager.decode_response", + 
return_value=test_raw_pep_return, + ) + save_yaml_mock = mocker.patch( + "peppy.pephubclient.files_manager.FilesManager.save_yaml" + ) + save_sample_mock = mocker.patch( + "peppy.pephubclient.files_manager.FilesManager.save_pandas" + ) + mocker.patch( + "peppy.pephubclient.files_manager.FilesManager.create_project_folder" + ) + + PEPHubClient().pull("some/project") + + assert jwt_mock.called + assert requests_mock.called + assert save_yaml_mock.called + assert save_sample_mock.called + + @pytest.mark.parametrize( + "status_code, expected_error_message", + [ + ( + 404, + "File does not exist, or you are unauthorized.", + ), + ( + 500, + "Internal server error. Unexpected return value. Error: 500", + ), + ], + ) + def test_pull_with_pephub_error_response( + self, mocker, test_jwt, status_code, expected_error_message + ): + mocker.patch( + "peppy.pephubclient.files_manager.FilesManager.load_jwt_data_from_file", + return_value=test_jwt, + ) + mocker.patch( + "requests.request", + return_value=Mock( + content=b'{"detail": "Some error message"}', status_code=status_code + ), + ) + + with pytest.raises(ResponseError) as e: + PEPHubClient().pull("some/project") + + assert e.value.message == expected_error_message + + def test_push(self, mocker, test_jwt, SAMPLE_PEP): + requests_mock = mocker.patch( + "requests.request", return_value=Mock(status_code=202) + ) + + PEPHubClient().push( + SAMPLE_PEP, + namespace="s_name", + name="name", + ) + + assert requests_mock.called + + @pytest.mark.parametrize( + "status_code, expected_error_message", + [ + ( + 409, + "Project already exists. Set force to overwrite project.", + ), + ( + 401, + "Unauthorized! 
Failure in uploading project.", + ), + (233, "Unexpected Response Error."), + ], + ) + def test_push_with_pephub_error_response( + self, mocker, status_code, expected_error_message, SAMPLE_PEP + ): + mocker.patch("requests.request", return_value=Mock(status_code=status_code)) + with pytest.raises(ResponseError, match=expected_error_message): + PEPHubClient().push( + SAMPLE_PEP, + namespace="s_name", + name="name", + ) + + def test_search_prj(self, mocker): + return_value = { + "count": 1, + "limit": 100, + "offset": 0, + "results": [ + { + "namespace": "namespace1", + "name": "basic", + "tag": "default", + "is_private": False, + "number_of_samples": 2, + "description": "None", + "last_update_date": "2023-08-27 19:07:31.552861+00:00", + "submission_date": "2023-08-27 19:07:31.552858+00:00", + "digest": "08cbcdbf4974fc84bee824c562b324b5", + "pep_schema": "random_schema_name", + "pop": False, + "stars_number": 0, + "forked_from": None, + } + ], + "session_info": None, + "can_edit": False, + } + mocker.patch( + "requests.request", + return_value=Mock(content=return_value, status_code=200), + ) + mocker.patch( + "peppy.pephubclient.helpers.RequestManager.decode_response", + return_value=return_value, + ) + + return_value = PEPHubClient().find_project(namespace="namespace1") + assert return_value.count == 1 + assert len(return_value.results) == 1 + + +class TestHelpers: + @pytest.mark.parametrize( + "input_str, expected_output", + [ + ( + "databio/pep:default", + True, + ), + ( + "pephub.databio.org::databio/pep:default", + True, + ), + ( + "pephub.databio.org://databio/pep:default", + True, + ), + ( + "databio/pep", + True, + ), + ( + "databio/pep/default", + False, + ), + ( + "some/random/path/to.yaml", + False, + ), + ( + "path_to.csv", + False, + ), + ( + "this/is/path/to.csv", + False, + ), + ], + ) + def test_is_registry_path(self, input_str, expected_output): + assert is_registry_path(input_str) is expected_output + + +class TestSamples: + def test_get(self, 
mocker): + return_value = { + "genome": "phc_test1", + "sample_type": "phc_test", + "sample_name": "gg1", + } + mocker.patch( + "requests.request", + return_value=Mock(content=return_value, status_code=200), + ) + mocker.patch( + "peppy.pephubclient.helpers.RequestManager.decode_response", + return_value=return_value, + ) + return_value = PEPHubClient().sample.get( + "test_namespace", + "test_name", + "default", + "gg1", + ) + assert return_value == return_value + + @pytest.mark.parametrize( + "status_code, expected_error_message", + [ + ( + 404, + "Sample does not exist.", + ), + ( + 500, + "Internal server error. Unexpected return value.", + ), + ( + 403, + "Unexpected return value. Error: 403", + ), + ], + ) + def test_sample_get_with_pephub_error_response( + self, mocker, status_code, expected_error_message + ): + mocker.patch("requests.request", return_value=Mock(status_code=status_code)) + with pytest.raises(ResponseError, match=expected_error_message): + PEPHubClient().sample.get( + "test_namespace", + "test_name", + "default", + "gg1", + ) + + @pytest.mark.parametrize( + "prj_dict", + [ + {"genome": "phc_test1", "sample_type": "phc_test", "sample_name": "gg1"}, + {"genome": "phc_test1", "sample_type": "phc_test"}, + ], + ) + def test_create(self, mocker, prj_dict): + return_value = prj_dict + mocker_obj = mocker.patch( + "requests.request", + return_value=Mock(content=return_value, status_code=202), + ) + + PEPHubClient().sample.create( + "test_namespace", + "test_name", + "default", + "gg1", + sample_dict=return_value, + ) + assert mocker_obj.called + + @pytest.mark.parametrize( + "status_code, expected_error_message", + [ + ( + 404, + "does not exist.", + ), + ( + 409, + "already exists. 
Set overwrite to True to overwrite sample.", + ), + ( + 500, + "Unexpected return value.", + ), + ], + ) + def test_sample_create_with_pephub_error_response( + self, mocker, status_code, expected_error_message + ): + mocker.patch("requests.request", return_value=Mock(status_code=status_code)) + with pytest.raises(ResponseError, match=expected_error_message): + PEPHubClient().sample.create( + "test_namespace", + "test_name", + "default", + "gg1", + sample_dict={ + "genome": "phc_test1", + "sample_type": "phc_test", + "sample_name": "gg1", + }, + ) + + def test_delete(self, mocker): + mocker_obj = mocker.patch( + "requests.request", + return_value=Mock(status_code=202), + ) + + PEPHubClient().sample.remove( + "test_namespace", + "test_name", + "default", + "gg1", + ) + assert mocker_obj.called + + @pytest.mark.parametrize( + "status_code, expected_error_message", + [ + ( + 404, + "does not exist.", + ), + ( + 500, + "Unexpected return value.", + ), + ], + ) + def test_sample_delete_with_pephub_error_response( + self, mocker, status_code, expected_error_message + ): + mocker.patch("requests.request", return_value=Mock(status_code=status_code)) + with pytest.raises(ResponseError, match=expected_error_message): + PEPHubClient().sample.remove( + "test_namespace", + "test_name", + "default", + "gg1", + ) + + def test_update(self, mocker): + mocker_obj = mocker.patch( + "requests.request", + return_value=Mock(status_code=202), + ) + + PEPHubClient().sample.update( + "test_namespace", + "test_name", + "default", + "gg1", + sample_dict={ + "genome": "phc_test1", + "sample_type": "phc_test", + "new_col": "column", + }, + ) + assert mocker_obj.called + + @pytest.mark.parametrize( + "status_code, expected_error_message", + [ + ( + 404, + "does not exist.", + ), + ( + 500, + "Unexpected return value.", + ), + ], + ) + def test_sample_update_with_pephub_error_response( + self, mocker, status_code, expected_error_message + ): + mocker.patch("requests.request", 
return_value=Mock(status_code=status_code)) + with pytest.raises(ResponseError, match=expected_error_message): + PEPHubClient().sample.update( + "test_namespace", + "test_name", + "default", + "gg1", + sample_dict={ + "genome": "phc_test1", + "sample_type": "phc_test", + "new_col": "column", + }, + ) + + +class TestViews: + def test_get(self, mocker, test_raw_pep_return): + return_value = test_raw_pep_return + mocker.patch( + "requests.request", + return_value=Mock(content=return_value, status_code=200), + ) + mocker.patch( + "peppy.pephubclient.helpers.RequestManager.decode_response", + return_value=return_value, + ) + + return_value = PEPHubClient().view.get( + "test_namespace", + "test_name", + "default", + "gg1", + ) + assert return_value == return_value + + @pytest.mark.parametrize( + "status_code, expected_error_message", + [ + ( + 404, + "does not exist.", + ), + ( + 500, + "Internal server error.", + ), + ], + ) + def test_view_get_with_pephub_error_response( + self, mocker, status_code, expected_error_message + ): + mocker.patch("requests.request", return_value=Mock(status_code=status_code)) + with pytest.raises(ResponseError, match=expected_error_message): + PEPHubClient().view.get( + "test_namespace", + "test_name", + "default", + "gg1", + ) + + def test_create(self, mocker): + mocker_obj = mocker.patch( + "requests.request", + return_value=Mock(status_code=202), + ) + + PEPHubClient().view.create( + "test_namespace", + "test_name", + "default", + "gg1", + sample_list=["sample1", "sample2"], + ) + assert mocker_obj.called + + @pytest.mark.parametrize( + "status_code, expected_error_message", + [ + ( + 404, + "does not exist.", + ), + ( + 409, + "already exists in the project.", + ), + ], + ) + def test_view_create_with_pephub_error_response( + self, mocker, status_code, expected_error_message + ): + mocker.patch("requests.request", return_value=Mock(status_code=status_code)) + with pytest.raises(ResponseError, match=expected_error_message): + 
PEPHubClient().view.create( + "test_namespace", + "test_name", + "default", + "gg1", + sample_list=["sample1", "sample2"], + ) + + def test_delete(self, mocker): + mocker_obj = mocker.patch( + "requests.request", + return_value=Mock(status_code=202), + ) + + PEPHubClient().view.delete( + "test_namespace", + "test_name", + "default", + "gg1", + ) + assert mocker_obj.called + + @pytest.mark.parametrize( + "status_code, expected_error_message", + [ + ( + 404, + "does not exist.", + ), + ( + 401, + "You are unauthorized to delete this view.", + ), + ], + ) + def test_view_delete_with_pephub_error_response( + self, mocker, status_code, expected_error_message + ): + mocker.patch("requests.request", return_value=Mock(status_code=status_code)) + with pytest.raises(ResponseError, match=expected_error_message): + PEPHubClient().view.delete( + "test_namespace", + "test_name", + "default", + "gg1", + ) + + def test_add_sample(self, mocker): + mocker_obj = mocker.patch( + "requests.request", + return_value=Mock(status_code=202), + ) + + PEPHubClient().view.add_sample( + "test_namespace", + "test_name", + "default", + "gg1", + "sample1", + ) + assert mocker_obj.called + + def test_delete_sample(self, mocker): + mocker_obj = mocker.patch( + "requests.request", + return_value=Mock(status_code=202), + ) + + PEPHubClient().view.remove_sample( + "test_namespace", + "test_name", + "default", + "gg1", + "sample1", + ) + assert mocker_obj.called + + +### + + +# test add sample: +# 1. add correct 202 +# 2. add existing 409 +# 3. add with sample_name +# 4. add without sample_name +# 5. add with overwrite +# 6. add to nonexistent project 404 + +# delete sample: +# 1. delete existing 202 +# 2. delete nonexistent 404 + +# get sample: +# 1. get existing 200 +# 2. get nonexistent 404 +# 3. get with raw 200 +# 4. get from nonexistent project 404 + +# update sample: +# 1. update existing 202 +# 2. update nonexistent sample 404 +# 3. 
update nonexistent project 404 diff --git a/tests/smoketests/test_Project.py b/tests/smoketests/test_Project.py deleted file mode 100644 index 197ba9b0..00000000 --- a/tests/smoketests/test_Project.py +++ /dev/null @@ -1,69 +0,0 @@ -import pytest - -from peppy.project import Project - - -class TestSampleModifiers: - @pytest.mark.parametrize("example_pep_cfg_path", ["append"], indirect=True) - def test_append(self, example_pep_cfg_path): - """Verify that the appended attribute is added to the samples""" - p = Project(cfg=example_pep_cfg_path) - assert all([s["read_type"] == "SINGLE" for s in p.samples]) - - @pytest.mark.parametrize("example_pep_cfg_path", ["imports"], indirect=True) - def test_imports(self, example_pep_cfg_path): - """Verify that the imported attribute is added to the samples""" - p = Project(cfg=example_pep_cfg_path) - assert all([s["imported_attr"] == "imported_val" for s in p.samples]) - - @pytest.mark.parametrize("example_pep_cfg_path", ["imply"], indirect=True) - def test_imply(self, example_pep_cfg_path): - """ - Verify that the implied attribute is added to the correct samples - """ - p = Project(cfg=example_pep_cfg_path) - assert all( - [s["genome"] == "hg38" for s in p.samples if s["organism"] == "human"] - ) - assert all( - [s["genome"] == "mm10" for s in p.samples if s["organism"] == "mouse"] - ) - - @pytest.mark.parametrize("example_pep_cfg_path", ["duplicate"], indirect=True) - def test_duplicate(self, example_pep_cfg_path): - """ - Verify that the duplicated attribute is identical to the original - """ - p = Project(cfg=example_pep_cfg_path) - assert all([s["organism"] == s["animal"] for s in p.samples]) - - @pytest.mark.parametrize("example_pep_cfg_path", ["derive"], indirect=True) - def test_derive(self, example_pep_cfg_path): - """ - Verify that the declared attr derivation happened - """ - p = Project(cfg=example_pep_cfg_path) - assert all(["file_path" in s for s in p.samples]) - assert all(["file_path" in s._derived_cols_done for 
s in p.samples]) - - @pytest.mark.parametrize("example_pep_cfg_path", ["remove"], indirect=True) - def test_remove(self, example_pep_cfg_path): - """ - Verify that the declared attr was eliminated from every sample - """ - p = Project(cfg=example_pep_cfg_path) - assert all(["protocol" not in s for s in p.samples]) - - @pytest.mark.parametrize("example_pep_cfg_path", ["subtable2"], indirect=True) - def test_subtable(self, example_pep_cfg_path): - """ - Verify that the sample merging takes place - """ - p = Project(cfg=example_pep_cfg_path) - assert all( - [ - isinstance(s["file"], list) - for s in p.samples - if s["sample_name"] in ["frog_1", "frog2"] - ] - ) diff --git a/update_test_data.sh b/update_test_data.sh index afca3415..615342b6 100755 --- a/update_test_data.sh +++ b/update_test_data.sh @@ -8,8 +8,8 @@ fi branch=$1 wget https://github.com/pepkit/example_peps/archive/${branch}.zip -mv ${branch}.zip tests/data/ -cd tests/data/ +mv ${branch}.zip tests/data/peppydata/ +cd tests/data/peppydata/ rm -rf example_peps-${branch} unzip ${branch}.zip rm ${branch}.zip