diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
new file mode 100644
index 0000000..2782534
--- /dev/null
+++ b/.github/workflows/checks.yml
@@ -0,0 +1,75 @@
+name: cwl-WES checks
+
+on:
+  push:
+    branches: [dev]
+  pull_request:
+    branches: [dev]
+
+jobs:
+  lint:
+    name: Run linting
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.8"
+      - name: Install requirements
+        run: |
+          pip install .
+          pip install -r requirements_dev.txt
+      - name: Lint with Flake8
+        run: flake8 cwl_wes/ setup.py
+      - name: Lint with Pylint
+        run: pylint cwl_wes/ setup.py
+  test:
+    name: Run tests
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v3
+      - name: Deploy app
+        run: docker-compose up -d --build
+      - name: Wait for app startup
+        shell: bash
+        run: sleep 20
+      - name: Run integration tests
+        shell: bash
+        run: bash tests/integration_tests.sh
+      - name: Tear down app
+        run: docker-compose down
+  publish:
+    name: Build and publish app image
+    runs-on: ubuntu-latest
+    if: ${{ github.event_name == 'push' }}
+    needs: [lint, test]
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v3
+      - name: Generate tag
+        run: |
+          echo "TAG=$(date '+%Y%m%d')" >> $GITHUB_ENV
+      - name: Build and publish image
+        id: docker
+        uses: philips-software/docker-ci-scripts@v5.0.0
+        with:
+          dockerfile: .
+          image-name: "cwl-wes"
+          tags: "latest ${{ env.TAG }}"
+          push-branches: "${{ github.event.repository.default_branch }}"
+        env:
+          REGISTRY_USERNAME: ${{ secrets.DOCKERHUB_LOGIN }}
+          REGISTRY_TOKEN: "${{ secrets.DOCKERHUB_TOKEN }}"
+          DOCKER_ORGANIZATION: ${{ secrets.DOCKERHUB_ORG }}
+          GITHUB_ORGANIZATION: ${{ github.repository_owner }}
+      - name: Verify that image was pushed
+        run: |
+          echo "Push indicator: ${{ steps.docker.outputs.push-indicator }}"
+          echo "# Set to 'true' if image was pushed, empty string otherwise"
+          test "${{ steps.docker.outputs.push-indicator }}" == "true"
diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
deleted file mode 100644
index bf8d389..0000000
--- a/.github/workflows/docker-image.yml
+++ /dev/null
@@ -1,56 +0,0 @@
-name: Docker Image CI
-
-on:
-  push:
-    branches: [ dev ]
-
-env:
-  DOCKER_REPO_NAME: elixircloud/cwl-wes
-
-jobs:
-  test:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      packages: write
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v2
-      - name: Test build
-        run: docker-compose up -d
-      - name: Sleep
-        shell: bash
-        run: sleep 30;
-      - name: Test endpoint
-        shell: bash
-        run: bash test-http-call.bash
-      - name: End test
-        run: docker-compose down
-
-  build:
-
-    runs-on: ubuntu-latest
-    env:
-      DOCKER_REPO_NAME: elixircloud/cwl-wes
-    steps:
-      - uses: actions/checkout@v3
-      - name: Build the Docker image
-        run: docker build . --file Dockerfile --tag ${DOCKER_REPO_NAME}:$(date +%Y%m%d) --tag ${DOCKER_REPO_NAME}:latest
-      - name: Login to DockerHub
-        if: github.event_name != 'pull_request'
-        uses: docker/login-action@v1
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_TOKEN }}
-      - name: Set today env variable
-        run: |
-          echo "today=$(date +%Y%m%d)" >> $GITHUB_ENV
-      - name: Build and push
-        uses: docker/build-push-action@v2
-        if: github.ref == 'refs/heads/dev'
-        with:
-          context: .
-          push: true
-          tags: |
-            ${{ env.DOCKER_REPO_NAME }}:${{ env.today }}
-            ${{ env.DOCKER_REPO_NAME }}:latest
diff --git a/.github/workflows/pr-test.yaml b/.github/workflows/pr-test.yaml
deleted file mode 100644
index b4271d3..0000000
--- a/.github/workflows/pr-test.yaml
+++ /dev/null
@@ -1,30 +0,0 @@
-name: Test
-
-# This workflow uses actions that are not certified by GitHub.
-# They are provided by a third-party and are governed by
-# separate terms of service, privacy policy, and support
-# documentation.
-
-on:
-  pull_request:
-    branches: [ dev ]
-
-jobs:
-  test:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      packages: write
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v2
-      - name: Test build
-        run: docker-compose up -d
-      - name: Sleep
-        shell: bash
-        run: sleep 30;
-      - name: Test endpoint
-        shell: bash
-        run: bash test-http-call.bash
-      - name: End test
-        run: docker-compose down
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index f91914d..b343527 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,51 +1,24 @@
-##### BASE IMAGE #####
-FROM python:3.7-slim-stretch
+FROM docker.io/elixircloud/foca:20231219-py3.11
 
-##### METADATA #####
-LABEL base.image="python:3.6-slim-stretch"
 LABEL version="1.1"
 LABEL software="cwl-WES"
-LABEL software.version="1.0"
-LABEL software.description="Flask microservice implementing the Global Alliance for Genomics and Health (GA4GH) Workflow Execution Service (WES) API specification."
+LABEL software.description="Trigger CWL workflows via GA4GH WES and TES"
 LABEL software.website="https://github.com/elixir-cloud-aai/cwl-WES"
 LABEL software.documentation="https://github.com/elixir-cloud-aai/cwl-WES"
-LABEL software.license="https://github.com/elixir-cloud-aai/cwl-WES/blob/master/LICENSE"
-LABEL software.tags="General"
-LABEL maintainer="alexander.kanitz@alumni.ethz.ch"
-LABEL maintainer.organisation="Biozentrum, University of Basel"
-LABEL maintainer.location="Klingelbergstrasse 50/70, CH-4056 Basel, Switzerland"
-LABEL maintainer.lab="ELIXIR Cloud & AAI"
-LABEL maintainer.license="https://spdx.org/licenses/Apache-2.0"
+LABEL software.license="https://spdx.org/licenses/Apache-2.0"
+LABEL maintainer="cloud-service@elixir-europe.org"
+LABEL maintainer.organisation="ELIXIR Cloud & AAI"
 
 # Python UserID workaround for OpenShift/K8S
 ENV LOGNAME=ipython
 ENV USER=ipython
-ENV HOME=/tmp/user
 
-# Install general dependencies
-RUN apt-get update && apt-get install -y nodejs openssl git build-essential python3-dev curl jq
-
-## Set working directory
 WORKDIR /app
-
-## Copy Python requirements
 COPY ./requirements.txt /app/requirements.txt
+RUN pip install -r requirements.txt
+COPY ./ .
+RUN pip install -e .
 
-## Install Python dependencies
-RUN cd /app \
-    && pip install -r requirements.txt \
-    && cd /app/src/cwl-tes \
-    && python setup.py develop \
-    && cd / \
-    && mkdir -p /tmp/user
-
-## Copy remaining app files
-COPY ./ /app
-
-## Install app & set write permissions for specs directory
-RUN cd /app \
-    && python setup.py develop \
-    && cd / \
-    && chmod g+w /app/cwl_wes/api/ \
-    && chmod g+w -R /tmp/user
-
+## Add permissions for storing updated API specification
+## (required by FOCA)
+RUN chmod -R a+rwx /app/cwl_wes/api
diff --git a/README.md b/README.md
index 976e091..cd0a693 100644
--- a/README.md
+++ b/README.md
@@ -87,7 +87,7 @@ cd app
 * Via the **app configuration file**
 
   ```bash
-  vi cwl_wes/config/app_config.yaml
+  vi cwl_wes/config.yaml
   ```
 
 * Via **environment variables**
@@ -253,7 +253,7 @@ question etc.
 [badge-url-ci]: 
 [badge-url-health]: 
 [badge-url-license]: 
-[config-app]: cwl_wes/config/app_config.yaml
+[config-app]: cwl_wes/config.yaml
 [docs-kubernetes]: deployment/README.md
 [elixir-aai]: https://perun.elixir-czech.cz/
 [elixir-user-group-apply]: https://perun.elixir-czech.cz/fed/registrar/?vo=elixir&group=ECP_CLN:OSS
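Note on the `ENV LOGNAME=ipython` / `ENV USER=ipython` lines that the new Dockerfile keeps: OpenShift and similar Kubernetes setups run containers under arbitrary UIDs that have no `/etc/passwd` entry, so user-name lookups fail. The sketch below illustrates the standard-library behavior this workaround targets; it is an editorial illustration, not part of the diff, and the values are simply taken from the Dockerfile:

```python
import getpass
import os

# getpass.getuser() consults the LOGNAME, USER, LNAME and USERNAME
# environment variables, in that order, before falling back to
# pwd.getpwuid(os.getuid()).pw_name. Under an arbitrary UID that pwd
# lookup raises KeyError, so pre-setting LOGNAME/USER (as the
# Dockerfile does) keeps libraries that ask for a user name working.
os.environ.setdefault("LOGNAME", "ipython")  # value from the Dockerfile
os.environ.setdefault("USER", "ipython")

print(getpass.getuser())  # 'ipython', even without a passwd entry
```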
diff --git a/cwl_wes/__init__.py b/cwl_wes/__init__.py
index a842d05..a1e7cda 100644
--- a/cwl_wes/__init__.py
+++ b/cwl_wes/__init__.py
@@ -1 +1 @@
-__version__ = '0.15.0'
+"""cwl-WES package."""
diff --git a/cwl_wes/api/__init__.py b/cwl_wes/api/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/cwl_wes/api/register_openapi.py b/cwl_wes/api/register_openapi.py
deleted file mode 100644
index 0ad8f57..0000000
--- a/cwl_wes/api/register_openapi.py
+++ /dev/null
@@ -1,96 +0,0 @@
-"""Functions for amending OpenAPI specs and registering them with a Connexion
-app instance."""
-
-import logging
-import os
-from shutil import copyfile
-from typing import (List, Dict)
-
-from connexion import App
-
-from foca.config.config_parser import get_conf
-
-
-# Get logger instance
-logger = logging.getLogger(__name__)
-
-
-def register_openapi(
-    app: App,
-    specs: List[Dict] = [],
-    add_security_definitions: bool = True
-) -> App:
-    """Registers OpenAPI specs with Connexion app."""
-    # Iterate over list of API specs
-    for spec in specs:
-
-        # Get _this_ directory
-        path = os.path.join(
-            os.path.abspath(
-                os.path.dirname(
-                    os.path.realpath(__file__)
-                )
-            ),
-            get_conf(spec, 'path')
-        )
-
-        # Add security definitions to copy of specs
-        if add_security_definitions:
-            path = __add_security_definitions(in_file=path)
-
-        # Generate API endpoints from OpenAPI spec
-        try:
-            app.add_api(
-                path,
-                strict_validation=get_conf(spec, 'strict_validation'),
-                validate_responses=get_conf(spec, 'validate_responses'),
-                swagger_ui=get_conf(spec, 'swagger_ui'),
-                swagger_json=get_conf(spec, 'swagger_json'),
-            )
-
-            logger.info("API endpoints specified in '{path}' added.".format(
-                path=path,
-            ))
-
-        except (FileNotFoundError, PermissionError) as e:
-            logger.critical(
-                (
-                    "API specification file not found or accessible at "
-                    "'{path}'. Execution aborted. Original error message: "
-                    "{type}: {msg}"
-                ).format(
-                    path=path,
-                    type=type(e).__name__,
-                    msg=e,
-                )
-            )
-            raise SystemExit(1)
-
-    return(app)
-
-
-def __add_security_definitions(
-    in_file: str,
-    ext: str = 'modified.yaml'
-) -> str:
-    """Adds 'securityDefinitions' section to OpenAPI YAML specs."""
-    # Set security definitions
-    amend = '''
-
-# Amended by cwl-WES
-securityDefinitions:
-  jwt:
-    type: apiKey
-    name: Authorization
-    in: header
-'''
-
-    # Create copy for modification
-    out_file: str = '.'.join([os.path.splitext(in_file)[0], ext])
-    copyfile(in_file, out_file)
-
-    # Append security definitions
-    with open(out_file, 'a') as mod:
-        mod.write(amend)
-
-    return out_file
diff --git a/cwl_wes/app.py b/cwl_wes/app.py
index 76ddd89..2ce5661 100644
--- a/cwl_wes/app.py
+++ b/cwl_wes/app.py
@@ -1,61 +1,35 @@
-"""Entry point to start service."""
-
-from cwl_wes.api.register_openapi import register_openapi
-from cwl_wes.config.app_config import parse_app_config
-from foca.config.config_parser import (get_conf, get_conf_type)
-from foca.config.log_config import configure_logging
-from cwl_wes.database.register_mongodb import register_mongodb
-from cwl_wes.errors.errors import register_error_handlers
-from cwl_wes.factories.connexion_app import create_connexion_app
-from cwl_wes.tasks.register_celery import register_task_service
-from cwl_wes.security.cors import enable_cors
-
-
-def run_server():
-
-    # Configure logger
-    configure_logging(config_var='WES_CONFIG_LOG')
-
-    # Parse app configuration
-    config = parse_app_config(config_var='WES_CONFIG')
-
-    # Create Connexion app
-    connexion_app = create_connexion_app(config)
-
-    # Register MongoDB
-    connexion_app.app = register_mongodb(connexion_app.app)
-
-    # Register error handlers
-    connexion_app = register_error_handlers(connexion_app)
-
-    # Create Celery app and register background task monitoring service
-    register_task_service(connexion_app.app)
-
-    # Register OpenAPI specs
-    connexion_app = register_openapi(
-        app=connexion_app,
-        specs=get_conf_type(
-            config,
-            'api',
-            'specs',
-            types=(list),
-        ),
-        add_security_definitions=get_conf(
-            config,
-            'security',
-            'authorization_required'
-        )
-    )
+"""cwl-WES application entry point."""
+
+from pathlib import Path
 
-    # Enable cross-origin resource sharing
-    enable_cors(connexion_app.app)
+from connexion import App
+from foca import Foca
 
-    return connexion_app, config
+from cwl_wes.ga4gh.wes.endpoints.service_info import ServiceInfo
 
 
-if __name__ == '__main__':
-    connexion_app, config = run_server()
-    # Run app
-    connexion_app.run(
-        use_reloader=get_conf(config, 'server', 'use_reloader')
-    )
+def init_app() -> App:
+    """Initialize FOCA application.
+
+    Returns:
+        App: FOCA application.
+    """
+    foca = Foca(
+        config_file=Path("config.yaml"),
+        custom_config_model="cwl_wes.custom_config.CustomConfig",
+    )
+    app = foca.create_app()
+    with app.app.app_context():
+        service_info = ServiceInfo()
+        service_info.init_service_info_from_config()
+    return app
+
+
+def run_app(app: App) -> None:
+    """Run FOCA application."""
+    app.run(port=app.port)
+
+
+if __name__ == "__main__":
+    my_app = init_app()
+    run_app(my_app)
diff --git a/cwl_wes/celery_worker.py b/cwl_wes/celery_worker.py
deleted file mode 100644
index 2dbf1c3..0000000
--- a/cwl_wes/celery_worker.py
+++ /dev/null
@@ -1,12 +0,0 @@
-"""Entry point for Celery workers."""
-
-from cwl_wes.config.app_config import parse_app_config
-from cwl_wes.factories.celery_app import create_celery_app
-from cwl_wes.factories.connexion_app import create_connexion_app
-
-
-# Parse app configuration
-config = parse_app_config(config_var='WES_CONFIG')
-
-# Create Celery app
-celery = create_celery_app(create_connexion_app(config).app)
diff --git a/cwl_wes/config.py b/cwl_wes/config.py
deleted file mode 100644
index a66aaba..0000000
--- a/cwl_wes/config.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import os
-
-from foca.config.config_parser import get_conf
-from cwl_wes.config.app_config import parse_app_config
-
-# Source the WES config for defaults
-flask_config = parse_app_config(config_var='WES_CONFIG')
-
-# Gunicorn number of workers and threads
-workers = int(os.environ.get('GUNICORN_PROCESSES', '1'))
-threads = int(os.environ.get('GUNICORN_THREADS', '1'))
-
-forwarded_allow_ips = '*'
-
-# Gunicorn bind address
-bind = '{address}:{port}'.format(
-    address=get_conf(flask_config, 'server', 'host'),
-    port=get_conf(flask_config, 'server', 'port'),
-)
-
-# Source the environment variables for the Gunicorn workers
-raw_env = [
-    "WES_CONFIG=%s" % os.environ.get('WES_CONFIG', ''),
-    "RABBIT_HOST=%s" % os.environ.get(
-        'RABBIT_HOST', get_conf(flask_config, 'celery', 'broker_host')),
-    "RABBIT_PORT=%s" % os.environ.get(
-        'RABBIT_PORT', get_conf(flask_config, 'celery', 'broker_port')),
-    "MONGO_HOST=%s" % os.environ.get(
-        'MONGO_HOST', get_conf(flask_config, 'database', 'host')),
-    "MONGO_PORT=%s" % os.environ.get(
-        'MONGO_PORT', get_conf(flask_config, 'database', 'port')),
-    "MONGO_DBNAME=%s" % os.environ.get(
-        'MONGO_DBNAME', get_conf(flask_config, 'database', 'name')),
-    "MONGO_USERNAME=%s" % os.environ.get('MONGO_USERNAME', ''),
-    "MONGO_PASSWORD=%s" % os.environ.get('MONGO_PASSWORD', '')
-]
diff --git a/cwl_wes/config.yaml b/cwl_wes/config.yaml
new file mode 100644
index 0000000..c518afe
--- /dev/null
+++ b/cwl_wes/config.yaml
@@ -0,0 +1,157 @@
+# FOCA configuration
+# Available in app context as attributes of `current_app.config.foca`
+# Automatically validated via FOCA
+# Cf. https://foca.readthedocs.io/en/latest/modules/foca.models.html
+
+# Server configuration
+# Cf. https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.ServerConfig
+server:
+  host: "0.0.0.0"
+  port: 8080
+  debug: True
+  environment: development
+  testing: False
+  use_reloader: True
+
+# Security configuration
+# Cf. https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.SecurityConfig
+security:
+  auth:
+    required: False
+    add_key_to_claims: True
+    algorithms:
+      - RS256
+    allow_expired: False
+    audience: null
+    validation_methods:
+      - userinfo
+      - public_key
+    validation_checks: all
+
+# Database configuration
+# Cf. https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.DBConfig
+db:
+  host: mongodb
+  port: 27017
+  dbs:
+    cwl-wes-db:
+      collections:
+        runs:
+          indexes:
+            - keys:
+                run_id: 1
+                task_id: 1
+              options:
+                "unique": True
+                "sparse": True
+        service_info: []
+
+# API configuration
+# Cf. https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.APIConfig
+api:
+  specs:
+    - path:
+        - api/20181010.be85140.workflow_execution_service.swagger.yaml
+      add_security_fields:
+        x-apikeyInfoFunc: app.validate_token
+      add_operation_fields:
+        x-swagger-router-controller: ga4gh.wes.server
+      disable_auth: True
+      connexion:
+        strict_validation: True
+        validate_responses: False
+        options:
+          swagger_ui: True
+          serve_spec: True
+
+# Logging configuration
+# Cf. https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.LogConfig
+log:
+  version: 1
+  disable_existing_loggers: False
+  formatters:
+    standard:
+      class: logging.Formatter
+      style: "{"
+      format: "[{asctime}: {levelname:<8}] {message} [{name}]"
+    long:
+      class: logging.Formatter
+      style: "{"
+      format: "[{asctime}: {levelname:<8}] {message} [{name}]"
+  handlers:
+    console:
+      class: logging.StreamHandler
+      level: 20
+      formatter: standard
+      stream: ext://sys.stderr
+  root:
+    level: 10
+    handlers: [console]
+
+# Background job configuration
+# Cf. https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.JobsConfig
+jobs:
+  host: rabbitmq
+  port: 5672
+  backend: "rpc://"
+  include:
+    - cwl_wes.tasks.run_workflow
+    - cwl_wes.tasks.cancel_run
+
+# Exception configuration
+# Cf. https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.ExceptionConfig
+exceptions:
+  required_members: [["message"], ["code"]]
+  status_member: ["code"]
+  exceptions: cwl_wes.exceptions.exceptions
+
+# Custom configuration
+# Available in app context as attributes of `current_app.config.foca`
+custom:
+  storage:
+    permanent_dir: "/data/output"
+    tmp_dir: "/data/tmp"
+    remote_storage_url: "ftp://ftp-private.ebi.ac.uk/upload/foivos"
+  celery:
+    timeout: 0.1
+    message_maxsize: 16777216
+  controller:
+    default_page_size: 5
+    timeout_cancel_run: 60
+    timeout_run_workflow: null
+    tes_server:
+      url: "http://62.217.122.249:31567/"
+      timeout: 5
+      status_query_params: "FULL"
+    drs_server:
+      port: null  # use this port for resolving DRS URIs; set to `null` to use default (443)
+      base_path: null  # use this base path for resolving DRS URIs; set to `null` to use default (`ga4gh/drs/v1`)
+      use_http: False  # use `http` for resolving DRS URIs; set to `False` to use default (`https`)
+      file_types:  # extensions of files to scan for DRS URI resolution
+        - cwl
+        - yaml
+        - yml
+    runs_id:
+      length: 6
+      charset: string.ascii_uppercase + string.digits
+  service_info:
+    contact_info: "https://github.com/elixir-cloud-aai/cwl-WES"
+    auth_instructions_url: "https://github.com/elixir-cloud-aai/cwl-WES"
+    supported_filesystem_protocols:
+      - ftp
+      - https
+      - local
+    supported_wes_versions:
+      - 1.0.0
+      - 1.0.1
+    workflow_type_versions:
+      CWL:
+        workflow_type_version:
+          - v1.0
+          - v1.1
+          - v1.2
+    workflow_engine_versions:
+      cwl-tes: 0.3.0, commit 7b44cb1
+    default_workflow_engine_parameters: []
+    tags:
+      known_tes_endpoints: "https://csc-tesk-noauth.rahtiapp.fi/swagger-ui.html|https://tesk-na.cloud.e-infra.cz/swagger-ui.html"
diff --git a/cwl_wes/config/__init__.py b/cwl_wes/config/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/cwl_wes/config/app_config.py b/cwl_wes/config/app_config.py
deleted file mode 100644
index bf23c51..0000000
--- a/cwl_wes/config/app_config.py
+++ /dev/null
@@ -1,57 +0,0 @@
-"""Function for configuring a Connection app instance."""
-
-import logging
-import os
-from typing import Optional
-
-from foca.config.config_parser import YAMLConfigParser
-
-
-# Get logger instance
-logger = logging.getLogger(__name__)
-
-
-def parse_app_config(
-    config_var: Optional[str] = None,
-    default_path: str = os.path.abspath(
-        os.path.join(
-            os.path.dirname(
-                os.path.realpath(__file__)
-            ),
-            'app_config.yaml'
-        )
-    )
-) -> YAMLConfigParser:
-    """Parses configuration files and adds configuration to Connexion app."""
-    # Create parser instance
-    config = YAMLConfigParser()
-
-    # Parse config
-    try:
-        paths = config.update_from_yaml(
-            config_paths=[default_path],
-            config_vars=[config_var],
-        )
-
-    # Abort if a config file was not found/accessible
-    except (FileNotFoundError, PermissionError) as e:
-        logger.exception(
-            (
-                'Config file not found. Ensure that default config file is '
-                "available and accessible at '{default_path}'. If "
-                "'{config_var}' is set, further ensure that the file or files "
-                'it points are available and accessible. Execution aborted. '
-                "Original error message: {type}: {msg}"
-            ).format(
-                default_path=default_path,
-                config_var=config_var,
-                type=type(e).__name__,
-                msg=e,
-            )
-        )
-        raise SystemExit(1)
-
-    else:
-        logger.info("App config loaded from '{paths}'.".format(paths=paths))
-
-    return config
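Note: the settings from the removed `app_config.py` above and `app_config.yaml` below now live in `cwl_wes/config.yaml`, which FOCA parses and validates into typed models at startup. A rough sketch of how the access pattern changes inside an app context; the attribute names follow `config.yaml` and `cwl_wes.custom_config.CustomConfig`, and the `.custom` attribute is an assumption based on FOCA's documented behavior:

```python
from flask import current_app

# Removed pattern: untyped dictionary lookups on raw YAML, e.g.
#   tes_url = get_conf(config, 'tes', 'url')

# New pattern: FOCA exposes the validated config in the app context.
foca_config = current_app.config.foca             # validated FOCA config
custom = foca_config.custom                       # CustomConfig instance
tes_url = custom.controller.tes_server.url        # str
tmp_dir = custom.storage.tmp_dir                  # pathlib.Path
page_size = custom.controller.default_page_size   # int, defaults to 5
```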
diff --git a/cwl_wes/config/app_config.yaml b/cwl_wes/config/app_config.yaml
deleted file mode 100644
index 6deec8d..0000000
--- a/cwl_wes/config/app_config.yaml
+++ /dev/null
@@ -1,111 +0,0 @@
-# General server/service settings
-#
-# Any change in this file will be detected by gunicorn and the configuration will be reloaded.
-#
-server:
-    host: '0.0.0.0'
-    port: 8080
-    debug: True
-    environment: development
-    testing: False
-    use_reloader: True
-
-# Security settings
-security:
-    authorization_required: False
-    jwt:
-        add_key_to_claims: True
-        algorithms:
-          - RS256
-        allow_expired: False
-        audience: null  # list of allowed audiences or 'null' (do not validate audience)
-        claim_identity: sub
-        claim_issuer: iss
-        claim_key_id: kid
-        header_name: Authorization
-        token_prefix: Bearer
-        validation_methods:
-          - userinfo
-          - public_key
-        validation_checks: all  # 'any' or 'all'
-
-# Database settings
-database:
-    host: mongodb
-    port: 27017
-    name: cwl-wes-db
-    run_id:
-        length: 6
-        charset: string.ascii_uppercase + string.digits
-
-# Storage
-storage:
-    permanent_dir: '/data/output'
-    tmp_dir: '/data/tmp'
-    remote_storage_url: 'ftp://ftp-private.ebi.ac.uk/upload/foivos'
-
-# Celery task queue
-celery:
-    broker_host: rabbitmq
-    broker_port: 5672
-    result_backend: 'rpc://'
-    include:
-        - cwl_wes.tasks.tasks.run_workflow
-        - cwl_wes.tasks.tasks.cancel_run
-    monitor:
-        timeout: 0.1
-    message_maxsize: 16777216
-
-# OpenAPI specs
-api:
-    specs:
-        - path: '20181010.be85140.workflow_execution_service.swagger.yaml'
-          strict_validation: True
-          validate_responses: True
-          swagger_ui: True
-          swagger_json: True
-    endpoint_params:
-        default_page_size: 5
-        timeout_cancel_run: 60
-        timeout_run_workflow: Null
-
-# WES service info settings
-service_info:
-    contact_info: 'https://github.com/elixir-cloud-aai/cwl-WES'
-    auth_instructions_url: 'https://www.elixir-europe.org/services/compute/aai'
-    supported_filesystem_protocols:
-        - ftp
-        - https
-        - local
-    supported_wes_versions:
-        - 1.0.0
-    workflow_type_versions:
-        CWL:
-            workflow_type_version:
-                - v1.0
-    workflow_engine_versions:
-        cwl-tes: 0.2.0
-    default_workflow_engine_parameters:
-        - type: string
-          default_value: some_string
-        - type: int
-          default_value: '5'
-    tags:
-        known_tes_endpoints: 'https://tes.tsi.ebi.ac.uk/|https://tes-dev.tsi.ebi.ac.uk/|https://csc-tesk.c03.k8s-popup.csc.fi/|https://tesk.c01.k8s-popup.csc.fi/'
-        app_version: 0.15.0
-
-# TES server
-tes:
-    url: 'https://csc-tesk.c03.k8s-popup.csc.fi/'
-    timeout: 5
-    status_query_params: 'FULL'
-
-# DRS integration
-drs:
-    port: Null  # use this port for resolving DRS URIs; set to `Null` to use default (443)
-    base_path: Null  # use this base path for resolving DRS URIs; set to `Null` to use default (`ga4gh/drs/v1`)
-    use_http: False  # use `http` for resolving DRS URIs; set to `False` to use default (`https`)
-    file_types:  # extensions of files to scan for DRS URI resolution
-        - cwl
-        - yaml
-        - yml
diff --git a/cwl_wes/config/log_config.yaml b/cwl_wes/config/log_config.yaml
deleted file mode 100644
index b85dc13..0000000
--- a/cwl_wes/config/log_config.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-version: 1
-
-disable_existing_loggers: False
-
-formatters:
-    standard:
-        class: logging.Formatter
-        style: "{"
-        format: "[{asctime}: {levelname:<8} {module:<18}] {message}"
-
-    long:
-        class: logging.Formatter
-        style: "{"
-        format: "[{asctime}: {levelname:<8}] {message} [{name}]"
-
-    # OTHER FORMATS
-    #format: "{message}"
-    #format: "[{asctime}] [{levelname:^8}] {message} ({name})"
-    #format: "{asctime}-{levelno:^2}-{name}-{module}-{funcName}: {message}"
-    #format: "[{asctime}: {levelname:}/{name:<36}] {message}"
-    #format: "[{asctime}] [{levelname:^8}] [{name}] {message} ({pathname}:{funcName})"
-    #datefmt: "%y-%m-%d %H:%M:%S"
-
-handlers:
-    console:
-        class: logging.StreamHandler
-        level: DEBUG
-        formatter: long
-        stream: ext://sys.stderr
-
-root:
-    level: INFO
-    handlers: [console]
\ No newline at end of file
diff --git a/cwl_wes/custom_config.py b/cwl_wes/custom_config.py
new file mode 100644
index 0000000..3453ce3
--- /dev/null
+++ b/cwl_wes/custom_config.py
@@ -0,0 +1,355 @@
+"""Custom app config models."""
+
+from pathlib import Path
+import string
+from typing import Dict, List, Optional
+
+from foca.models.config import FOCABaseConfig
+
+# pragma pylint: disable=too-few-public-methods
+
+
+class StorageConfig(FOCABaseConfig):
+    """Model for task run and storage configuration.
+
+    Args:
+        tmp_dir: Temporary run directory path
+        permanent_dir: Permanent working directory path
+        remote_storage_url: Remote file storage FTP endpoint
+
+    Attributes:
+        tmp_dir: Temporary run directory path
+        permanent_dir: Permanent working directory path
+        remote_storage_url: Remote file storage FTP endpoint
+
+    Example:
+        >>> StorageConfig(
+        ...     tmp_dir='/data/tmp',
+        ...     permanent_dir='/data/output',
+        ...     remote_storage_url='ftp://ftp.private/upload'
+        ... )
+        StorageConfig(tmp_dir='/data/tmp', permanent_dir='/data/output', remote_st
+        orage_url='ftp://ftp.private/upload')
+    """
+
+    permanent_dir: Path = Path("/data/output")
+    tmp_dir: Path = Path("/data/tmp")
+    remote_storage_url: str = "ftp://ftp-private.ebi.ac.uk/upload/foivos"
+
+
+class CeleryConfig(FOCABaseConfig):
+    """Model for celery configurations.
+
+    Args:
+        timeout: Celery task timeout.
+        message_maxsize: Celery message max size.
+
+    Attributes:
+        timeout: Celery task timeout.
+        message_maxsize: Celery message max size.
+
+    Example:
+        >>> CeleryConfig(
+        ...     timeout=15,
+        ...     message_maxsize=1024
+        ... )
+        CeleryConfig(timeout=15, message_maxsize=1024)
+    """
+
+    timeout: float = 0.1
+    message_maxsize: int = 16777216
+
+
+class WorkflowTypeVersionConfig(FOCABaseConfig):
+    """Workflow type versions supported by this service.
+
+    Args:
+        workflow_type_version: List of one or more acceptable versions for the
+            workflow type.
+
+    Attributes:
+        workflow_type_version: List of one or more acceptable versions for the
+            workflow type.
+
+    Example:
+        >>> WorkflowTypeVersionConfig(
+        ...     workflow_type_version=['v1.0']
+        ... )
+        WorkflowTypeVersionConfig(workflow_type_version=['v1.0'])
+    """
+
+    workflow_type_version: Optional[List[str]] = []
+
+
+class DefaultWorkflowEngineParameterConfig(FOCABaseConfig):
+    """Model for default workflow engine parameters.
+
+    Args:
+        name: Parameter name.
+        type: Parameter type.
+        default_value: Stringified version of default parameter.
+
+    Attributes:
+        name: Parameter name.
+        type: Parameter type.
+        default_value: Stringified version of default parameter.
+
+    Example:
+        >>> DefaultWorkflowEngineParameterConfig(
+        ...     name='name',
+        ...     type='str',
+        ...     default_value='default'
+        ... )
+        DefaultWorkflowEngineParameterConfig(name='name', type='str', default_v
+        alue='default')
+    """
+
+    name: Optional[str]
+    type: Optional[str]
+    default_value: Optional[str]
+
+
+class TagsConfig(FOCABaseConfig):
+    """Model for service info tag configuration.
+
+    Args:
+        known_tes_endpoints: Valid TES endpoints.
+
+    Attributes:
+        known_tes_endpoints: Valid TES endpoints.
+
+    Example:
+        >>> TagsConfig(
+        ...     known_tes_endpoints='https://tes.endpoint',
+        ... )
+        TagsConfig(known_tes_endpoints='https://tes.endpoint')
+    """
+
+    known_tes_endpoints: str
+
+
+class ServiceInfoConfig(FOCABaseConfig):
+    """Model for service info configurations.
+
+    Args:
+        contact_info: Email address/webpage URL with contact information.
+        auth_instructions_url: Web page URL with information about how to get
+            an authorization token necessary to use a specific endpoint.
+        supported_filesystem_protocols: Filesystem protocols supported by this
+            service.
+        supported_wes_versions: Version(s) of the WES schema supported by this
+            service.
+        workflow_type_versions: Map with keys as the workflow format type name
+            and value is a `WorkflowTypeVersionConfig` object which simply
+            contains an array of one or more version strings.
+        workflow_engine_versions: Workflow engine(s) used by this WES service.
+        default_workflow_engine_parameters: Each workflow engine can present
+            additional parameters that can be sent to the workflow engine.
+        tags: A key-value map of arbitrary, extended metadata outside the scope
+            of the above but useful to report back.
+
+    Attributes:
+        contact_info: Email address/webpage URL with contact information.
+        auth_instructions_url: Web page URL with information about how to get
+            an authorization token necessary to use a specific endpoint.
+        supported_filesystem_protocols: Filesystem protocols supported by this
+            service.
+        supported_wes_versions: Version(s) of the WES schema supported by this
+            service.
+        workflow_type_versions: Map with keys as the workflow format type name
+            and value is a `WorkflowTypeVersionConfig` object which simply
+            contains an array of one or more version strings.
+        workflow_engine_versions: Workflow engine(s) used by this WES service.
+        default_workflow_engine_parameters: Each workflow engine can present
+            additional parameters that can be sent to the workflow engine.
+        tags: A key-value map of arbitrary, extended metadata outside the scope
+            of the above but useful to report back.
+
+    Example:
+        >>> ServiceInfoConfig(
+        ...     contact_info='https://contact.url',
+        ...     auth_instructions_url='https://auth.url',
+        ...     supported_filesystem_protocols=['ftp', 'https', 'local'],
+        ...     supported_wes_versions=['1.0.0'],
+        ...     workflow_type_versions={
+        ...         'CWL': WorkflowTypeVersionConfig(
+        ...             workflow_type_version=['v1.0']
+        ...         )
+        ...     },
+        ...     workflow_engine_versions={},
+        ...     default_workflow_engine_parameters=[],
+        ...     tags=TagsConfig(known_tes_endpoints='https://tes.endpoint/')
+        ... )
+        ServiceInfoConfig(contact_info='https://github.com/elixir-cloud-aai/cwl
+        -WES', auth_instructions_url='https://www.elixir-europe.org/services/co
+        mpute/aai', supported_filesystem_protocols=['ftp', 'https', 'local'], s
+        upported_wes_versions=['1.0.0'], workflow_type_versions={'CWL': Workflo
+        wTypeVersionConfig(workflow_type_version=['v1.0'])}, workflow_engine_ve
+        rsions={}, default_workflow_engine_parameters=[], tags=TagsConfig(known
+        _tes_endpoints='https://tes.endpoint/'))
+    """
+
+    contact_info: str = "https://github.com/elixir-cloud-aai/cwl-WES"
+    auth_instructions_url: str = (
+        "https://www.elixir-europe.org/services/compute/aai"
+    )
+    supported_filesystem_protocols: List[str] = ["ftp", "https", "local"]
+    supported_wes_versions: List[str] = ["1.0.0"]
+    workflow_type_versions: Dict[str, WorkflowTypeVersionConfig] = {
+        "CWL": WorkflowTypeVersionConfig(workflow_type_version=["v1.0"]),
+    }
+    workflow_engine_versions: Dict[str, str] = {}
+    default_workflow_engine_parameters: List[
+        DefaultWorkflowEngineParameterConfig
+    ] = []
+    tags: TagsConfig
+
+
+class TesServerConfig(FOCABaseConfig):
+    """Model for TES server configuration.
+
+    Args:
+        url: TES Endpoint URL.
+        timeout: Request time out.
+        status_query_params: Request query parameters.
+
+    Attributes:
+        url: TES Endpoint URL.
+        timeout: Request time out.
+        status_query_params: Request query parameters.
+
+    Example:
+        >>> TesServerConfig(
+        ...     url='https://tes.endpoint',
+        ...     timeout=5,
+        ...     status_query_params='FULL'
+        ... )
+        TesServerConfig(url='https://tes.endpoint', timeout=5, status_query_par
+        ams='FULL')
+    """
+
+    url: str
+    timeout: int = 5
+    status_query_params: str = "FULL"
+
+
+class DRSServerConfig(FOCABaseConfig):
+    """Model for DRS server configuration.
+
+    Args:
+        port: Port for resolving DRS URIs;
+            set to `null` to use default (443).
+        base_path: Base path for resolving DRS URIs;
+            set to `null` to use default (`ga4gh/drs/v1`).
+        use_http: Use `http` for resolving DRS URIs;
+            set to `False` to use default (`https`).
+        file_types: Extensions of files to scan for DRS URI resolution.
+
+    Attributes:
+        port: Port for resolving DRS URIs;
+            set to `null` to use default (443).
+        base_path: Base path for resolving DRS URIs;
+            set to `null` to use default (`ga4gh/drs/v1`).
+        use_http: Use `http` for resolving DRS URIs;
+            set to `False` to use default (`https`).
+        file_types: Extensions of files to scan for DRS URI resolution.
+
+    Example:
+        >>> DRSServerConfig(
+        ...     port=443,
+        ...     base_path='ga4gh/drs/v1',
+        ...     use_http=False,
+        ...     file_types=['cwl', 'yaml', 'yml']
+        ... )
+        DRSServerConfig(port=443, base_path='ga4gh/drs/v1', use_http=False, fil
+        e_types=['cwl', 'yaml', 'yml'])
+    """
+
+    port: Optional[int] = None
+    base_path: Optional[str] = None
+    use_http: bool = False
+    file_types: List[str] = ["cwl", "yaml", "yml"]
+
+
+class IdConfig(FOCABaseConfig):
+    """Model for defining unique identifier for services on cloud registry.
+
+    Args:
+        charset: A string of allowed characters or an expression evaluating to
+            a string of allowed characters.
+        length: Length of returned string.
+
+    Attributes:
+        charset: A string of allowed characters or an expression evaluating to
+            a string of allowed characters.
+        length: Length of returned string.
+
+    Example:
+        >>> IdConfig(
+        ...     charset='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',
+        ...     length=6
+        ... )
+        IdConfig(charset='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', length=6)
+    """
+
+    length: int = 6
+    charset: str = string.ascii_uppercase + string.digits
+
+
+class ControllerConfig(FOCABaseConfig):
+    """Model for controller configurations.
+
+    Args:
+        default_page_size: Pagination page size.
+        timeout_cancel_run: Timeout for `cancel_run` workflow.
+        timeout_run_workflow: Timeout for `run_workflow` workflow.
+        tes_server: TES Server config parameters.
+        drs_server: DRS Server config parameters.
+        runs_id: Identifier config parameters.
+
+    Attributes:
+        default_page_size: Pagination page size.
+        timeout_cancel_run: Timeout for `cancel_run` workflow.
+        timeout_run_workflow: Timeout for `run_workflow` workflow.
+        tes_server: TES Server config parameters.
+        drs_server: DRS Server config parameters.
+        runs_id: Identifier config parameters.
+
+    Example:
+        >>> ControllerConfig(
+        ...     default_page_size=5,
+        ...     timeout_cancel_run=60,
+        ...     timeout_run_workflow=None
+        ... )
+        ControllerConfig(default_page_size=5, timeout_cancel_run=60, timeout_ru
+        n_workflow=60)
+    """
+
+    default_page_size: int = 5
+    timeout_cancel_run: int = 60
+    timeout_run_workflow: Optional[int] = None
+    tes_server: TesServerConfig
+    drs_server: DRSServerConfig = DRSServerConfig()
+    runs_id: IdConfig = IdConfig()
+
+
+class CustomConfig(FOCABaseConfig):
+    """Model for custom configuration parameters.
+
+    Args:
+        storage: Storage config parameters.
+        celery: Celery config parameters.
+        controller: Controller config parameters.
+        service_info: Service Info config parameters.
+
+    Attributes:
+        storage: Storage config parameters.
+        celery: Celery config parameters.
+        controller: Controller config parameters.
+        service_info: Service Info config parameters.
+    """
+
+    storage: StorageConfig = StorageConfig()
+    celery: CeleryConfig = CeleryConfig()
+    controller: ControllerConfig
+    service_info: ServiceInfoConfig
diff --git a/cwl_wes/database/__init__.py b/cwl_wes/database/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/cwl_wes/database/db_utils.py b/cwl_wes/database/db_utils.py
deleted file mode 100644
index fe1d409..0000000
--- a/cwl_wes/database/db_utils.py
+++ /dev/null
@@ -1,96 +0,0 @@
-"""Utility functions for MongoDB document insertion, updates and retrieval."""
-
-from typing import (Any, List, Mapping, Optional)
-
-from bson.objectid import ObjectId
-from pymongo.collection import ReturnDocument
-from pymongo import collection as Collection
-
-
-def find_one_latest(collection: Collection) -> Optional[Mapping[Any, Any]]:
-    """Returns newest/latest object, stripped of the object id, or None if no
-    object exists: collection.
-    """
-    try:
-        return collection.find(
-            {},
-            {'_id': False}
-        ).sort([('_id', -1)]).limit(1).next()
-    except StopIteration:
-        return None
-
-
-def find_id_latest(collection: Collection) -> Optional[ObjectId]:
-    """Returns object id of newest/latest object, or None if no object exists.
-    """
-    try:
-        return collection.find().sort([('_id', -1)]).limit(1).next()['_id']
-    except StopIteration:
-        return None
-
-
-def update_run_state(
-    collection: Collection,
-    task_id: str,
-    state: str = 'UNKNOWN'
-) -> Optional[Mapping[Any, Any]]:
-    """Updates state of workflow run and returns document."""
-    return collection.find_one_and_update(
-        {'task_id': task_id},
-        {'$set': {'api.state': state}},
-        return_document=ReturnDocument.AFTER
-    )
-
-
-def upsert_fields_in_root_object(
-    collection: Collection,
-    task_id: str,
-    root: str,
-    **kwargs
-) -> Optional[Mapping[Any, Any]]:
-    """Inserts (or updates) fields in(to) the same root (object) field and
-    returns document.
-    """
-    return collection.find_one_and_update(
-        {'task_id': task_id},
-        {'$set': {
-            '.'.join([root, key]):
-                value for (key, value) in kwargs.items()
-        }},
-        return_document=ReturnDocument.AFTER
-    )
-
-
-def update_tes_task_state(
-    collection: Collection,
-    task_id: str,
-    tes_id: str,
-    state: str
-) -> Optional[Mapping[Any, Any]]:
-    """Updates `state` field in TES task log and returns updated document."""
-    return collection.find_one_and_update(
-        {'task_id': task_id, 'api.task_logs': {'$elemMatch': {'id': tes_id}}},
-        {'$set': {'api.task_logs.$.state': state}},
-        return_document=ReturnDocument.AFTER
-    )
-
-
-def append_to_tes_task_logs(
-    collection: Collection,
-    task_id: str,
-    tes_log: Mapping,
-) -> Optional[Mapping[Any, Any]]:
-    """Appends task log to TES task logs and returns updated document."""
-    return collection.find_one_and_update(
-        {'task_id': task_id},
-        {'$push': {'api.task_logs': tes_log}},
-        return_document=ReturnDocument.AFTER
-    )
-
-
-def find_tes_task_ids(
-    collection: Collection,
-    run_id: str
-) -> List:
-    """Get list of TES task ids associated with a run of interest."""
-    return collection.distinct('api.task_logs.id', {'run_id': run_id})
diff --git a/cwl_wes/database/register_mongodb.py b/cwl_wes/database/register_mongodb.py
deleted file mode 100644
index f667181..0000000
--- a/cwl_wes/database/register_mongodb.py
+++ /dev/null
@@ -1,100 +0,0 @@
-"""Function for Registering MongoDB with a Flask app instance."""
-
-import os
-
-import logging
-from typing import Dict
-
-from flask import Flask
-from flask_pymongo import ASCENDING, PyMongo
-
-from foca.config.config_parser import get_conf
-from cwl_wes.ga4gh.wes.endpoints.get_service_info import get_service_info
-
-
-# Get logger instance
-logger = logging.getLogger(__name__)
-
-
-def register_mongodb(app: Flask) -> Flask:
-    """Instantiates database and initializes collections."""
-    config = app.config
-
-    # Instantiante PyMongo client
-    mongo = create_mongo_client(
-        app=app,
-        config=config,
-    )
-
-    # Add database
-    db = mongo.db[os.environ.get(
-        'MONGO_DBNAME', get_conf(config, 'database', 'name'))]
-
-    # Add database collection for '/service-info'
-    collection_service_info = mongo.db['service-info']
-    logger.debug("Added database collection 'service_info'.")
-
-    # Add database collection for '/runs'
-    collection_runs = mongo.db['runs']
-    collection_runs.create_index([
-            ('run_id', ASCENDING),
-            ('task_id', ASCENDING),
-        ],
-        unique=True,
-        sparse=True
-    )
-    logger.debug("Added database collection 'runs'.")
-
-    # Add database and collections to app config
-    config['database']['database'] = db
-    config['database']['collections'] = dict()
-    config['database']['collections']['runs'] = collection_runs
-    config['database']['collections']['service_info'] = collection_service_info
-    app.config = config
-
-    # Initialize service info
-    logger.debug('Initializing service info...')
-    get_service_info(config, silent=True)
-
-    return app
-
-
-def create_mongo_client(
-    app: Flask,
-    config: Dict,
-):
-    """Register MongoDB uri and credentials."""
-    if os.environ.get('MONGO_USERNAME') != '':
-        auth = '{username}:{password}@'.format(
-            username=os.environ.get('MONGO_USERNAME'),
-            password=os.environ.get('MONGO_PASSWORD'),
-        )
-    else:
-        auth = ''
-
-    app.config['MONGO_URI'] = 'mongodb://{auth}{host}:{port}/{dbname}'.format(
-        host=os.environ.get('MONGO_HOST', get_conf(
-            config, 'database', 'host')),
-        port=os.environ.get('MONGO_PORT', get_conf(
-            config, 'database', 'port')),
-        dbname=os.environ.get('MONGO_DBNAME', get_conf(
-            config, 'database', 'name')),
-        auth=auth
-    )
-
-    """Instantiate MongoDB client."""
-    mongo = PyMongo(app)
-    logger.info(
-        (
-            "Registered database '{name}' at URI '{uri}':'{port}' with Flask "
-            'application.'
-        ).format(
-            name=os.environ.get('MONGO_DBNAME', get_conf(
-                config, 'database', 'name')),
-            uri=os.environ.get('MONGO_HOST', get_conf(
-                config, 'database', 'host')),
-            port=os.environ.get('MONGO_PORT', get_conf(
-                config, 'database', 'port'))
-        )
-    )
-    return mongo
diff --git a/cwl_wes/errors/__init__.py b/cwl_wes/errors/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/cwl_wes/errors/errors.py b/cwl_wes/errors/errors.py
deleted file mode 100644
index ce6e7c6..0000000
--- a/cwl_wes/errors/errors.py
+++ /dev/null
@@ -1,97 +0,0 @@
-"""Custom errors, error handler functions and function to register error
-handlers with a Connexion app instance."""
-
-import logging
-
-from connexion import App, ProblemException
-from connexion.exceptions import (
-    ExtraParameterProblem,
-    Forbidden,
-    Unauthorized
-)
-from flask import Response
-from json import dumps
-from werkzeug.exceptions import (BadRequest, InternalServerError, NotFound)
-
-
-# Get logger instance
-logger = logging.getLogger(__name__)
-
-
-def register_error_handlers(app: App) -> App:
-    """Adds custom handlers for exceptions to Connexion app instance."""
-    # Add error handlers
-    app.add_error_handler(BadRequest, handle_bad_request)
-    app.add_error_handler(ExtraParameterProblem, handle_bad_request)
-    app.add_error_handler(Forbidden, __handle_forbidden)
-    app.add_error_handler(InternalServerError, __handle_internal_server_error)
-    app.add_error_handler(Unauthorized, __handle_unauthorized)
-    app.add_error_handler(WorkflowNotFound, __handle_workflow_not_found)
-    logger.info('Registered custom error handlers with Connexion app.')
-
-    # Return Connexion app instance
-    return app
-
-
-# CUSTOM ERRORS
-class WorkflowNotFound(ProblemException, NotFound):
-    """WorkflowNotFound(404) error compatible with Connexion."""
-
-    def __init__(self, title=None, **kwargs):
-        super(WorkflowNotFound, self).__init__(title=title, **kwargs)
-
-
-# CUSTOM ERROR HANDLERS
-def handle_bad_request(exception: Exception) -> Response:
-    return Response(
-        response=dumps({
-            'msg': 'The request is malformed.',
-            'status_code': '400'
-        }),
-        status=400,
-        mimetype="application/problem+json"
-    )
-
-
-def __handle_unauthorized(exception: Exception) -> Response:
-    return Response(
-        response=dumps({
-            'msg': 'The request is unauthorized.',
-            'status_code': '401'
-        }),
-        status=401,
-        mimetype="application/problem+json"
-    )
-
-
-def __handle_forbidden(exception: Exception) -> Response:
-    return Response(
-        response=dumps({
-            'msg': 'The requester is not authorized to perform this action.',
-            'status_code': '403'
-        }),
-        status=403,
-        mimetype="application/problem+json"
-    )
-
-
-def __handle_workflow_not_found(exception: Exception) -> Response:
-    return Response(
-        response=dumps({
-            'msg': 'The requested workflow run wasn\'t found.',
-            'status_code': '404'
-        }),
-        status=404,
-        mimetype="application/problem+json"
-    )
-
-
-def __handle_internal_server_error(exception: Exception) -> Response:
-    return Response(
-        response=dumps({
-            'msg': 'An unexpected error occurred.',
-            'status_code': '500'
-        }),
-        status=500,
-        mimetype="application/problem+json"
-    )
diff --git a/cwl_wes/exceptions.py b/cwl_wes/exceptions.py
new file mode 100644
index 0000000..abccce5
--- /dev/null
+++ b/cwl_wes/exceptions.py
@@ -0,0 +1,59 @@
+"""cwl-WES exceptions."""
+
+from connexion.exceptions import (
+    BadRequestProblem,
+    ExtraParameterProblem,
+    Forbidden,
+    Unauthorized,
+    ProblemException,
+)
+from pydantic import ValidationError
+from werkzeug.exceptions import BadRequest, InternalServerError, NotFound
+
+
+class WorkflowNotFound(ProblemException, NotFound):
+    """WorkflowNotFound(404) error compatible with Connexion."""
+
+
+exceptions = {
+    Exception: {
+        "message": "An unexpected error occurred.",
+        "code": "500",
+    },
+    BadRequest: {
+        "message": "The request is malformed.",
+        "code": "400",
+    },
+    BadRequestProblem: {
+        "message": "The request is malformed.",
+        "code": "400",
+    },
+    ExtraParameterProblem: {
+        "message": "The request is malformed.",
+        "code": "400",
+    },
+    ValidationError: {
+        "message": "The request is malformed.",
+        "code": "400",
+    },
+    Unauthorized: {
+        "message": "The request is unauthorized.",
+        "code": "401",
+    },
+    Forbidden: {
+        "message": "The requester is not authorized to perform this action.",
+        "code": "403",
+    },
+    NotFound: {
+        "message": "The requested resource wasn't found.",
+        "code": "404",
+    },
+    InternalServerError: {
+        "message": "An unexpected error occurred.",
+        "code": "500",
+    },
+    WorkflowNotFound: {
+        "message": "The requested workflow run wasn't found.",
+        "code": "404",
+    },
+}
diff --git a/cwl_wes/factories/__init__.py b/cwl_wes/factories/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/cwl_wes/factories/celery_app.py b/cwl_wes/factories/celery_app.py
deleted file mode 100644
index 53f2363..0000000
--- a/cwl_wes/factories/celery_app.py
+++ /dev/null
@@ -1,57 +0,0 @@
-"""Factory for creating Celery app instances based on Flask apps."""
-
-import os
-
-from inspect import stack
-import logging
-
-from flask import Flask
-from celery import Celery
-
-from foca.config.config_parser import (get_conf, get_conf_type)
-
-
-# Get logger instance
-logger = logging.getLogger(__name__)
-
-
-def create_celery_app(app: Flask) -> Celery:
-    """Creates Celery application and configures it from Flask app."""
-    broker = 'pyamqp://{host}:{port}//'.format(
-        host=os.environ.get('RABBIT_HOST', get_conf(app.config, 'celery', 'broker_host')),
-        port=os.environ.get('RABBIT_PORT', get_conf(app.config, 'celery', 'broker_port')),
-    )
-    backend = get_conf(app.config, 'celery', 'result_backend')
-    include = get_conf_type(app.config, 'celery', 'include', types=(list))
-    maxsize = get_conf(app.config, 'celery', 'message_maxsize')
-
-    # Instantiate Celery app
-    celery = Celery(
-        app=__name__,
-        broker=broker,
-        backend=backend,
-        include=include,
-    )
-    logger.info("Celery app created from '{calling_module}'.".format(
-        calling_module=':'.join([stack()[1].filename, stack()[1].function])
-    ))
-
-    # Set Celery options
-    celery.Task.resultrepr_maxsize = maxsize
-    celery.amqp.argsrepr_maxsize = maxsize
-    celery.amqp.kwargsrepr_maxsize = maxsize
-
-    # Update Celery app configuration with Flask app configuration
-    celery.conf.update(app.config)
-    logger.info('Celery app configured.')
-
-    class ContextTask(celery.Task):  # type: ignore
-        # https://github.com/python/mypy/issues/4284)
-        def __call__(self, *args, **kwargs):
-            with app.app_context():
-                return self.run(*args, **kwargs)
-
-    celery.Task = ContextTask
-    logger.debug("App context added to 'celery.Task' class.")
-
-    return celery
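Note: both factory modules (the Celery factory above and the Connexion factory below) are superseded by FOCA, which builds the Connexion app in `cwl_wes/app.py` and can likewise build a matching Celery app for workers. A sketch of what a replacement worker entry point could look like; `Foca.create_celery_app` is assumed from the FOCA API, and the module name in the start command and the config path are hypothetical:

```python
"""Celery worker entry point (sketch; not part of this diff)."""

from pathlib import Path

from foca import Foca

# Same constructor arguments as in cwl_wes/app.py.
foca = Foca(
    config_file=Path("config.yaml"),
    custom_config_model="cwl_wes.custom_config.CustomConfig",
)

# FOCA wires the broker and backend from the `jobs` section of config.yaml.
celery = foca.create_celery_app()

# Start with, e.g.:  celery -A cwl_wes.worker worker --loglevel=info
```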
-"""Factory for creating and configuring Connexion app instances.""" - -from inspect import stack -import logging -from typing import (Mapping, Optional) - -from connexion import App - -from cwl_wes.errors.errors import handle_bad_request -from foca.config.config_parser import get_conf - - -# Get logger instance -logger = logging.getLogger(__name__) - - -def create_connexion_app(config: Optional[Mapping] = None) -> App: - """Creates and configure Connexion app.""" - # Instantiate Connexion app - app = App(__name__) - logger.info("Connexion app created from '{calling_module}'.".format( - calling_module=':'.join([stack()[1].filename, stack()[1].function]) - )) - - # Workaround for adding a custom handler for `connexion.problem` responses - # Responses from request and paramater validators are not raised and - # cannot be intercepted by `add_error_handler`; see here: - # https://github.com/zalando/connexion/issues/138 - @app.app.after_request - def _rewrite_bad_request(response): - if ( - response.status_code == 400 and - response.data.decode('utf-8').find('"title":') is not None - ): - response = handle_bad_request(400) - return response - - # Configure Connexion app - if config is not None: - app = __add_config_to_connexion_app( - app=app, - config=config, - ) - - return app - - -def __add_config_to_connexion_app( - app: App, - config: Mapping -) -> App: - """Adds configuration to Flask app and replaces default Connexion and Flask - settings.""" - # Replace Connexion app settings - app.host = get_conf(config, 'server', 'host') - app.port = get_conf(config, 'server', 'port') - app.debug = get_conf(config, 'server', 'debug') - - # Replace Flask app settings - app.app.config['DEBUG'] = app.debug - app.app.config['ENV'] = get_conf(config, 'server', 'environment') - app.app.config['TESTING'] = get_conf(config, 'server', 'testing') - - # Log Flask config - logger.debug('Flask app settings:') - for (key, value) in app.app.config.items(): - logger.debug('* {}: {}'.format(key, value)) - - # Add user configuration to Flask app config - app.app.config.update(config) - - logger.info('Connexion app configured.') - return app diff --git a/cwl_wes/ga4gh/__init__.py b/cwl_wes/ga4gh/__init__.py index e69de29..cb92431 100644 --- a/cwl_wes/ga4gh/__init__.py +++ b/cwl_wes/ga4gh/__init__.py @@ -0,0 +1 @@ +"""Controllers superpackage.""" diff --git a/cwl_wes/ga4gh/wes/__init__.py b/cwl_wes/ga4gh/wes/__init__.py index e69de29..dd66766 100644 --- a/cwl_wes/ga4gh/wes/__init__.py +++ b/cwl_wes/ga4gh/wes/__init__.py @@ -0,0 +1 @@ +"""cwl-WES controllers package.""" diff --git a/cwl_wes/ga4gh/wes/endpoints/__init__.py b/cwl_wes/ga4gh/wes/endpoints/__init__.py index e69de29..856f49d 100644 --- a/cwl_wes/ga4gh/wes/endpoints/__init__.py +++ b/cwl_wes/ga4gh/wes/endpoints/__init__.py @@ -0,0 +1 @@ +"""cwl-WES controllers helper functions.""" diff --git a/cwl_wes/ga4gh/wes/endpoints/cancel_run.py b/cwl_wes/ga4gh/wes/endpoints/cancel_run.py deleted file mode 100644 index a94d4c0..0000000 --- a/cwl_wes/ga4gh/wes/endpoints/cancel_run.py +++ /dev/null @@ -1,92 +0,0 @@ -"""Utility functions for POST /runs/{run_id}/cancel endpoints.""" - -import logging -from typing import Dict - -from celery import (Celery, uuid) -from connexion.exceptions import Forbidden - -from foca.config.config_parser import get_conf -from cwl_wes.errors.errors import WorkflowNotFound -from cwl_wes.ga4gh.wes.states import States -from cwl_wes.tasks.tasks.cancel_run import task__cancel_run - - -# Get logger instance -logger = logging.getLogger(__name__) - - 
diff --git a/cwl_wes/ga4gh/wes/endpoints/cancel_run.py b/cwl_wes/ga4gh/wes/endpoints/cancel_run.py
deleted file mode 100644
index a94d4c0..0000000
--- a/cwl_wes/ga4gh/wes/endpoints/cancel_run.py
+++ /dev/null
@@ -1,92 +0,0 @@
-"""Utility functions for POST /runs/{run_id}/cancel endpoints."""
-
-import logging
-from typing import Dict
-
-from celery import (Celery, uuid)
-from connexion.exceptions import Forbidden
-
-from foca.config.config_parser import get_conf
-from cwl_wes.errors.errors import WorkflowNotFound
-from cwl_wes.ga4gh.wes.states import States
-from cwl_wes.tasks.tasks.cancel_run import task__cancel_run
-
-
-# Get logger instance
-logger = logging.getLogger(__name__)
-
-
-# Utility function for endpoint POST /runs/<run_id>/delete
-def cancel_run(
-    config: Dict,
-    celery_app: Celery,
-    run_id: str,
-    *args,
-    **kwargs
-) -> Dict:
-    """Cancels running workflow."""
-    collection_runs = get_conf(config, 'database', 'collections', 'runs')
-    document = collection_runs.find_one(
-        filter={'run_id': run_id},
-        projection={
-            'user_id': True,
-            'task_id': True,
-            'api.state': True,
-            '_id': False,
-        }
-    )
-
-    # Raise error if workflow run was not found
-    if not document:
-        logger.error("Run '{run_id}' not found.".format(run_id=run_id))
-        raise WorkflowNotFound
-
-    # Raise error trying to access workflow run that is not owned by user
-    # Only if authorization enabled
-    if 'user_id' in kwargs and document['user_id'] != kwargs['user_id']:
-        logger.error(
-            (
-                "User '{user_id}' is not allowed to access workflow run "
-                "'{run_id}'."
-            ).format(
-                user_id=kwargs['user_id'],
-                run_id=run_id,
-            )
-        )
-        raise Forbidden
-
-    # Cancel unfinished workflow run in background
-    if document['api']['state'] in States.CANCELABLE:
-
-        # Get timeout duration
-        timeout_duration = get_conf(
-            config,
-            'api',
-            'endpoint_params',
-            'timeout_cancel_run',
-        )
-
-        # Execute cancelation task in background
-        task_id = uuid()
-        logger.info(
-            (
-                "Canceling run '{run_id}' as background task "
-                "'{task_id}'..."
-            ).format(
-                run_id=run_id,
-                task_id=task_id,
-            )
-        )
-        task__cancel_run.apply_async(
-            None,
-            {
-                'run_id': run_id,
-                'task_id': document['task_id'],
-                'token': kwargs.get('jwt'),
-            },
-            task_id=task_id,
-            soft_time_limit=timeout_duration,
-        )
-
-    response = {'run_id': run_id}
-    return response
diff --git a/cwl_wes/ga4gh/wes/endpoints/get_run_log.py b/cwl_wes/ga4gh/wes/endpoints/get_run_log.py
deleted file mode 100644
index e618773..0000000
--- a/cwl_wes/ga4gh/wes/endpoints/get_run_log.py
+++ /dev/null
@@ -1,55 +0,0 @@
-"""Utility function for GET /runs/{run_id} endpoint."""
-
-from connexion.exceptions import Forbidden
-import logging
-
-from typing import Dict
-
-from foca.config.config_parser import get_conf
-from cwl_wes.errors.errors import WorkflowNotFound
-
-
-# Get logger instance
-logger = logging.getLogger(__name__)
-
-
-# Utility function for endpoint GET /runs/<run_id>
-def get_run_log(
-    config: Dict,
-    run_id: str,
-    *args,
-    **kwargs
-) -> Dict:
-    """Gets detailed log information for specific run."""
-    collection_runs = get_conf(config, 'database', 'collections', 'runs')
-    document = collection_runs.find_one(
-        filter={'run_id': run_id},
-        projection={
-            'user_id': True,
-            'api': True,
-            '_id': False,
-        }
-    )
-
-    # Raise error if workflow run was not found or has no task ID
-    if document:
-        run_log = document['api']
-    else:
-        logger.error("Run '{run_id}' not found.".format(run_id=run_id))
-        raise WorkflowNotFound
-
-    # Raise error trying to access workflow run that is not owned by user
-    # Only if authorization enabled
-    if 'user_id' in kwargs and document['user_id'] != kwargs['user_id']:
-        logger.error(
-            (
-                "User '{user_id}' is not allowed to access workflow run "
-                "'{run_id}'."
-            ).format(
-                user_id=kwargs['user_id'],
-                run_id=run_id,
-            )
-        )
-        raise Forbidden
-
-    return run_log
diff --git a/cwl_wes/ga4gh/wes/endpoints/get_run_status.py b/cwl_wes/ga4gh/wes/endpoints/get_run_status.py
deleted file mode 100644
index dc67a0c..0000000
--- a/cwl_wes/ga4gh/wes/endpoints/get_run_status.py
+++ /dev/null
@@ -1,59 +0,0 @@
-"""Utility function for GET /runs/{run_id}/status endpoint."""
-
-from connexion.exceptions import Forbidden
-import logging
-
-from typing import Dict
-
-from foca.config.config_parser import get_conf
-from cwl_wes.errors.errors import WorkflowNotFound
-
-
-# Get logger instance
-logger = logging.getLogger(__name__)
-
-
-# Utility function for endpoint GET /runs/<run_id>/status
-def get_run_status(
-    config: Dict,
-    run_id: str,
-    *args,
-    **kwargs
-) -> Dict:
-    """Gets status information for specific run."""
-    collection_runs = get_conf(config, 'database', 'collections', 'runs')
-    document = collection_runs.find_one(
-        filter={'run_id': run_id},
-        projection={
-            'user_id': True,
-            'api.state': True,
-            '_id': False,
-        }
-    )
-
-    # Raise error if workflow run was not found or has no task ID
-    if document:
-        state = document['api']['state']
-    else:
-        logger.error("Run '{run_id}' not found.".format(run_id=run_id))
-        raise WorkflowNotFound
-
-    # Raise error trying to access workflow run that is not owned by user
-    # Only if authorization enabled
-    if 'user_id' in kwargs and document['user_id'] != kwargs['user_id']:
-        logger.error(
-            (
-                "User '{user_id}' is not allowed to access workflow run "
-                "'{run_id}'."
-            ).format(
-                user_id=kwargs['user_id'],
-                run_id=run_id,
-            )
-        )
-        raise Forbidden
-
-    response = {
-        'run_id': run_id,
-        'state': state
-    }
-    return response
diff --git a/cwl_wes/ga4gh/wes/endpoints/get_service_info.py b/cwl_wes/ga4gh/wes/endpoints/get_service_info.py
deleted file mode 100644
index ee94746..0000000
--- a/cwl_wes/ga4gh/wes/endpoints/get_service_info.py
+++ /dev/null
@@ -1,82 +0,0 @@
-"""Utility functions for GET /service-info endpoint."""
-
-from copy import deepcopy
-from datetime import datetime
-import logging
-from typing import (Any, Dict, Mapping)
-
-from pymongo import collection as Collection
-
-import cwl_wes.database.db_utils as db_utils
-from cwl_wes.ga4gh.wes.states import States
-
-
-# Get logger instance
-logger = logging.getLogger(__name__)
-
-
-# Helper function GET /service-info
-def get_service_info(
-    config: Mapping,
-    silent: bool = False,
-    *args: Any,
-    **kwarg: Any
-):
-    """Returns readily formatted service info or `None` (in silent mode);
-    creates service info database document if it does not exist."""
-    collection_service_info = config['database']['collections']['service_info']
-    collection_runs = config['database']['collections']['runs']
-    service_info = deepcopy(config['service_info'])
-
-    # Write current service info to database if absent or different from latest
-    if not service_info == db_utils.find_one_latest(collection_service_info):
-        collection_service_info.insert(service_info)
-        logger.info('Updated service info: {service_info}'.format(
-            service_info=service_info,
-        ))
-    else:
-        logger.debug('No change in service info. Not updated.')
-
-    # Return None when called in silent mode:
-    if silent:
-        return None
-
-    # Add current system state counts
-    service_info['system_state_counts'] = __get_system_state_counts(
-        collection_runs
-    )
-
-    # Add timestamps
-    _id = db_utils.find_id_latest(collection_service_info)
-    if _id:
-        service_info['tags']['last_service_info_update'] = _id.generation_time
-    service_info['tags']['current_time'] = datetime.utcnow().isoformat()
-
-    return service_info
-
-
-def __get_system_state_counts(collection: Collection) -> Dict[str, int]:
-    """Gets current system state counts."""
-    current_counts = __init_system_state_counts()
-
-    # Query database for workflow run states
-    cursor = collection.find(
-        filter={},
-        projection={
-            'api.state': True,
-            '_id': False,
-        }
-    )
-
-    # Iterate over states and increase counter
-    for record in cursor:
-        current_counts[record['api']['state']] += 1
-
-    return current_counts
-
-
-def __init_system_state_counts() -> Dict[str, int]:
-    """Initializes system state counts."""
-    # TODO: Get states programmatically or define as enum
-    # Set all state counts to zero
-    return {state: 0 for state in States.ALL}
diff --git a/cwl_wes/ga4gh/wes/endpoints/list_runs.py b/cwl_wes/ga4gh/wes/endpoints/list_runs.py
deleted file mode 100644
index c45ce50..0000000
--- a/cwl_wes/ga4gh/wes/endpoints/list_runs.py
+++ /dev/null
@@ -1,85 +0,0 @@
-"""Utility function for GET /runs endpoint."""
-import logging
-from typing import Dict
-
-from bson.objectid import ObjectId
-
-from foca.config.config_parser import get_conf
-
-
-# Get logger instance
-logger = logging.getLogger(__name__)
-
-
-# Utility function for endpoint GET /runs
-def list_runs(
-    config: Dict,
-    *args,
-    **kwargs
-) -> Dict:
-    """Lists IDs and status for all workflow runs."""
-    collection_runs = get_conf(config, 'database', 'collections', 'runs')
-
-    # Fall back to default page size if not provided by user
-    if 'page_size' in kwargs:
-        page_size = kwargs['page_size']
-    else:
-        page_size = (
-            config
-            ['api']
-            ['endpoint_params']
-            ['default_page_size']
-        )
-
-    # Extract/set page token
-    if 'page_token' in kwargs:
-        page_token = kwargs['page_token']
-    else:
-        page_token = ''
-
-    # Initialize filter dictionary
-    filter_dict = {}
-
-    # Add filter for user-owned runs if user ID is available
-    if 'user_id' in kwargs:
-        filter_dict['user_id'] = kwargs['user_id']
-
-    # Add pagination filter based on last object ID
-    if page_token != '':
-        filter_dict['_id'] = {'$lt': ObjectId(page_token)}
-
-    # Query database for workflow runs
-    cursor = collection_runs.find(
-        filter=filter_dict,
-        projection={
-            'run_id': True,
-            'api.state': True,
-        }
-    # Sort results by descending object ID (+/- newest to oldest)
-    ).sort(
-        '_id', -1
-    # Implement page size limit
-    ).limit(
-        page_size
-    )
-
-    # Convert cursor to list
-    runs_list = list(cursor)
-
-    # Get next page token from ID of last run in cursor
-    if runs_list:
-        next_page_token = str(runs_list[-1]['_id'])
-    else:
-        next_page_token = ''
-
-    # Reshape list of runs
-    for run in runs_list:
-        del run['_id']
-        run['state'] = run['api']['state']
-        del run['api']
-
-    # Build and return response
-    return {
-        'next_page_token': next_page_token,
-        'runs': runs_list
-    }
diff --git a/cwl_wes/ga4gh/wes/endpoints/run_workflow.py b/cwl_wes/ga4gh/wes/endpoints/run_workflow.py
index 3c23c9b..33dda8a 100644
--- a/cwl_wes/ga4gh/wes/endpoints/run_workflow.py
+++ b/cwl_wes/ga4gh/wes/endpoints/run_workflow.py
@@ -1,29 +1,27 @@
 """Utility functions for POST /runs endpoint."""
 
+from json import 
decoder, loads import logging -import os +from pathlib import Path import re import shutil -import string # noqa: F401 import subprocess +from typing import Dict from celery import uuid -from flask import current_app -from json import (decoder, loads) -from pymongo.errors import DuplicateKeyError -from random import choice -from typing import (Dict, List, Optional) +from flask import Config, request +from foca.utils.misc import generate_id +from pymongo.collection import Collection +from pymongo.errors import DuplicateKeyError, PyMongoError from yaml import dump from werkzeug.datastructures import ImmutableMultiDict from werkzeug.utils import secure_filename -from flask import request - -from foca.config.config_parser import (get_conf, get_conf_type) -from cwl_wes.errors.errors import BadRequest -from cwl_wes.tasks.tasks.run_workflow import task__run_workflow -from cwl_wes.ga4gh.wes.endpoints.utils.drs import translate_drs_uris +from cwl_wes.exceptions import BadRequest +from cwl_wes.tasks.run_workflow import task__run_workflow +from cwl_wes.utils.drs import translate_drs_uris +# pragma pylint: disable=unused-argument # Get logger instance logger = logging.getLogger(__name__) @@ -31,12 +29,19 @@ # Utility function for endpoint POST /runs def run_workflow( - config: Dict, - form_data: ImmutableMultiDict, - *args, - **kwargs + config: Config, form_data: ImmutableMultiDict, *args, **kwargs ) -> Dict: - """Executes workflow and save info to database; returns unique run id.""" + """Execute workflow and save info to database. + + Args: + config: Flask configuration object. + form_data: Form data from POST /runs request. + *args: Variable length argument list. + **kwargs: Arbitrary keyword arguments. + + Returns: + Unique run id. + """ # Validate data and prepare run environment form_data_dict = __immutable_multi_dict_to_nested_dict( multi_dict=form_data @@ -45,34 +50,44 @@ def run_workflow( __check_service_info_compatibility(data=form_data_dict) document = __init_run_document(data=form_data_dict) document = __create_run_environment( - config=config, - document=document, - **kwargs + config=config, document=document, **kwargs ) # Start workflow run in background - __run_workflow( - config=config, - document=document, - **kwargs - ) + __run_workflow(config=config, document=document, **kwargs) - response = {'run_id': document['run_id']} + response = {"run_id": document["run_id"]} return response -def __secure_join(basedir: str, fname: str) -> str: +def __secure_join(basedir: Path, fname: str) -> Path: + """Generate a secure path for a file. + + Args: + basedir: Base directory. + fname: Filename. + + Returns: + Secure path. + """ fname = secure_filename(fname) if not fname: # Replace by a random filename fname = uuid() - return os.path.join(basedir, fname) + return basedir / fname def __immutable_multi_dict_to_nested_dict( - multi_dict: ImmutableMultiDict + multi_dict: ImmutableMultiDict, ) -> Dict: - """Converts ImmutableMultiDict to nested dictionary.""" + """Convert ImmutableMultiDict to nested dictionary. + + Args: + multi_dict: Immutable multi dictionary. + + Returns: + Nested dictionary. + """ # Convert to flat dictionary nested_dict = multi_dict.to_dict(flat=True) for key in nested_dict: @@ -85,9 +100,13 @@ def __immutable_multi_dict_to_nested_dict( def __validate_run_workflow_request(data: Dict) -> None: - """Validates presence and types of workflow run request form data; sets - defaults for optional fields.""" + """Validate workflow run request form data. + Set defaults for optional fields. 
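The conversion helper flattens the Werkzeug multi-dict and then, judging by the `decoder`/`loads` imports above, JSON-decodes each field that parses, leaving plain strings untouched. The loop body is elided by the diff context, so the fallback shown here is an assumption:

```python
from json import decoder, loads

from werkzeug.datastructures import ImmutableMultiDict

form = ImmutableMultiDict(
    {
        "workflow_type": "CWL",
        "workflow_params": '{"input": {"class": "File", "path": "in.txt"}}',
    }
)
nested_dict = form.to_dict(flat=True)  # keep only the first value per key
for key, value in nested_dict.items():
    try:
        nested_dict[key] = loads(value)  # decode JSON-encoded fields
    except decoder.JSONDecodeError:
        pass  # keep non-JSON values as plain strings
# nested_dict["workflow_params"] is now a dict; "workflow_type" stays a str
```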
+ + Args: + data: Workflow run request form data. + """ # The form data is not validated properly because all types except # 'workflow_attachment' are string and none are labeled as required # Considering the 'RunRequest' model in the specs, the following @@ -115,20 +134,20 @@ def __validate_run_workflow_request(data: Dict) -> None: # required = False params_required = { - 'workflow_params', - 'workflow_type', - 'workflow_type_version', - 'workflow_url', + "workflow_params", + "workflow_type", + "workflow_type_version", + "workflow_url", } params_str = [ - 'workflow_type', - 'workflow_type_version', - 'workflow_url', + "workflow_type", + "workflow_type_version", + "workflow_url", ] params_dict = [ - 'workflow_params', - 'workflow_engine_parameters', - 'tags', + "workflow_params", + "workflow_engine_parameters", + "tags", ] # Raise error if any required params are missing @@ -153,81 +172,89 @@ def __validate_run_workflow_request(data: Dict) -> None: invalid = True if invalid: - logger.error('POST request does not conform to schema.') + logger.error("POST request does not conform to schema.") raise BadRequest - return None - def __check_service_info_compatibility(data: Dict) -> None: - """Checks compatibility with service info; raises BadRequest.""" + """Check compatibility with service info. Not implemented.""" # TODO: implement - return None def __init_run_document(data: Dict) -> Dict: - """Initializes workflow run document.""" - document: Dict = dict() - document['api'] = dict() - document['internal'] = dict() - document['api']['request'] = data - document['api']['state'] = 'UNKNOWN' - document['api']['run_log'] = dict() - document['api']['task_logs'] = list() - document['api']['outputs'] = dict() + """Initialize workflow run document. + + Args: + data: Workflow run request form data. + + Returns: + Workflow run document. + """ + document: Dict = {} + document["api"] = {} + document["internal"] = {} + document["api"]["request"] = data + document["api"]["state"] = "UNKNOWN" + document["api"]["run_log"] = {} + document["api"]["task_logs"] = [] + document["api"]["outputs"] = {} return document -def __create_run_environment( - config: Dict, - document: Dict, - **kwargs -) -> Dict: - """Creates unique run identifier and permanent and temporary storage - directories for current run.""" - collection_runs = get_conf(config, 'database', 'collections', 'runs') - out_dir = get_conf(config, 'storage', 'permanent_dir') - tmp_dir = get_conf(config, 'storage', 'tmp_dir') - run_id_charset = eval(get_conf(config, 'database', 'run_id', 'charset')) - run_id_length = get_conf(config, 'database', 'run_id', 'length') +def __create_run_environment(config: Config, document: Dict, **kwargs) -> Dict: + """Create run environment. + + Create unique run identifier and permanent and temporary storage + directories for current run. + + Args: + config: Flask configuration object. + document: Workflow run document. + **kwargs: Additional keyword arguments. + + Returns: + Workflow run document.
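Per the checks above, a run request must carry `workflow_params`, `workflow_type`, `workflow_type_version`, and `workflow_url` as form fields. A hedged client-side example (host, port, and payload values are placeholders; the standard WES base path is assumed):

```python
import requests

form = {
    "workflow_params": '{"input": {"class": "File", "path": "in.txt"}}',
    "workflow_type": "CWL",
    "workflow_type_version": "v1.0",
    "workflow_url": "https://github.com/org/repo/blob/main/workflow.cwl",
}
response = requests.post(
    "http://localhost:8080/ga4gh/wes/v1/runs",  # assumed deployment URL
    data=form,
)
print(response.json())  # e.g. {"run_id": "A1B2C3"}
```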
+ """ + collection_runs: Collection = ( + config.foca.db.dbs["cwl-wes-db"].collections["runs"].client + ) + controller_conf = config.foca.custom.controller + info_conf = config.foca.custom.service_info + storage_conf = config.foca.custom.storage # Keep on trying until a unique run id was found and inserted - # TODO: If no more possible IDs => inf loop; fix (raise custom error; 500 - # to user) + # TODO: If no more possible IDs => inf loop; fix while True: # Create unique run and task ids - run_id = __create_run_id( - charset=run_id_charset, - length=run_id_length, + run_id = generate_id( + charset=controller_conf.runs_id.charset, + length=controller_conf.runs_id.length, ) task_id = uuid() # Set temporary and output directories - current_tmp_dir = os.path.abspath(os.path.join(tmp_dir, run_id)) - current_out_dir = os.path.abspath(os.path.join(out_dir, run_id)) + current_tmp_dir = storage_conf.tmp_dir.resolve() / run_id + current_out_dir = storage_conf.permanent_dir.resolve() / run_id # Try to create workflow run directory (temporary) try: - # TODO: Think about permissions - # TODO: Add working dir (currently one has to run the app from - # outermost dir) - os.makedirs(current_tmp_dir) - os.makedirs(current_out_dir) + current_tmp_dir.mkdir(parents=True, exist_ok=True) + current_out_dir.mkdir(parents=True, exist_ok=True) # Try new run id if directory already exists except FileExistsError: continue # Add run/task/user identifier, temp/output directories to document - document['run_id'] = run_id - document['task_id'] = task_id - if 'user_id' in kwargs: - document['user_id'] = kwargs['user_id'] + document["run_id"] = run_id + document["task_id"] = task_id + if "user_id" in kwargs: + document["user_id"] = kwargs["user_id"] else: - document['user_id'] = None - document['internal']['tmp_dir'] = current_tmp_dir - document['internal']['out_dir'] = current_out_dir + document["user_id"] = None + document["internal"]["tmp_dir"] = str(current_tmp_dir) + document["internal"]["out_dir"] = str(current_out_dir) # Process worflow attachments document = __process_workflow_attachments(document) @@ -247,67 +274,38 @@ def __create_run_environment( # Catch other database errors # TODO: implement properly - except Exception as e: - print('Database error') - print(e) + except PyMongoError as exc: + print("Database error") + print(exc) break # Exit loop break - + # translate DRS URIs to access URLs - file_types: List[str] = get_conf_type( - current_app.config, - 'drs', - 'file_types', - types=(list), - ) - supported_access_methods: List[str] = get_conf_type( - current_app.config, - 'service_info', - 'supported_filesystem_protocols', - types=(list), - ) - port: Optional[int] = get_conf( - current_app.config, - 'drs', - 'port', - ) - base_path: Optional[str] = get_conf( - current_app.config, - 'drs', - 'base_path', - ) - use_http: bool = get_conf( - current_app.config, - 'drs', - 'use_http', - ) translate_drs_uris( - path=document['internal']['workflow_files'], - file_types=file_types, - supported_access_methods=supported_access_methods, - port=port, - base_path=base_path, - use_http=use_http, + path=document["internal"]["workflow_files"], + file_types=controller_conf.drs_server.file_types, + supported_access_methods=info_conf.supported_filesystem_protocols, + port=controller_conf.drs_server.port, + base_path=controller_conf.drs_server.base_path, + use_http=controller_conf.drs_server.use_http, ) return document -def __create_run_id( - charset: str = '0123456789', - length: int = 6 -) -> str: - """Creates random run 
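FOCA's `generate_id` replaces the old `eval`-based charset handling and `__create_run_id`; one iteration of the retry loop boils down to the following sketch (paths are illustrative):

```python
from pathlib import Path

from foca.utils.misc import generate_id

run_id = generate_id(charset="0123456789", length=6)  # e.g. "483920"
run_tmp_dir = Path("/data/tmp").resolve() / run_id
run_tmp_dir.mkdir(parents=True, exist_ok=True)
# With exist_ok=True a directory collision no longer raises; duplicate IDs
# are instead caught when inserting the run document raises
# DuplicateKeyError and the loop draws a new ID.
```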
ID.""" - return ''.join(choice(charset) for __ in range(length)) +def __process_workflow_attachments( # pylint: disable=too-many-branches + data: Dict, +) -> Dict: + """Process workflow attachments. + Args: + data: Workflow run document. -def __process_workflow_attachments(data: Dict) -> Dict: - """Processes workflow attachments.""" - # TODO: implement properly - # Current workaround until processing of workflow attachments is - # implemented + Returns: + Workflow run document. + """ # Use 'workflow_url' for path to (main) CWL workflow file on local file # system or in Git repo # Use 'workflow_params' or file in Git repo to generate YAML file @@ -337,79 +335,61 @@ def __process_workflow_attachments(data: Dict) -> Dict: # specified, are: ',', ';', ':', '|' re_git_file = re.compile( ( - r'^(https?:.*)\/(blob|src|tree)\/(.*?)\/(.*?\.(cwl|yml|yaml|json))' - r'[,:;|]?(.*\.(yml|yaml|json))?' + r"^(?Phttps?:.*)\/(blob|src|tree)\/" + r"(?P.*?)\/(?P.*?\.(cwl|yml|yaml|json))" + r"[,:;|]?(?P.*\.(yml|yaml|json))?" ) ) # Create directory for storing workflow files - data['internal']['workflow_files'] = workflow_dir = os.path.abspath( - os.path.join( - data['internal']['out_dir'], 'workflow_files' - ) - ) - try: - os.mkdir(workflow_dir) - - except OSError: - # TODO: Do something more reasonable here - pass + workflow_dir = Path(data["internal"]["out_dir"]) / "workflow_files" + data["internal"]["workflow_files"] = str(workflow_dir) + workflow_dir.mkdir() # Get main workflow file - user_string = data['api']['request']['workflow_url'] - m = re_git_file.match(user_string) + match = re_git_file.match(data["api"]["request"]["workflow_url"]) # Get workflow from Git repo if regex matches - if m: - - repo_url = '.'.join([m.group(1), 'git']) - branch_commit = m.group(3) - cwl_path = m.group(4) + if match: # Try to clone repo if not subprocess.run( [ - 'git', - 'clone', - repo_url, - os.path.join(workflow_dir, 'repo') + "git", + "clone", + match.group("repo_url") + ".git", + str(workflow_dir / "repo"), ], - check=True + check=True, ): logger.error( - ( - 'Could not clone Git repository. Check value of ' - "'workflow_url' in run request." - ) + "Could not clone Git repository. Check value of " + "'workflow_url' in run request." ) raise BadRequest # Try to checkout branch/commit if not subprocess.run( [ - 'git', - '--git-dir', - os.path.join(workflow_dir, 'repo', '.git'), - '--work-tree', - os.path.join(workflow_dir, 'repo'), - 'checkout', - branch_commit + "git", + "--git-dir", + str(workflow_dir / "repo" / ".git"), + "--work-tree", + str(workflow_dir / "repo"), + "checkout", + match.group("branch_commit"), ], - check=True + check=True, ): logger.error( - ( - 'Could not checkout repository commit/branch. Check value ' - "of 'workflow_url' in run request." - ) + "Could not checkout repository commit/branch. Check value " + "of 'workflow_url' in run request." ) raise BadRequest # Set CWL path - data['internal']['cwl_path'] = os.path.join( - workflow_dir, - 'repo', - cwl_path + data["internal"]["cwl_path"] = str( + workflow_dir / "repo" / match.group("cwl_path") ) # Else assume value of 'workflow_url' represents file on local file system, @@ -428,126 +408,114 @@ def __process_workflow_attachments(data: Dict) -> Dict: shutil.copyfileobj(attachment.stream, dest) # Adjust workflow_url to point to workflow directory. 
- req_data = data['api']['request'] - workflow_url = __secure_join(workflow_dir, req_data['workflow_url']) - if os.path.exists(workflow_url): - req_data['workflow_url'] = workflow_url + req_data = data["api"]["request"] + workflow_url = __secure_join( + workflow_dir, req_data["workflow_url"] + ) + if workflow_url.exists(): + req_data["workflow_url"] = str(workflow_url) # Set main CWL workflow file path - data['internal']['cwl_path'] = os.path.abspath( - data['api']['request']['workflow_url'] - ) - - # Extract name and extensions of workflow - workflow_name_ext = os.path.splitext( - os.path.basename( - data['internal']['cwl_path'] - ) + data["internal"]["cwl_path"] = str( + Path(data["api"]["request"]["workflow_url"]).resolve() ) # Get parameter file - workflow_name_ext = os.path.splitext( - os.path.basename( - data['internal']['cwl_path'] - ) - ) - + workflow_base_name = Path(data["internal"]["cwl_path"]).stem # Try to get parameters from 'workflow_params' field - if data['api']['request']['workflow_params']: + if data["api"]["request"]["workflow_params"]: # Replace `DRS URIs` in 'workflow_params' # replace_drs_uris(data['api']['request']['workflow_params']) - data['internal']['param_file_path'] = os.path.join( - workflow_dir, - '.'.join([ - str(workflow_name_ext[0]), - 'yml', - ]), + data["internal"]["param_file_path"] = str( + workflow_dir / f"{workflow_base_name}.yml" ) - with open(data['internal']['param_file_path'], 'w') as yaml_file: + with open( + data["internal"]["param_file_path"], + mode="w", + encoding="utf-8", + ) as yaml_file: dump( - data['api']['request']['workflow_params'], + data["api"]["request"]["workflow_params"], yaml_file, allow_unicode=True, - default_flow_style=False + default_flow_style=False, ) # Or from provided relative file path in repo - elif m and m.group(6): - param_path = m.group(6) - data['internal']['param_file_path'] = os.path.join( - workflow_dir, - 'repo', - param_path, + elif match and match.group("params_path"): + data["internal"]["param_file_path"] = str( + workflow_dir / "repo" / match.group("params_path") ) # Else try to see if there is a 'yml', 'yaml' or 'json' file with exactly # the same basename as CWL in same dir else: - param_file_extensions = ['yml', 'yaml', 'json'] - for ext in param_file_extensions: - possible_param_file = os.path.join( - workflow_dir, - 'repo', - '.'.join([ - str(workflow_name_ext[0]), - ext, - ]), + for ext in ["yml", "yaml", "json"]: + candidate_file = ( + workflow_dir / "repo" / f"{workflow_base_name}.{ext}" ) - if os.path.isfile(possible_param_file): - data['internal']['param_file_path'] = possible_param_file + if candidate_file.is_file(): + data["internal"]["param_file_path"] = str(candidate_file) break - # Raise BadRequest if not parameter file was found - if 'param_file_path' not in data['internal']: + # Raise BadRequest if no parameter file was found + if "param_file_path" not in data["internal"]: raise BadRequest # Extract workflow attachments from form data dictionary - if 'workflow_attachment' in data['api']['request']: - - # TODO: do something with data['workflow_attachment'] - - # Strip workflow attachments from data - del data['api']['request']['workflow_attachment'] + if "workflow_attachment" in data["api"]["request"]: + del data["api"]["request"]["workflow_attachment"] # Return form data stripped of workflow attachments return data -def __run_workflow( - config: Dict, - document: Dict, - **kwargs -) -> None: - """Helper function `run_workflow()`.""" - tes_url = get_conf(config, 'tes', 'url') - 
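As the code above shows, the parameter file is resolved in three steps: inline `workflow_params` are dumped to YAML; otherwise a repo-relative `params_path` from the URL match is used; otherwise the run directory is searched for a file sharing the main CWL file's basename. A minimal sketch of that last fallback (paths are illustrative):

```python
from pathlib import Path

workflow_dir = Path("/data/runs/A1B2C3/workflow_files")
workflow_base_name = "main"  # stem of the main CWL file

param_file = None
for ext in ("yml", "yaml", "json"):
    candidate = workflow_dir / "repo" / f"{workflow_base_name}.{ext}"
    if candidate.is_file():
        param_file = candidate
        break
if param_file is None:
    # mirrors the controller, which raises BadRequest in this case
    raise FileNotFoundError("no parameter file found")
```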
remote_storage_url = get_conf(config, 'storage', 'remote_storage_url') - run_id = document['run_id'] - task_id = document['task_id'] - tmp_dir = document['internal']['tmp_dir'] - cwl_path = document['internal']['cwl_path'] - param_file_path = document['internal']['param_file_path'] +def __run_workflow(config: Config, document: Dict, **kwargs) -> None: + """Run workflow helper function. + + Args: + config: Flask configuration object. + document: Workflow run document. + **kwargs: Additional keyword arguments. + + Raises: + BadRequest: If workflow run fails. + """ + tes_url = config.foca.custom.controller.tes_server.url + remote_storage_url = config.foca.custom.storage.remote_storage_url + run_id = document["run_id"] + task_id = document["task_id"] + tmp_dir = document["internal"]["tmp_dir"] + cwl_path = document["internal"]["cwl_path"] + param_file_path = document["internal"]["param_file_path"] # Build command command_list = [ - 'cwl-tes', - '--debug', - '--leave-outputs', - '--remote-storage-url', remote_storage_url, - '--tes', tes_url, + "cwl-tes", + "--debug", + "--leave-outputs", + "--remote-storage-url", + remote_storage_url, + "--tes", + tes_url, cwl_path, - param_file_path + param_file_path, ] # Add authorization parameters - if 'jwt' in kwargs \ - and 'claims' in kwargs \ - and 'public_key' in kwargs['claims']: + if ( + "jwt" in kwargs + and "claims" in kwargs + and "public_key" in kwargs["claims"] + ): auth_params = [ - '--token-public-key', kwargs['claims']['public_key'], - '--token', kwargs['jwt'], + "--token-public-key", + kwargs["claims"]["public_key"], + "--token", + kwargs["jwt"], ] command_list[2:2] = auth_params @@ -566,32 +534,20 @@ def __run_workflow( # ] # Get timeout duration - timeout_duration = get_conf( - config, - 'api', - 'endpoint_params', - 'timeout_run_workflow', - ) + timeout_duration = config.foca.custom.controller.timeout_run_workflow # Execute command as background task logger.info( - ( - "Starting execution of run '{run_id}' as task '{task_id}' in " - "'{tmp_dir}'..." - ).format( - run_id=run_id, - task_id=task_id, - tmp_dir=tmp_dir, - ) + f"Starting execution of run '{run_id}' as task '{task_id}' in: " + f"{tmp_dir}" ) task__run_workflow.apply_async( None, { - 'command_list': command_list, - 'tmp_dir': tmp_dir, - 'token': kwargs.get('jwt'), + "command_list": command_list, + "tmp_dir": tmp_dir, + "token": kwargs.get("jwt"), }, task_id=task_id, soft_time_limit=timeout_duration, ) - return None diff --git a/cwl_wes/ga4gh/wes/endpoints/service_info.py b/cwl_wes/ga4gh/wes/endpoints/service_info.py new file mode 100644 index 0000000..b62f15d --- /dev/null +++ b/cwl_wes/ga4gh/wes/endpoints/service_info.py @@ -0,0 +1,105 @@ +"""Controller for the `/service-info route.""" + +import logging +from typing import Dict + +from bson.objectid import ObjectId +from flask import current_app +from pymongo.collection import Collection + +from cwl_wes.exceptions import ( + NotFound, +) +from cwl_wes.ga4gh.wes.states import States + +logger = logging.getLogger(__name__) + + +class ServiceInfo: + """Class for WES API service info server-side controller methods. + + Creates service info upon first request, if it does not exist. + + Attributes: + db_collections: FOCA MongoDB collections. + db_client: Database collection storing service info objects. + object_id: Database identifier for service info. 
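The cwl-tes invocation assembled by `__run_workflow` above ends up looking like this; URLs, paths, and token values are placeholders:

```python
command_list = [
    "cwl-tes",
    "--debug",
    "--leave-outputs",
    "--remote-storage-url", "ftp://ftp.example.org/upload",
    "--tes", "https://tes.example.org",
    "/data/runs/A1B2C3/workflow_files/repo/workflows/main.cwl",
    "/data/runs/A1B2C3/workflow_files/main.yml",
]
# When a JWT and its public key are available, the auth flags are spliced
# in right after "--debug" (index 2), as in the controller above:
auth_params = ["--token-public-key", "<PEM string>", "--token", "<JWT>"]
command_list[2:2] = auth_params
```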
+ """ + + def __init__(self) -> None: + """Construct class instance.""" + self.db_collections = current_app.config.foca.db.dbs[ + "cwl-wes-db" + ].collections + self.db_client: Collection = self.db_collections["service_info"].client + self.object_id: str = "000000000000000000000000" + + def get_service_info(self, get_counts: bool = True) -> Dict: + """Get latest service info from database. + + Args: + get_counts: Whether system state counts should be returned. + + Returns: + Latest service info details. + + Raises: + NotFound: Service info was not found. + """ + service_info = self.db_client.find_one( + {"_id": ObjectId(self.object_id)}, + {"_id": False}, + ) + if service_info is None: + raise NotFound + if get_counts: + service_info["system_state_counts"] = self._get_state_counts() + return service_info + + def set_service_info(self, data: Dict) -> None: + """Create or update service info. + + Arguments: + data: Dictionary of service info values. Cf. + """ + self.db_client.replace_one( + filter={"_id": ObjectId(self.object_id)}, + replacement=data, + upsert=True, + ) + logger.info(f"Service info set: {data}") + + def init_service_info_from_config(self) -> None: + """Initialize service info from config. + + Set service info only if it does not yet exist. + """ + service_info_conf = current_app.config.foca.custom.service_info.dict() + try: + service_info_db = self.get_service_info(get_counts=False) + except NotFound: + logger.info("Initializing service info.") + self.set_service_info(data=service_info_conf) + return + if service_info_db != service_info_conf: + logger.info( + "Service info configuration changed. Updating service info." + ) + self.set_service_info(data=service_info_conf) + return + logger.debug("Service info already initialized and up to date.") + + def _get_state_counts(self) -> Dict[str, int]: + """Get current system state counts.""" + current_counts = {state: 0 for state in States.ALL} + db_client_runs: Collection = self.db_collections["runs"].client + cursor = db_client_runs.find( + filter={}, + projection={ + "api.state": True, + "_id": False, + }, + ) + for record in cursor: + current_counts[record["api"]["state"]] += 1 + return current_counts diff --git a/cwl_wes/ga4gh/wes/endpoints/utils/__init__.py b/cwl_wes/ga4gh/wes/endpoints/utils/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/cwl_wes/ga4gh/wes/server.py b/cwl_wes/ga4gh/wes/server.py index d275b93..370aea3 100644 --- a/cwl_wes/ga4gh/wes/server.py +++ b/cwl_wes/ga4gh/wes/server.py @@ -1,118 +1,187 @@ """Controller for GA4GH WES API endpoints.""" import logging +from typing import Dict, Optional -from celery import current_app as celery_app +from bson.objectid import ObjectId +from celery import uuid from connexion import request from flask import current_app +from pymongo.collection import Collection -import cwl_wes.ga4gh.wes.endpoints.cancel_run as cancel_run -import cwl_wes.ga4gh.wes.endpoints.get_run_log as get_run_log -import cwl_wes.ga4gh.wes.endpoints.get_run_status as get_run_status -import cwl_wes.ga4gh.wes.endpoints.list_runs as list_runs -import cwl_wes.ga4gh.wes.endpoints.run_workflow as run_workflow -import cwl_wes.ga4gh.wes.endpoints.get_service_info as get_service_info -from cwl_wes.security.decorators import auth_token_optional +from foca.utils.logging import log_traffic +from cwl_wes.ga4gh.wes.endpoints.run_workflow import run_workflow +from cwl_wes.ga4gh.wes.endpoints.service_info import ServiceInfo +from cwl_wes.ga4gh.wes.states import States +from 
cwl_wes.tasks.cancel_run import task__cancel_run +from cwl_wes.utils.controllers import get_document_if_allowed + +# pragma pylint: disable=invalid-name,unused-argument # Get logger instance logger = logging.getLogger(__name__) # GET /runs/ -@auth_token_optional -def GetRunLog(run_id, *args, **kwargs): - """Returns detailed run info.""" - response = get_run_log.get_run_log( +@log_traffic +def GetRunLog(run_id, *args, **kwargs) -> Dict: + """Get detailed run info. + + Returns: + Run info object. + """ + document = get_document_if_allowed( config=current_app.config, run_id=run_id, - *args, - **kwargs + projection={ + "user_id": True, + "api": True, + "_id": False, + }, + user_id=kwargs.get("user_id"), ) - log_request(request, response) - return response + assert "api" in document, "'api' key not in document" + return document["api"] # POST /runs//cancel -@auth_token_optional -def CancelRun(run_id, *args, **kwargs): - """Cancels unfinished workflow run.""" - response = cancel_run.cancel_run( +@log_traffic +def CancelRun(run_id, *args, **kwargs) -> Dict: + """Cancel unfinished workflow run. + + Returns: + Run identifier object. + """ + document = get_document_if_allowed( config=current_app.config, - celery_app=celery_app, run_id=run_id, - *args, - **kwargs + projection={ + "user_id": True, + "task_id": True, + "api.state": True, + "_id": False, + }, + user_id=kwargs.get("user_id"), ) - log_request(request, response) - return response + assert "api" in document, "'api' key not in document" + assert "state" in document["api"], "'state' key not in document['api']" + + if document["api"]["state"] in States.CANCELABLE: + timeout_duration = ( + current_app.config.foca.custom.controller.timeout_cancel_run + ) + task_id = uuid() + logger.info(f"Canceling run '{run_id}' as background task: {task_id}") + task__cancel_run.apply_async( + None, + { + "run_id": run_id, + "task_id": document["task_id"], + "token": kwargs.get("jwt"), + }, + task_id=task_id, + soft_time_limit=timeout_duration, + ) + + return {"run_id": run_id} # GET /runs//status -@auth_token_optional -def GetRunStatus(run_id, *args, **kwargs): - """Returns run status.""" - response = get_run_status.get_run_status( +@log_traffic +def GetRunStatus(run_id, *args, **kwargs) -> Dict: + """Get run status. + + Returns: + Run status object. + """ + document = get_document_if_allowed( config=current_app.config, run_id=run_id, - *args, - **kwargs + projection={ + "user_id": True, + "api.state": True, + "_id": False, + }, + user_id=kwargs.get("user_id"), ) - log_request(request, response) - return response + assert "api" in document, "'api' key not in document" + assert "state" in document["api"], "'state' key not in document['api']" + return {"run_id": run_id, "state": document["api"]["state"]} # GET /service-info -def GetServiceInfo(*args, **kwargs): - """Returns service info.""" - response = get_service_info.get_service_info( - config=current_app.config, - *args, - **kwargs - ) - log_request(request, response) - return response +@log_traffic +def GetServiceInfo(*args, **kwargs) -> Optional[Dict]: + """Get service info. + + Returns: + Service info object. + """ + service_info = ServiceInfo() + return service_info.get_service_info() # GET /runs -@auth_token_optional -def ListRuns(*args, **kwargs): - """Lists IDs and status of all workflow runs.""" - response = list_runs.list_runs( - config=current_app.config, - *args, - **kwargs +@log_traffic +def ListRuns(*args, **kwargs) -> Dict: + """List IDs and status of all workflow runs. 
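`get_document_if_allowed` is imported from `cwl_wes.utils.controllers`, whose body is not part of this diff; given how the controllers above call it, a plausible sketch looks like this (treat it as an assumption, not the actual implementation):

```python
from typing import Dict, Optional

from connexion.exceptions import Forbidden
from flask import Config

from cwl_wes.exceptions import NotFound


def get_document_if_allowed(
    config: Config,
    run_id: str,
    projection: Dict,
    user_id: Optional[str],
) -> Dict:
    """Return the run document if it exists and the requester may see it."""
    collection = config.foca.db.dbs["cwl-wes-db"].collections["runs"].client
    document = collection.find_one({"run_id": run_id}, projection)
    if document is None:
        raise NotFound  # assumed 404 behavior for unknown run IDs
    if user_id is not None and document.get("user_id") != user_id:
        raise Forbidden
    return document
```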
+ + Returns: + Run list object. + """ + collection_runs: Collection = ( + current_app.config.foca.db.dbs["cwl-wes-db"].collections["runs"].client ) - log_request(request, response) - return response + page_size = kwargs.get( + "page_size", + current_app.config.foca.custom.controller.default_page_size, + ) + page_token = kwargs.get("page_token", "") + + filter_dict = {} + if "user_id" in kwargs: + filter_dict["user_id"] = kwargs["user_id"] + if page_token != "": + filter_dict["_id"] = {"$lt": ObjectId(page_token)} + cursor = ( + collection_runs.find( + filter=filter_dict, + projection={ + "run_id": True, + "api.state": True, + }, + ) + .sort("_id", -1) + .limit(page_size) + ) + runs_list = list(cursor) + + if runs_list: + next_page_token = str(runs_list[-1]["_id"]) + else: + next_page_token = "" + + for run in runs_list: + del run["_id"] + run["state"] = run["api"]["state"] + del run["api"] + + return {"next_page_token": next_page_token, "runs": runs_list} # POST /runs -@auth_token_optional -def RunWorkflow(*args, **kwargs): - """Executes workflow.""" - response = run_workflow.run_workflow( +@log_traffic +def RunWorkflow(*args, **kwargs) -> Dict: + """Trigger workflow run. + + Returns: + Run identifier object. + """ + response = run_workflow( config=current_app.config, form_data=request.form, *args, - **kwargs + **kwargs, ) - log_request(request, response) return response - - -def log_request(request, response): - """Writes request and response to log.""" - # TODO: write decorator for request logging - logger.debug( - ( - "Response to request \"{method} {path} {protocol}\" from " - "{remote_addr}: {response}" - ).format( - method=request.environ['REQUEST_METHOD'], - path=request.environ['PATH_INFO'], - protocol=request.environ['SERVER_PROTOCOL'], - remote_addr=request.environ['REMOTE_ADDR'], - response=response, - ) - ) diff --git a/cwl_wes/ga4gh/wes/states.py b/cwl_wes/ga4gh/wes/states.py index 3ab5fc2..2fd4002 100644 --- a/cwl_wes/ga4gh/wes/states.py +++ b/cwl_wes/ga4gh/wes/states.py @@ -1,25 +1,31 @@ -class States(): +"""WES run states.""" + +# pragma pylint: disable=too-few-public-methods + + +class States: + """WES run states.""" UNDEFINED = [ - 'UNKNOWN', + "UNKNOWN", ] CANCELABLE = [ - 'INITIALIZING', - 'PAUSED', - 'QUEUED', - 'RUNNING', + "INITIALIZING", + "PAUSED", + "QUEUED", + "RUNNING", ] UNFINISHED = CANCELABLE + [ - 'CANCELING', + "CANCELING", ] FINISHED = [ - 'COMPLETE', - 'CANCELED', - 'EXECUTOR_ERROR', - 'SYSTEM_ERROR', + "COMPLETE", + "CANCELED", + "EXECUTOR_ERROR", + "SYSTEM_ERROR", ] DEFINED = UNFINISHED + FINISHED diff --git a/cwl_wes/gunicorn.py b/cwl_wes/gunicorn.py new file mode 100644 index 0000000..9d297ae --- /dev/null +++ b/cwl_wes/gunicorn.py @@ -0,0 +1,31 @@ +"""Gunicorn entry point.""" + +import os + +from cwl_wes.app import init_app + +# Source application configuration +app = init_app().app +app_config = app.config.foca + +# Set Gunicorn number of workers and threads +workers = int(os.environ.get("GUNICORN_PROCESSES", "1")) +threads = int(os.environ.get("GUNICORN_THREADS", "1")) + +# Set allowed IPs +forwarded_allow_ips = "*" # pylint: disable=invalid-name + +# Set Gunicorn bind address +bind = f"{app_config.server.host}:{app_config.server.port}" + +# Source the environment variables for the Gunicorn workers +raw_env = [ + f"WES_CONFIG={os.environ.get('WES_CONFIG', '')}", + f"RABBIT_HOST={os.environ.get('RABBIT_HOST', app_config.jobs.host)}", + f"RABBIT_PORT={os.environ.get('RABBIT_PORT', app_config.jobs.port)}", + f"MONGO_HOST={os.environ.get('MONGO_HOST', 
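`ListRuns` above implements keyset pagination: the opaque `next_page_token` is the MongoDB ObjectId of the last run returned, and the `$lt` filter combined with the descending `_id` sort yields the next-newest slice. A hedged client-side loop (the deployment URL is assumed):

```python
import requests

base_url = "http://localhost:8080/ga4gh/wes/v1"  # assumed deployment URL
page_token = ""
while True:
    params = {"page_size": 2}
    if page_token:
        params["page_token"] = page_token
    page = requests.get(f"{base_url}/runs", params=params).json()
    for run in page["runs"]:
        print(run["run_id"], run["state"])
    page_token = page["next_page_token"]
    if not page_token:
        break  # an empty token marks the last page
```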
app_config.db.host)}", + f"MONGO_PORT={os.environ.get('MONGO_PORT', app_config.db.port)}", + f"MONGO_DBNAME={os.environ.get('MONGO_DBNAME', 'cwl-wes-db')}", + f"MONGO_USERNAME={os.environ.get('MONGO_USERNAME', '')}", + f"MONGO_PASSWORD={os.environ.get('MONGO_PASSWORD', '')}", +] diff --git a/cwl_wes/security/__init__.py b/cwl_wes/security/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/cwl_wes/security/cors.py b/cwl_wes/security/cors.py deleted file mode 100644 index 4d55b83..0000000 --- a/cwl_wes/security/cors.py +++ /dev/null @@ -1,17 +0,0 @@ -"""Function enabling cross-origin resource sharing for a Flask app -instance.""" - -import logging -from flask import Flask - -from flask_cors import CORS - - -# Get logger instance -logger = logging.getLogger(__name__) - - -def enable_cors(app: Flask) -> None: - """Enables cross-origin resource sharing for Flask app.""" - CORS(app) - logger.info('Enabled CORS for Flask app.') diff --git a/cwl_wes/security/decorators.py b/cwl_wes/security/decorators.py deleted file mode 100644 index 5b729c2..0000000 --- a/cwl_wes/security/decorators.py +++ /dev/null @@ -1,612 +0,0 @@ -"""Decorator and utility functions for protecting access to endpoints.""" - -from connexion.exceptions import Unauthorized -from connexion import request -from flask import current_app -from functools import wraps -import logging -from typing import (Callable, Iterable, Mapping, Optional) - -from cryptography.hazmat.primitives import serialization -import jwt -import requests -import json - -from foca.config.config_parser import get_conf, get_conf_type - - -# Get logger instance -logger = logging.getLogger(__name__) - - -def auth_token_optional(fn: Callable) -> Callable: - """ - **The decorator protects an endpoint from being called without a valid - authorization token. 
- """ - @wraps(fn) - def wrapper(*args, **kwargs): - - # Check if authentication is enabled - if get_conf( - current_app.config, - 'security', - 'authorization_required', - ): - - # Get config parameters - validation_methods = get_conf_type( - current_app.config, - 'security', - 'jwt', - 'validation_methods', - types=(list), - ) - validation_checks = get_conf( - current_app.config, - 'security', - 'jwt', - 'validation_checks', - ) - algorithms = get_conf_type( - current_app.config, - 'security', - 'jwt', - 'algorithms', - types=(list), - ) - expected_prefix = get_conf( - current_app.config, - 'security', - 'jwt', - 'token_prefix', - ) - header_name = get_conf( - current_app.config, - 'security', - 'jwt', - 'header_name', - ) - claim_key_id = get_conf( - current_app.config, - 'security', - 'jwt', - 'claim_key_id', - ) - claim_issuer = get_conf( - current_app.config, - 'security', - 'jwt', - 'claim_issuer', - ) - claim_identity = get_conf( - current_app.config, - 'security', - 'jwt', - 'claim_identity', - ) - add_key_to_claims = get_conf( - current_app.config, - 'security', - 'jwt', - 'add_key_to_claims', - ) - audience = get_conf_type( - current_app.config, - 'security', - 'jwt', - 'audience', - types=(list, type(None)), - ) - allow_expired = get_conf( - current_app.config, - 'security', - 'jwt', - 'allow_expired', - ) - - # Ensure that at least one validation method was configured - if not len(validation_methods): - logger.error("No JWT validation methods configured.") - raise Unauthorized - - # Ensure that a valid validation checks argument was configured - if validation_checks not in ['all', 'any']: - logger.error( - ( - "Illegal argument '{validation_checks} passed to " - "configuration paramater 'validation_checks'. Allowed " - "values: 'any', 'all'" - ) - ) - raise Unauthorized - - # Parse JWT token from HTTP header - token = parse_jwt_from_header( - header_name=header_name, - expected_prefix=expected_prefix, - ) - - # Initialize claims - claims = {} - - # Validate JWT via /userinfo endpoint - if 'userinfo' in validation_methods: - if not (claims and validation_checks == 'any'): - logger.debug( - ( - "Validating JWT via identity provider's " - "'/userinfo' endpoint..." - ) - ) - claims = validate_jwt_via_userinfo_endpoint( - token=token, - algorithms=algorithms, - claim_issuer=claim_issuer, - ) - if not claims and validation_checks == 'all': - logger.error( - ( - "Insufficient number of JWT validation checks " - "passed." - ) - ) - raise Unauthorized - - # Validate JWT via public key - if 'public_key' in validation_methods: - if not (claims and validation_checks == 'any'): - logger.debug( - ( - "Validating JWT via identity provider's public " - "key..." - ) - ) - claims = validate_jwt_via_public_key( - token=token, - algorithms=algorithms, - claim_key_id=claim_key_id, - claim_issuer=claim_issuer, - add_key_to_claims=add_key_to_claims, - audience=audience, - allow_expired=allow_expired, - ) - if not claims and validation_checks == 'all': - logger.error( - ( - "Insufficient number of JWT validation checks " - "passed." - ) - ) - raise Unauthorized - - # Check whether enough validation checks passed - if not claims: - logger.error( - ( - "No JWT validation checks passed." - ) - ) - raise Unauthorized - - # Ensure that specified identity claim is available - if not validate_jwt_claims( - claim_identity, - claims=claims, - ): - raise Unauthorized - - # Log result - logger.debug( - "Access granted." 
- ) - - # Return wrapped function with token data - return fn( - jwt=token, - claims=claims, - user_id=claims[claim_identity], - *args, - **kwargs - ) - - # Return wrapped function without token data - else: - return fn(*args, **kwargs) - - return wrapper - - -def parse_jwt_from_header( - header_name: str = 'Authorization', - expected_prefix: str = 'Bearer', -) -> str: - """Parses authorization token from HTTP header.""" - # TODO: Add custom errors - # Ensure that authorization header is present - auth_header = request.headers.get(header_name, None) - if not auth_header: - logger.error("No HTTP header with name '{header_name}' found.".format( - header_name=header_name, - )) - raise Unauthorized - - # Ensure that authorization header is formatted correctly - try: - (prefix, token) = auth_header.split() - except ValueError as e: - logger.error( - ( - "Authentication header is malformed. Original error message: " - "{type}: {msg}" - ).format( - type=type(e).__name__, - msg=e, - ) - ) - raise Unauthorized - - if prefix != expected_prefix: - logger.error( - ( - "Expected token prefix in authentication header is " - "'{expected_prefix}', but '{prefix}' was found." - ).format( - expected_prefix=expected_prefix, - prefix=prefix, - ) - ) - raise Unauthorized - - return token - - -def validate_jwt_via_userinfo_endpoint( - token: str, - algorithms: Iterable[str] = ['RS256'], - claim_issuer: str = 'iss', - service_document_field: str = 'userinfo_endpoint', -) -> Mapping: - - # Decode JWT - try: - claims = jwt.decode( - jwt=token, - verify=False, - algorithms=algorithms, - ) - except Exception as e: - logger.warning( - ( - "JWT could not be decoded. Original error message: " - "{type}: {msg}" - ).format( - type=type(e).__name__, - msg=e, - ) - ) - return {} - - # Verify existence of issuer claim - if not validate_jwt_claims( - claim_issuer, - claims=claims, - ): - return {} - - # Get /userinfo endpoint URL - url = get_entry_from_idp_service_discovery_endpoint( - issuer=claims[claim_issuer], - entry=service_document_field, - ) - - # Validate JWT via /userinfo endpoint - if url: - logger.debug(f"Issuer's '/userinfo' endpoint URL: {url}") - try: - validate_jwt_via_endpoint( - url=url, - token=token, - ) - except Exception: - return {} - else: - return {} - - # Log success and return claims - logger.debug( - f"Claims decoded: {claims}" - ) - return claims - - -def validate_jwt_via_public_key( - token: str, - algorithms: Iterable[str] = ['RS256'], - claim_key_id: str = 'kid', - claim_issuer: str = 'iss', - service_document_field: str = 'jwks_uri', - add_key_to_claims: bool = True, - audience: Optional[Iterable[str]] = None, - allow_expired: bool = False, -) -> Mapping: - - # Extract JWT claims - try: - claims = jwt.decode( - jwt=token, - verify=False, - algorithms=algorithms, - ) - except Exception as e: - logger.error( - ( - "JWT could not be decoded. Original error message: {type}: " - "{msg}" - ).format( - type=type(e).__name__, - msg=e, - ) - ) - return {} - - # Extract JWT header claims - try: - header_claims = jwt.get_unverified_header(token) - except Exception as e: - logger.error( - ( - "Could not extract JWT header claims. 
Original error message: " - "{type}: {msg}" - ).format( - type=type(e).__name__, - msg=e, - ) - ) - return {} - - # Get JWK set endpoint URL - url = get_entry_from_idp_service_discovery_endpoint( - issuer=claims[claim_issuer], - entry=service_document_field, - ) - - # Obtain identity provider's public keys - if url: - logger.debug(f"Issuer's JWK set endpoint URL: {url}") - public_keys = get_public_keys( - url=url, - claim_key_id=claim_key_id, - ) - else: - return {} - - # If currently used public key is specified, verify that it exists and - # remove all other keys - if claim_key_id in header_claims: - if header_claims[claim_key_id] in public_keys: - public_keys = { - header_claims[claim_key_id]: - public_keys[header_claims[claim_key_id]] - } - else: - logger.error( - "JWT key ID not found among issuer's JWKs." - ) - return {} - else: - logger.debug( - "JWT key ID not specified. Trying all available JWKs..." - ) - - # Set validations - validation_options = {} - if audience is None: - validation_options['verify_aud'] = False - if allow_expired: - validation_options['verify_exp'] = False - - # Try public keys one after the other - pem = '' - for key in public_keys.values(): - - # Get PEM representation of key - pem = key.public_bytes( - encoding=serialization.Encoding.PEM, - format=serialization.PublicFormat.SubjectPublicKeyInfo, - ).decode('utf-8').encode('unicode_escape').decode('utf-8') - - # Decode JWT and validate via public key - try: - claims = jwt.decode( - jwt=token, - verify=True, - key=key, - algorithms=algorithms, - audience=audience, - options=validation_options, - ) - # Wrong or faulty key was used; try next one - except ( - jwt.exceptions.InvalidSignatureError, - jwt.exceptions.InvalidKeyError - ) as e: - logger.debug( - "JWT could not be decoded with current JWK:\n" - f"{pem}\n" - f"Original error message: {type(e).__name__}: {e}" - ) - # Key seems okay but token seems invalid - except Exception as e: - logger.error( - "JWT could not be validated. Original error message: " - f"{type(e).__name__}: {e}" - ) - return {} - - # Do not try other keys if token was decoded - if claims: - break - - # Verify that token was decoded - if not claims: - logger.error( - "JWT could not be validated with any of the issuer's JWKs." - ) - return {} - - # Add public key to claims - if add_key_to_claims: - claims['public_key'] = pem - - # Log success and return claims - logger.debug( - f"Claims decoded: {claims}" - ) - return claims - - -def validate_jwt_claims( - *args: str, - claims: Mapping, -) -> bool: - """ - Validates the existence of JWT claims. Returns False if any are missing, - otherwise returns True. - """ - # Check for existence of required claims - for claim in args: - if claim not in claims: - logger.warning( - ( - "Required claim '{claim}' not found in JWT." - ).format( - claim=claim, - ) - ) - return False - else: - return True - - -def get_entry_from_idp_service_discovery_endpoint( - issuer: str, - entry: str, -) -> Optional[str]: - """ - Access the identity provider's service discovery endpoint to retrieve the - value of the specified entry. - """ - # Build endpoint URL - base_url = issuer.rstrip("/") - url = "{base_url}/.well-known/openid-configuration".format( - base_url=base_url - ) - - # Send GET request to service discovery endpoint - try: - response = requests.get(url) - response.raise_for_status() - except Exception as e: - logger.warning( - ( - "Could not connect to endpoint '{url}'. 
Original error " - "message: {type}: {msg}" - ).format( - url=url, - type=type(e).__name__, - msg=e, - ) - ) - return None - - # Return entry or None - if entry not in response.json(): - logger.warning( - ( - "Required entry '{entry}' not found in identity provider's " - "documentation accessed at endpoint '{endpoint}'." - ).format( - entry=entry, - url=url, - ) - ) - return None - else: - return response.json()[entry] - - -def validate_jwt_via_endpoint( - url: str, - token: str, - header_name: str = 'Authorization', - prefix: str = 'Bearer' -) -> None: - """ - Returns True if a JWT-headed request to a specified URL yields the - specified status code. - """ - headers = { - "{header_name}".format( - header_name=header_name - ): "{prefix} {token}".format( - header_name=header_name, - prefix=prefix, - token=token, - ) - } - try: - response = requests.get( - url, - headers=headers, - ) - response.raise_for_status() - except Exception as e: - logger.warning( - ( - "Could not connect to endpoint '{url}'. Original error " - "message: {type}: {msg}" - ).format( - url=url, - type=type(e).__name__, - msg=e, - ) - ) - raise - - return None - - -def get_public_keys( - url: str, - claim_key_id: str = 'kid', -) -> Mapping: - """ - Obtain the identity provider's list of public keys. - """ - # Get JWK sets from identity provider - try: - response = requests.get(url) - response.raise_for_status() - except Exception as e: - logger.warning( - ( - "Could not connect to endpoint '{url}'. Original error " - "message: {type}: {msg}" - ).format( - url=url, - type=type(e).__name__, - msg=e, - ) - ) - return {} - - # Iterate over all JWK sets and store public keys in dictionary - public_keys = {} - for jwk in response.json()['keys']: - public_keys[jwk[claim_key_id]] = jwt.algorithms.RSAAlgorithm.from_jwk( - json.dumps(jwk) - ) - - # Return dictionary of public keys - return public_keys diff --git a/cwl_wes/tasks/__init__.py b/cwl_wes/tasks/__init__.py index e69de29..b86bf42 100644 --- a/cwl_wes/tasks/__init__.py +++ b/cwl_wes/tasks/__init__.py @@ -0,0 +1 @@ +"""cwl-WES background tasks.""" diff --git a/cwl_wes/tasks/tasks/cancel_run.py b/cwl_wes/tasks/cancel_run.py similarity index 62% rename from cwl_wes/tasks/tasks/cancel_run.py rename to cwl_wes/tasks/cancel_run.py index 2da3810..628dce7 100644 --- a/cwl_wes/tasks/tasks/cancel_run.py +++ b/cwl_wes/tasks/cancel_run.py @@ -1,80 +1,73 @@ """Celery background task to cancel workflow run and related TES tasks.""" import logging -from requests import HTTPError -import tes import time -from typing import (List, Optional) +from typing import List, Optional from celery.exceptions import SoftTimeLimitExceeded from flask import current_app +from foca.database.register_mongodb import _create_mongo_client from pymongo import collection as Collection +from requests import HTTPError +import tes -from cwl_wes.celery_worker import celery -from foca.config.config_parser import get_conf -import cwl_wes.database.db_utils as db_utils -from cwl_wes.database.register_mongodb import create_mongo_client from cwl_wes.ga4gh.wes.states import States -from cwl_wes.tasks.utils import set_run_state - +import cwl_wes.utils.db as db_utils +from cwl_wes.worker import celery_app # Get logger instance logger = logging.getLogger(__name__) -@celery.task( - name='tasks.cancel_run', +@celery_app.task( + name="tasks.cancel_run", ignore_result=True, bind=True, ) def task__cancel_run( - self, run_id: str, task_id: str, token: Optional[str] = None, ) -> None: """Revokes worfklow task and tries to cancel 
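The cancelation task below drives per-task cancelation through the py-tes client; stripped to its core, the call looks like this (URL and task IDs are placeholders, and the error handling is a sketch based on the `HTTPError` import kept in the new module):

```python
import tes
from requests import HTTPError

tes_client = tes.HTTPClient(url="https://tes.example.org", timeout=5)
for tes_id in ("task-1234", "task-5678"):
    try:
        tes_client.cancel_task(tes_id)
    except HTTPError as exc:
        # tasks that already finished typically refuse cancelation
        print(f"could not cancel '{tes_id}': {exc}")
```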
all running TES tasks.""" - config = current_app.config + foca_config = current_app.config.foca # Create MongoDB client - mongo = create_mongo_client( + mongo = _create_mongo_client( app=current_app, - config=config, + host=foca_config.db.host, + port=foca_config.db.port, + db="cwl-wes-db", ) - collection = mongo.db['runs'] + collection = mongo.db["runs"] # Set run state to 'CANCELING' - set_run_state( + db_utils.set_run_state( collection=collection, run_id=run_id, task_id=task_id, - state='CANCELING', + state="CANCELING", ) try: # Cancel individual TES tasks + tes_server_config = foca_config.custom.controller.tes_server __cancel_tes_tasks( collection=collection, run_id=run_id, - url=get_conf(config, 'tes', 'url'), - timeout=get_conf(config, 'tes', 'timeout'), + url=tes_server_config.url, + timeout=tes_server_config.timeout, token=token, ) - except SoftTimeLimitExceeded as e: - set_run_state( + except SoftTimeLimitExceeded as exc: + db_utils.set_run_state( collection=collection, run_id=run_id, task_id=task_id, - state='SYSTEM_ERROR', + state="SYSTEM_ERROR", ) logger.warning( - ( - "Canceling workflow run '{run_id}' timed out. Run state " - "was set to 'SYSTEM_ERROR'. Original error message: " - "{type}: {msg}" - ).format( - run_id=run_id, - type=type(e).__name__, - msg=e, - ) + f"Canceling workflow run '{run_id}' timed out. Run state was set " + "to 'SYSTEM_ERROR'. Original error message: " + f"{type(exc).__name__}: {exc}" ) @@ -91,7 +84,7 @@ def __cancel_tes_tasks( timeout=timeout, token=token, ) - canceled: List = list() + canceled: List = [] while True: task_ids = db_utils.find_tes_task_ids( collection=collection, @@ -108,11 +101,11 @@ def __cancel_tes_tasks( canceled = canceled + cancel time.sleep(timeout) document = collection.find_one( - filter={'run_id': run_id}, + filter={"run_id": run_id}, projection={ - 'api.state': True, - '_id': False, - } + "api.state": True, + "_id": False, + }, ) - if document['api']['state'] in States.FINISHED: + if document["api"]["state"] in States.FINISHED: break diff --git a/cwl_wes/tasks/celery_task_monitor.py b/cwl_wes/tasks/celery_task_monitor.py deleted file mode 100644 index 20d44bd..0000000 --- a/cwl_wes/tasks/celery_task_monitor.py +++ /dev/null @@ -1,593 +0,0 @@ -"""Celery task monitor, event handlers and related utility functions.""" - -from ast import literal_eval -from datetime import datetime -import logging -import os -import re -import requests -from shlex import quote -from threading import Thread -from time import sleep -from typing import (Dict, List, Optional) - -from celery import Celery -from celery.events import Event -from celery.events.receiver import EventReceiver -from kombu.connection import Connection # noqa: F401 -from pymongo import collection as Collection -import tes - -import cwl_wes.database.db_utils as db_utils - - -# Get logger instance -logger = logging.getLogger(__name__) - - -# Set string time format -strf: str = '%Y-%m-%d %H:%M:%S.%f' - - -class TaskMonitor(): - """Celery task monitor.""" - - def __init__( - self, - celery_app: Celery, - collection: Collection, - tes_config: Dict[str, str], - timeout: float = 0, - authorization: bool = True, - ) -> None: - """Starts Celery task monitor daemon process.""" - self.celery_app = celery_app - self.collection = collection - self.timeout = timeout - self.authorization = authorization - self.tes_config = tes_config - - self.thread = Thread(target=self.run, args=()) - self.thread.daemon = True - self.thread.start() - - logger.debug('Celery task monitor daemon process 
started...') - - def run(self) -> None: - """Daemon process for Celery task monitor.""" - while True: - - try: - - with self.celery_app.connection() as \ - connection: # type: Connection - - listener: EventReceiver = self.celery_app.events.Receiver( - connection, - handlers={ - 'task-received': - self.on_task_received, - 'task-started': - self.on_task_started, - 'task-failed': - self.on_task_failed, - 'task-succeeded': - self.on_task_succeeded, - 'task-tes-task-update': - self.on_task_tes_task_update, - } - ) - listener.capture(limit=None, timeout=None, wakeup=True) - - except KeyboardInterrupt as e: - logger.exception( - ( - 'Task monitor interrupted. Execution aborted. ' - 'Original error message: {type}: {msg}' - ).format( - type=type(e).__name__, - msg=e, - ) - ) - raise SystemExit - - except Exception as e: - logger.exception( - ( - 'Unknown error in task monitor occurred. Original ' - 'error message: {type}: {msg}' - ).format( - type=type(e).__name__, - msg=e, - ) - ) - pass - - # Sleep for specified interval - sleep(self.timeout) - - def on_task_received( - self, - event: Event, - ) -> None: - """Event handler for received Celery tasks.""" - if not event['name'] == 'tasks.run_workflow': - return None - # Parse subprocess inputs - try: - kwargs = literal_eval(event['kwargs']) - except Exception as e: - logger.exception( - ( - "Field 'kwargs' in event message malformed. Original " - 'error message: {type}: {msg}' - ).format( - type=type(e).__name__, - msg=e, - ) - ) - pass - - # Build command - if 'command_list' in kwargs: - if self.authorization: - kwargs['command_list'][3] = '' - kwargs['command_list'][5] = '' - command = ' '.join( - [quote(item) for item in kwargs['command_list']] - ) - else: - command = 'N/A' - - # Create dictionary for internal parameters - internal = dict() - internal['task_received'] = datetime.utcfromtimestamp( - event['timestamp'] - ) - internal['process_id_worker'] = event['pid'] - internal['host'] = event['hostname'] - - # Update run document in database - try: - self.update_run_document( - event=event, - state='QUEUED', - internal=internal, - task_received=datetime.utcfromtimestamp( - event['timestamp'] - ).strftime(strf), - command=command, - utc_offset=event['utcoffset'], - max_retries=event['retries'], - expires=event['expires'], - ) - except Exception as e: - logger.exception( - ( - 'Database error. Could not update log information for ' - "task '{task}'. Original error message: {type}: {msg}" - ).format( - task=event['uuid'], - type=type(e).__name__, - msg=e, - ) - ) - - def on_task_started( - self, - event: Event, - ) -> None: - """Event handler for started Celery tasks.""" - if not self.collection.find_one({'task_id': event['uuid']}): - return None - internal = dict() - internal['task_started'] = datetime.utcfromtimestamp( - event['timestamp'] - ) - # Update run document in database - try: - self.update_run_document( - event=event, - state='RUNNING', - internal=internal, - task_started=datetime.utcfromtimestamp( - event['timestamp'] - ).strftime(strf), - ) - except Exception as e: - logger.exception( - ( - 'Database error. Could not update log information for ' - "task '{task}'. 
Original error message: {type}: {msg}" - ).format( - task=event['uuid'], - type=type(e).__name__, - msg=e, - ) - ) - - def on_task_failed( - self, - event: Event, - ) -> None: - """Event handler for failed (system error) Celery tasks.""" - if not self.collection.find_one({'task_id': event['uuid']}): - return None - # Create dictionary for internal parameters - internal = dict() - internal['task_finished'] = datetime.utcfromtimestamp( - event['timestamp'] - ) - internal['traceback'] = event['traceback'] - - # Update run document in databse - self.update_run_document( - event=event, - state='SYSTEM_ERROR', - internal=internal, - task_finished=datetime.utcfromtimestamp( - event['timestamp'] - ).strftime(strf), - exception=event['exception'], - ) - - def on_task_succeeded( - self, - event: Event, - ) -> None: - """Event handler for successful, failed and canceled Celery - tasks.""" - if not self.collection.find_one({'task_id': event['uuid']}): - return None - # Parse subprocess results - try: - (returncode, log, tes_ids, token) = literal_eval(event['result']) - log_list=log - log = os.linesep.join(log) - except Exception as e: - logger.exception( - ( - "Field 'result' in event message malformed. Original " - 'error message: {type}: {msg}' - ).format( - type=type(e).__name__, - msg=e, - ) - ) - pass - - # Create dictionary for internal parameters - internal = dict() - internal['task_finished'] = datetime.utcfromtimestamp( - event['timestamp'] - ) - - # Set final state to be set - document = self.collection.find_one( - filter={'task_id': event['uuid']}, - projection={ - 'api.state': True, - '_id': False, - } - ) - if document and document['api']['state'] == 'CANCELING': - state = 'CANCELED' - elif returncode: - state = 'EXECUTOR_ERROR' - else: - state = 'COMPLETE' - - # Extract run outputs - #outputs = self.__cwl_tes_outputs_parser(log) - outputs = self.__cwl_tes_outputs_parser_list(log_list) - - # Get task logs - task_logs = self.__get_tes_task_logs( - tes_ids=tes_ids, - token=token, - ) - - # Update run document in database - try: - self.update_run_document( - event=event, - state=state, - internal=internal, - outputs=outputs, - task_logs=task_logs, - task_finished=datetime.utcfromtimestamp( - event['timestamp'] - ).strftime(strf), - return_code=returncode, - stdout=log, - stderr='', - ) - except Exception as e: - logger.exception( - ( - 'Database error. Could not update log information for ' - "task '{task}'. Original error message: {type}: {msg}" - ).format( - task=event['uuid'], - type=type(e).__name__, - msg=e, - ) - ) - pass - - def on_task_tes_task_update( - self, - event: Event, - ) -> None: - """Event handler for TES task state changes.""" - # If TES task is new, add task log to database - if not event['tes_state']: - tes_log = self.__get_tes_task_log( - tes_id=event['tes_id'], - token=event['token'], - ) - try: - db_utils.append_to_tes_task_logs( - collection=self.collection, - task_id=event['uuid'], - tes_log=tes_log, - ) - except Exception as e: - logger.exception( - ( - 'Database error. Could not update log information for ' - "task '{task}'. Original error message: {type}: {msg}" - ).format( - task=event['uuid'], - type=type(e).__name__, - msg=e, - ) - ) - pass - - # Otherwise only update state - else: - try: - db_utils.update_tes_task_state( - collection=self.collection, - task_id=event['uuid'], - tes_id=event['tes_id'], - state=event['tes_state'], - ) - logger.info( - ( - "State of TES task '{tes_id}' of run with task ID " - "'{task_id}' changed to '{state}'." 
- ).format( - task_id=event['uuid'], - tes_id=event['tes_id'], - state=event['tes_state'], - ) - ) - except Exception as e: - logger.exception( - ( - 'Database error. Could not update log information for ' - "task '{task}'. Original error message: {type}: {msg}" - ).format( - task=event['uuid'], - type=type(e).__name__, - msg=e, - ) - ) - pass - - def update_run_document( - self, - event: Event, - state: Optional[str] = None, - internal: Optional[Dict] = None, - outputs: Optional[Dict] = None, - task_logs: Optional[List[Dict]] = None, - **run_log_params - ): - """Updates state, internal and run log parameters in database - document. - """ - # TODO: Minimize db ops; try to compile entire object & update once - # Update internal parameters - if internal: - document = db_utils.upsert_fields_in_root_object( - collection=self.collection, - task_id=event['uuid'], - root='internal', - **internal, - ) - - # Update outputs - if outputs: - document = db_utils.upsert_fields_in_root_object( - collection=self.collection, - task_id=event['uuid'], - root='api.outputs', - **outputs, - ) - - # Update task logs - if task_logs: - document = db_utils.upsert_fields_in_root_object( - collection=self.collection, - task_id=event['uuid'], - root='api', - task_logs=task_logs, - ) - - # Update run log parameters - if run_log_params: - document = db_utils.upsert_fields_in_root_object( - collection=self.collection, - task_id=event['uuid'], - root='api.run_log', - **run_log_params, - ) - - # Calculate queue, execution and run time - if document and document['internal']: - run_log = document['internal'] - durations = dict() - - if 'task_started' in run_log_params: - if 'task_started' in run_log and 'task_received' in run_log: - pass - durations['time_queue'] = ( - run_log['task_started'] - run_log['task_received'] - ).total_seconds() - - if 'task_finished' in run_log_params: - if 'task_finished' in run_log and 'task_started' in run_log: - pass - durations['time_execution'] = ( - run_log['task_finished'] - run_log['task_started'] - ).total_seconds() - if 'task_finished' in run_log and 'task_received' in run_log: - pass - durations['time_total'] = ( - run_log['task_finished'] - run_log['task_received'] - ).total_seconds() - - if durations: - document = db_utils.upsert_fields_in_root_object( - collection=self.collection, - task_id=event['uuid'], - root='api.run_log', - **durations, - ) - - # Update state - if state: - try: - document = db_utils.update_run_state( - collection=self.collection, - task_id=event['uuid'], - state=state, - ) - except Exception: - raise - - # Log info message - if document: - logger.info( - ( - "State of run '{run_id}' (task id: '{task_id}') changed " - "to '{state}'." 
- ).format( - run_id=document['run_id'], - task_id=event['uuid'], - state=state, - ) - ) - - return document - - @staticmethod - def __cwl_tes_outputs_parser(log: str) -> Dict: - """Parses outputs from cwl-tes log.""" - # Find outputs object in log string - re_outputs = re.compile( - r'(^\{$\n^ {4}"\S+": [\[\{]$\n(^ {4,}.*$\n)*^ {4}[\]\}]$\n^\}$\n)', - re.MULTILINE - ) - m = re_outputs.search(log) - if m: - return literal_eval(m.group(1)) - else: - return dict() - - @staticmethod - def __cwl_tes_outputs_parser_list(log: List) -> Dict: - """This function parses outputs from the cwl-tes log""" - """The outputs JSON starts at the line before last in the logs""" - """So unless the outputs are empty ({}), parse upward,""" - """until you find the beginning of the JSON containing the outputs""" - - indices=range(len(log)-1,-1,-1) - - start=-1 - end=-1 - for index in indices: - if log[index].rstrip()=='{}': - return dict() - elif log[index].rstrip()=='}': - end=index - break - - # No valid JSON was found and the previous loop - # reached the end of the log - if end==0: - return dict() - - indices=range(end-1,-1,-1) - for index in indices: - if log[index].rstrip()=='{': - start=index - break - - json=os.linesep.join(log[start:end+1]) - - try: - return literal_eval(json) - except ValueError as verr: - logger.exception( - "ValueError when evaluation JSON: '%s'. Original error message: %s" % \ - (json, verr) - ) - return dict() - except SyntaxError as serr: - logger.exception( - "SyntaxError when evaluation JSON: '%s'. Original error message: %s" % \ - (json, serr) - ) - return dict() - - def __get_tes_task_logs( - self, - tes_ids: List = list(), - token: Optional[str] = None, - ) -> List[Dict]: - """Gets multiple task logs from TES instance.""" - task_logs = list() - for tes_id in tes_ids: - task_logs.append( - self.__get_tes_task_log( - tes_id=tes_id, - token=token, - ) - ) - return task_logs - - def __get_tes_task_log( - self, - tes_id: str, - token: Optional[str] = None, - ) -> Dict: - """Gets task log from TES instance.""" - tes_client = tes.HTTPClient( - url=self.tes_config['url'], - timeout=self.tes_config['timeout'], - token=token, - ) - - task_log = {} - - try: - task_log = tes_client.get_task( - task_id=tes_id, - view=self.tes_config['query_params'], - ).as_dict() - except Exception as e: - # TODO: handle more robustly: only 400/Bad Request is okay; - # TODO: other errors (e.g. 500) should be dealt with - logger.warning( - "Could not obtain task log. Setting default. Original error " - f"message: {type(e).__name__}: {e}" - ) - task_log = {} - - logger.debug(f'Task log: {task_log}') - - return task_log diff --git a/cwl_wes/tasks/cwl_log_processor.py b/cwl_wes/tasks/cwl_log_processor.py new file mode 100644 index 0000000..3a46722 --- /dev/null +++ b/cwl_wes/tasks/cwl_log_processor.py @@ -0,0 +1,358 @@ +"""cwl-tes log parser executed on worker.""" + +from ast import literal_eval +import logging +import os +import re +from typing import Dict, List, Optional, Tuple + +from _io import TextIOWrapper +from pymongo.errors import PyMongoError +import tes + +import cwl_wes.utils.db as db_utils +from cwl_wes.worker import celery_app + +# Get logger instance +logger = logging.getLogger(__name__) + + +class CWLLogProcessor: + """cwl-tes log parser executed on worker. + + Args: + tes_config: TES configuration. + collection: MongoDB collection. + + Attributes: + tes_config: TES configuration. + collection: MongoDB collection. 
+ """ + + def __init__(self, tes_config, collection) -> None: + """Construct class instance.""" + self.tes_config = tes_config + self.collection = collection + + def process_cwl_logs( + self, + task: celery_app.Task, + stream: TextIOWrapper, + token: Optional[str] = None, + ) -> Tuple[List, List]: + """Parse cwl-tes logs. + + Args: + task: Celery task instance. + stream: Combined STDOUT/STDERR stream. + token: OAuth2 token. + + Returns: + Tuple of lists containing the following: + - List of log lines. + - List of TES task IDs. + """ + stream_container: List = [] + tes_states: Dict = {} + + # Iterate over STDOUT/STDERR stream + for line in iter(stream.readline, ""): + + line = line.rstrip() + + # Replace single quote characters to avoid `literal_eval()` errors + line = line.replace("'", '"') + + # Handle special cases + lines = self.process_tes_log(line) + for processed_line in lines: + stream_container.append(processed_line) + logger.info(f"[{task}] {processed_line}") + continue + + # Detect TES task state changes + (tes_id, tes_state) = self.extract_tes_state(line) + if tes_id: + + # Handle new task + if tes_id not in tes_states: + tes_states[tes_id] = tes_state + self.capture_tes_task_update( + task, + tes_id=tes_id, + token=token, + ) + # Handle state change + elif tes_states[tes_id] != tes_state: + tes_states[tes_id] = tes_state + self.capture_tes_task_update( + task, + tes_id=tes_id, + tes_state=tes_state, + ) + logger.info(line) + continue + + stream_container.append(line) + logger.info(line) + + return (stream_container, list(tes_states.keys())) + + def process_tes_log(self, line: str) -> List[str]: + """Handle irregularities arising from log parsing. + + Args: + line: Log line. + + Returns: + List of log lines. + """ + lines: List = [] + + # Handle special case where FTP and cwl-tes logs are on same line + re_ftp_cwl_tes = re.compile( + r"^(\*cmd\* .*)(\[step \w*\] produced output \{)$" + ) + match = re_ftp_cwl_tes.match(line) + if match: + lines.append(match.group(1)) + + return lines + + def extract_tes_state( + self, + line: str, + ) -> Tuple[Optional[str], Optional[str]]: + """Extract task ID and state from cwl-tes log. + + Args: + line: Log line. + + Returns: + Tuple of task ID and state. + """ + task_id: Optional[str] = None + task_state: Optional[str] = None + + # Extract new task ID + re_task_new = re.compile(r"^\[job [\w\-]*\] task id: (\S*)$") + match = re_task_new.match(line) + if match: + task_id = match.group(1) + + # Extract task ID and state + re_task_state_poll = re.compile( + r'^\[job [\w\-]*\] POLLING "(\S*)", result: (\w*)' + ) + match = re_task_state_poll.match(line) + if match: + task_id = match.group(1) + task_state = match.group(2) + + return (task_id, task_state) + + def capture_tes_task_update( + self, + task: celery_app.Task, + tes_id: str, + tes_state: Optional[str] = None, + token: Optional[str] = None, + ) -> None: + """Handle TES task state change events. + + Args: + task: Celery task instance. + tes_id: TES task ID. + tes_state: TES task state. + token: OAuth2 token. 
+        """
+        # If TES task is new, add task log to database
+        logger.info(f"TES task state: {tes_state}")
+        cwl_tes_processor = CWLTesProcessor(tes_config=self.tes_config)
+        if not tes_state:
+            tes_log = cwl_tes_processor.get_tes_task_log(
+                tes_id=tes_id,
+                token=token,
+            )
+            logger.info(f"TES task log: {tes_log}")
+            try:
+                db_utils.append_to_tes_task_logs(
+                    collection=self.collection,
+                    task_id=task.task_id,
+                    tes_log=tes_log,
+                )
+            except PyMongoError as exc:
+                logger.exception(
+                    "Database error. Could not update log information for"
+                    f" task '{task.task_id}'. Original error message:"
+                    f" {type(exc).__name__}: {exc}"
+                )
+
+        # Otherwise only update state
+        else:
+            try:
+                db_utils.update_tes_task_state(
+                    collection=self.collection,
+                    task_id=task.task_id,
+                    tes_id=tes_id,
+                    state=tes_state,
+                )
+                logger.info(
+                    f"State of TES task '{tes_id}' of run with task ID "
+                    f"'{task.task_id}' changed to '{tes_state}'."
+                )
+            except PyMongoError as exc:
+                logger.exception(
+                    "Database error. Could not update log information for"
+                    f" task '{task.task_id}'. Original error message:"
+                    f" {type(exc).__name__}: {exc}"
+                )
+
+
+class CWLTesProcessor:
+    """Class for processing cwl-tes logs.
+
+    Args:
+        tes_config: TES configuration.
+
+    Attributes:
+        tes_config: TES configuration.
+    """
+
+    def __init__(self, tes_config) -> None:
+        """Construct class instance."""
+        self.tes_config = tes_config
+
+    @staticmethod
+    def cwl_tes_outputs_parser(log: str) -> Dict:
+        """Parse outputs from cwl-tes log.
+
+        Args:
+            log: cwl-tes log.
+
+        Returns:
+            Outputs dictionary.
+        """
+        re_outputs = re.compile(
+            r'(^\{$\n^ {4}"\S+": [\[\{]$\n(^ {4,}.*$\n)*^ {4}[\]\}]$\n^\}$\n)',
+            re.MULTILINE,
+        )
+        match = re_outputs.search(log)
+        if match:
+            return literal_eval(match.group(1))
+        return {}
+
+    @staticmethod
+    def cwl_tes_outputs_parser_list(log: List) -> Dict:
+        """Parse outputs from cwl-tes log.
+
+        The outputs JSON starts at the line before last in the logs. So unless
+        the outputs are empty ({}), parse upward, until you find the beginning
+        of the JSON containing the outputs.
+
+        Args:
+            log: cwl-tes log.
+
+        Returns:
+            Outputs dictionary.
+        """
+        indices = range(len(log) - 1, -1, -1)
+
+        start = -1
+        end = -1
+        for index in indices:
+            if log[index].rstrip() == "{}":
+                return {}
+            if log[index].rstrip() == "}":
+                end = index
+                break
+
+        # No valid JSON was found and the previous loop
+        # reached the end of the log
+        if end == 0:
+            return {}
+
+        indices = range(end - 1, -1, -1)
+        for index in indices:
+            if log[index].rstrip() == "{":
+                start = index
+                break
+
+        json = os.linesep.join(log[start : end + 1])  # noqa: E203
+
+        try:
+            return literal_eval(json)
+        except ValueError as exc:
+            logger.exception(
+                f"ValueError when evaluating JSON: {json}. Original error"
+                f" message: {exc}"
+            )
+            return {}
+        except SyntaxError as exc:
+            logger.exception(
+                f"SyntaxError when evaluating JSON: {json}. Original error"
+                f" message: {exc}"
+            )
+            return {}
+
+    def get_tes_task_logs(
+        self,
+        tes_ids: List,
+        token: Optional[str] = None,
+    ) -> List[Dict]:
+        """Get multiple task logs from TES instance.
+
+        Args:
+            tes_ids: TES task IDs.
+            token: OAuth2 token.
+
+        Returns:
+            Task logs.
+        """
+        task_logs = []
+        for tes_id in tes_ids:
+            task_logs.append(
+                self.get_tes_task_log(
+                    tes_id=tes_id,
+                    token=token,
+                )
+            )
+        return task_logs
+
+    def get_tes_task_log(
+        self,
+        tes_id: str,
+        token: Optional[str] = None,
+    ) -> Dict:
+        """Get single task log from TES instance.
+
+        Args:
+            tes_id: TES task ID.
+            token: OAuth2 token.
+ + Returns: + Task log. + """ + tes_client = tes.HTTPClient( + url=self.tes_config["url"], + timeout=self.tes_config["timeout"], + token=token, + ) + + task_log = {} + + try: + task_log = tes_client.get_task( + task_id=tes_id, + view=self.tes_config["query_params"], + ).as_dict() + except Exception as exc: # pylint: disable=broad-except + logger.warning( + "Could not obtain task log. Setting default. Original error " + f"message: {type(exc).__name__}: {exc}" + ) + task_log = {} + + logger.debug(f"Task log: {task_log}") + + return task_log diff --git a/cwl_wes/tasks/register_celery.py b/cwl_wes/tasks/register_celery.py deleted file mode 100644 index 9400cfa..0000000 --- a/cwl_wes/tasks/register_celery.py +++ /dev/null @@ -1,40 +0,0 @@ -"""Function to create Celery app instance and register task monitor.""" - -from flask import Flask -import logging -import os - -from cwl_wes.factories.celery_app import create_celery_app -from cwl_wes.tasks.celery_task_monitor import TaskMonitor - - -# Get logger instance -logger = logging.getLogger(__name__) - - -def register_task_service(app: Flask) -> None: - """Instantiates Celery app and registers task monitor.""" - # Ensure that code is executed only once when app reloader is used - if os.environ.get("WERKZEUG_RUN_MAIN") != 'true': - - # Instantiate Celery app instance - celery_app = create_celery_app(app) - - # Start task monitor daemon - TaskMonitor( - celery_app=celery_app, - collection=app.config['database']['collections']['runs'], - tes_config={ - 'url': - app.config['tes']['url'], - 'query_params': - app.config['tes']['status_query_params'], - 'timeout': - app.config['tes']['timeout'] - }, - timeout=app.config['celery']['monitor']['timeout'], - authorization=app.config['security']['authorization_required'], - ) - logger.info('Celery task monitor registered.') - - return None diff --git a/cwl_wes/tasks/run_workflow.py b/cwl_wes/tasks/run_workflow.py new file mode 100644 index 0000000..e567732 --- /dev/null +++ b/cwl_wes/tasks/run_workflow.py @@ -0,0 +1,31 @@ +"""Celery background task to start workflow run.""" + +import logging +from typing import List, Optional + +from cwl_wes.worker import celery_app +from cwl_wes.tasks.workflow_run_manager import WorkflowRunManager + + +# Get logger instance +logger = logging.getLogger(__name__) + + +@celery_app.task( + name="tasks.run_workflow", + bind=True, + ignore_result=True, + track_started=True, +) +def task__run_workflow( + self, + command_list: List, + tmp_dir: str, + token: Optional[str] = None, +) -> None: + """Add workflow run to task queue.""" + # Execute task in background + workflow_run_manager = WorkflowRunManager( + task=self, command_list=command_list, tmp_dir=tmp_dir, token=token + ) + workflow_run_manager.run_workflow() diff --git a/cwl_wes/tasks/tasks/__init__.py b/cwl_wes/tasks/tasks/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/cwl_wes/tasks/tasks/run_workflow.py b/cwl_wes/tasks/tasks/run_workflow.py deleted file mode 100644 index c674310..0000000 --- a/cwl_wes/tasks/tasks/run_workflow.py +++ /dev/null @@ -1,157 +0,0 @@ -"""Celery background task to start workflow run.""" - -from _io import TextIOWrapper -import logging -import re -import subprocess -from typing import (Dict, List, Optional, Tuple) - -from cwl_wes.celery_worker import celery - - -# Get logger instance -logger = logging.getLogger(__name__) - - -@celery.task( - name='tasks.run_workflow', - bind=True, - ignore_result=True, - track_started=True -) -def task__run_workflow( - self, - command_list: 
List, - tmp_dir: str, - token: Optional[str] = None, -) -> Tuple[int, List[str], List[str], Optional[str]]: - """Adds workflow run to task queue.""" - # Execute task in background - proc = subprocess.Popen( - command_list, - cwd=tmp_dir, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - universal_newlines=True, - ) - # Parse output in real-time - log, tes_ids = __process_cwl_logs( - self, - stream=proc.stdout, - token=token, - ) - - returncode = proc.wait() - - return (returncode, log, tes_ids, token) - - -def __process_cwl_logs( - task: celery.Task, - stream: TextIOWrapper, - token: Optional[str] = None, -) -> Tuple[List, List]: - """Parses combinend cwl-tes STDOUT/STDERR and sends TES task IDs and state - updates to broker.""" - stream_container: List = list() - tes_states: Dict = dict() - - # Iterate over STDOUT/STDERR stream - for line in iter(stream.readline, ''): - - line = line.rstrip() - - # Replace single quote characters to avoid `literal_eval()` errors - line = line.replace("'", '"') - - # Handle special cases - lines = __handle_cwl_tes_log_irregularities(line) - for line in lines: - stream_container.append(line) - logger.info(f"[{task}] {line}") - continue - - # Detect TES task state changes - (tes_id, tes_state) = __extract_tes_task_state_from_cwl_tes_log(line) - if tes_id: - - # Handle new task - if tes_id not in tes_states: - tes_states[tes_id] = tes_state - __send_event_tes_task_update( - task, - tes_id=tes_id, - token=token, - ) - # Handle state change - elif tes_states[tes_id] != tes_state: - tes_states[tes_id] = tes_state - __send_event_tes_task_update( - task, - tes_id=tes_id, - tes_state=tes_state, - ) - logger.info(line) - continue - - stream_container.append(line) - logger.info(line) - - return (stream_container, list(tes_states.keys())) - - -def __handle_cwl_tes_log_irregularities(line: str) -> List[str]: - """Handles irregularities arising from log parsing.""" - lines: List = list() - - # Handle special case where FTP and cwl-tes logs are on same line - re_ftp_cwl_tes = re.compile( - r'^(\*cmd\* .*)(\[step \w*\] produced output \{)$' - ) - m = re_ftp_cwl_tes.match(line) - if m: - lines.append(m.group(1)) - - return lines - - -def __extract_tes_task_state_from_cwl_tes_log( - line: str, -) -> Tuple[Optional[str], Optional[str]]: - """Extracts task ID and state from cwl-tes log.""" - task_id: Optional[str] = None - task_state: Optional[str] = None - - # Extract new task ID - re_task_new = re.compile(r"^\[job [\w\-]*\] task id: (\S*)$") - m = re_task_new.match(line) - if m: - task_id = m.group(1) - - # Extract task ID and state - re_task_state_poll = re.compile( - r'^\[job [\w\-]*\] POLLING "(\S*)", result: (\w*)' - ) - m = re_task_state_poll.match(line) - if m: - task_id = m.group(1) - task_state = m.group(2) - - return (task_id, task_state) - - -def __send_event_tes_task_update( - task: celery.Task, - tes_id: str, - tes_state: Optional[str] = None, - token: Optional[str] = None, -) -> None: - """Sends custom event to inform about TES task state change.""" - task.send_event( - 'task-tes-task-update', - tes_id=tes_id, - tes_state=tes_state, - token=token, - ) - - return None diff --git a/cwl_wes/tasks/utils.py b/cwl_wes/tasks/utils.py deleted file mode 100644 index 6788f49..0000000 --- a/cwl_wes/tasks/utils.py +++ /dev/null @@ -1,64 +0,0 @@ -"""Utility functions for Celery background tasks.""" - -import logging -from typing import Optional - -from pymongo import collection as Collection - -import cwl_wes.database.db_utils as db_utils - - -# Get logger instance 
-logger = logging.getLogger(__name__) - - -def set_run_state( - collection: Collection, - run_id: str, - task_id: Optional[str] = None, - state: str = 'UNKNOWN', -): - """Set/update state of run associated with Celery task.""" - if not task_id: - document = collection.find_one( - filter={'run_id': run_id}, - projection={ - 'task_id': True, - '_id': False, - } - ) - _task_id = document['task_id'] - else: - _task_id = task_id - try: - document = db_utils.update_run_state( - collection=collection, - task_id=_task_id, - state=state, - ) - except Exception as e: - logger.exception( - ( - "Database error. Could not update state of run '{run_id}' " - "(task id: '{task_id}') to state '{state}'. Original error " - "message: {type}: {msg}" - ).format( - run_id=run_id, - task_id=_task_id, - state=state, - type=type(e).__name__, - msg=e, - ) - ) - finally: - if document: - logger.info( - ( - "State of run '{run_id}' (task id: '{task_id}') " - "changed to '{state}'." - ).format( - run_id=run_id, - task_id=_task_id, - state=state, - ) - ) diff --git a/cwl_wes/tasks/workflow_run_manager.py b/cwl_wes/tasks/workflow_run_manager.py new file mode 100644 index 0000000..97b7751 --- /dev/null +++ b/cwl_wes/tasks/workflow_run_manager.py @@ -0,0 +1,376 @@ +"""Workflow run manager executed on worker.""" + +from datetime import datetime +import logging +import os +import subprocess +import time +from typing import Dict, List, Optional + +from foca.models.config import Config +from pymongo.errors import PyMongoError + +from cwl_wes.tasks.cwl_log_processor import CWLLogProcessor, CWLTesProcessor +import cwl_wes.utils.db as db_utils +from cwl_wes.worker import celery_app + +# Get logger instance +logger = logging.getLogger(__name__) + + +class WorkflowRunManager: # pylint: disable=too-many-instance-attributes + """Workflow run manager.""" + + def __init__( + self, + command_list: List, + task: celery_app.Task, + tmp_dir: str, + token: Optional[str] = None, + ) -> None: + """Initiate workflow run manager instance. + + Args: + task: Celery task instance for initiating workflow run. + task_id: Unique identifier for workflow run task. + command_list: List of commands to be executed as a part of workflow + run. + tmp_dir: Current working directory to be passed for child process + execution context. + token: JSON Web Token (JWT). + foca_config: :py:class:`foca.models.config.Config` instance + describing configurations registered with `celery_app`. + custom_config: :py:class:`cwl_wes.custom_config.CustomConfig` + instance describing custom configuration model for cwl-WES + specific configurations. + collection: Collection client for saving task run progress. + tes_config: TES (Task Execution Service) endpoint configurations. + authorization: Boolean to define the security auth configuration + for the app. + string_format: String time format for task timestamps. + + Attributes: + task: Celery task instance for initiating workflow run. + task_id: Unique identifier for workflow run task. + command_list: List of commands to be executed as a part of workflow + run. + tmp_dir: Current working directory to be passed for child process + execution context. + token: JSON Web Token (JWT). + foca_config: :py:class:`foca.models.config.Config` instance + describing configurations registered with `celery_app`. + custom_config: :py:class:`cwl_wes.custom_config.CustomConfig` + instance describing custom configuration model for cwl-WES + specific configurations. + collection: Collection client for saving task run progress. 
+            tes_config: TES (Task Execution Service) endpoint configurations.
+            authorization: Boolean to define the security auth configuration
+                for the app.
+            string_format: String time format for task timestamps.
+        """
+        self.task = task
+        self.task_id = self.task.request.id
+        self.command_list = command_list
+        self.tmp_dir = tmp_dir
+        self.token = token
+        self.foca_config: Config = celery_app.conf.foca
+        self.controller_config = self.foca_config.custom.controller
+        self.collection = (
+            self.foca_config.db.dbs["cwl-wes-db"].collections["runs"].client
+        )
+        self.tes_config = {
+            "url": self.controller_config.tes_server.url,
+            "query_params": (
+                self.controller_config.tes_server.status_query_params
+            ),
+            "timeout": self.controller_config.tes_server.timeout,
+        }
+        self.authorization = self.foca_config.security.auth.required
+        self.string_format: str = "%Y-%m-%d %H:%M:%S.%f"
+
+    def trigger_task_start_events(self) -> None:
+        """Trigger task start events."""
+        if not self.collection.find_one({"task_id": self.task.request.id}):
+            return
+        internal = {}
+        current_ts = time.time()
+        internal["task_started"] = datetime.utcfromtimestamp(current_ts)
+        # Update run document in database
+        try:
+            self.update_run_document(
+                state="RUNNING",
+                internal=internal,
+                task_started=datetime.utcfromtimestamp(current_ts).strftime(
+                    self.string_format
+                ),
+            )
+        except PyMongoError as exc:
+            logger.exception(
+                "Database error. Could not update log information for task"
+                f" '{self.task_id}'. Original error message:"
+                f" {type(exc).__name__}: {exc}"
+            )
+            raise
+
+    def trigger_task_failure_events(self, task_end_ts):
+        """Trigger task failure events.
+
+        Args:
+            task_end_ts: Task end timestamp.
+        """
+        if not self.collection.find_one({"task_id": self.task_id}):
+            return
+
+        # Create dictionary for internal parameters
+        internal = {}
+        internal["task_finished"] = datetime.utcfromtimestamp(task_end_ts)
+        task_meta_data = celery_app.AsyncResult(id=self.task_id)
+        internal["traceback"] = task_meta_data.traceback
+
+        # Update run document in database
+        self.update_run_document(
+            state="SYSTEM_ERROR",
+            internal=internal,
+            task_finished=datetime.utcfromtimestamp(task_end_ts).strftime(
+                self.string_format
+            ),
+            exception=task_meta_data.result,
+        )
+
+    def trigger_task_success_events(  # pylint: disable=too-many-arguments
+        self,
+        returncode: int,
+        log: List[str],
+        tes_ids: List[str],
+        token: str,
+        task_end_ts: float,
+    ) -> None:
+        """Trigger task success events.
+
+        Args:
+            returncode: Task completion status code.
+            log: Task run log.
+            tes_ids: TES task identifiers.
+            token: TES token.
+            task_end_ts: Task end timestamp.
+        """
+        if not self.collection.find_one({"task_id": self.task_id}):
+            return
+
+        # Keep raw log lines and join them into a single string
+        log_list = log
+        log = os.linesep.join(log)
+
+        # Create dictionary for internal parameters
+        internal = {}
+        internal["task_finished"] = datetime.utcfromtimestamp(task_end_ts)
+
+        # Determine final state to set
+        document = self.collection.find_one(
+            filter={"task_id": self.task_id},
+            projection={
+                "api.state": True,
+                "_id": False,
+            },
+        )
+        if document and document["api"]["state"] == "CANCELING":
+            state = "CANCELED"
+        elif returncode:
+            state = "EXECUTOR_ERROR"
+        else:
+            state = "COMPLETE"
+
+        # Extract run outputs
+        cwl_tes_processor = CWLTesProcessor(tes_config=self.tes_config)
+        outputs = cwl_tes_processor.cwl_tes_outputs_parser_list(log=log_list)
+
+        # Get task logs
+        task_logs = cwl_tes_processor.get_tes_task_logs(
+            tes_ids=tes_ids,
+            token=token,
+        )
+
+        # Update run document in database
+        try:
+            self.update_run_document(
+                state=state,
+                internal=internal,
+                outputs=outputs,
+                task_logs=task_logs,
+                task_finished=datetime.utcfromtimestamp(task_end_ts).strftime(
+                    self.string_format
+                ),
+                return_code=returncode,
+                stdout=log,
+                stderr="",
+            )
+        except PyMongoError as exc:
+            logger.exception(
+                "Database error. Could not update log information for task"
+                f" '{self.task_id}'. Original error message:"
+                f" {type(exc).__name__}: {exc}"
+            )
+            raise
+
+    def trigger_task_end_events(
+        self,
+        returncode: int,
+        log: List[str],
+        tes_ids: List[str],
+        token: str,
+    ) -> None:
+        """Trigger task completion events.
+
+        Args:
+            returncode: Task completion status code.
+            log: Task run log.
+            tes_ids: TES task identifiers.
+            token: TES token.
+        """
+        task_end_ts = time.time()
+        if returncode == 0:
+            self.trigger_task_success_events(
+                log=log,
+                tes_ids=tes_ids,
+                token=token,
+                task_end_ts=task_end_ts,
+                returncode=returncode,
+            )
+        else:
+            self.trigger_task_failure_events(task_end_ts=task_end_ts)
+
+    def update_run_document(  # pylint: disable=too-many-branches
+        self,
+        state: Optional[str] = None,
+        internal: Optional[Dict] = None,
+        outputs: Optional[Dict] = None,
+        task_logs: Optional[List[Dict]] = None,
+        **run_log_params,
+    ):
+        """Update run document.
+
+        Specifically, update state, internal and run log parameters in database
+        document.
+
+        Args:
+            state: Task state.
+            internal: Task specific internal parameters.
+            outputs: Task specific output parameters.
+            task_logs: Task run logs.
+            **run_log_params: Run log parameters.
+ """ + # TODO: Minimize db ops; try to compile entire object & update once + # Update internal parameters + if internal: + document = db_utils.upsert_fields_in_root_object( + collection=self.collection, + task_id=self.task_id, + root="internal", + **internal, + ) + + # Update outputs + if outputs: + document = db_utils.upsert_fields_in_root_object( + collection=self.collection, + task_id=self.task_id, + root="api.outputs", + **outputs, + ) + + # Update task logs + if task_logs: + document = db_utils.upsert_fields_in_root_object( + collection=self.collection, + task_id=self.task_id, + root="api", + task_logs=task_logs, + ) + + # Update run log parameters + if run_log_params: + document = db_utils.upsert_fields_in_root_object( + collection=self.collection, + task_id=self.task_id, + root="api.run_log", + **run_log_params, + ) + + # Calculate queue, execution and run time + if document and document["internal"]: + run_log = document["internal"] + durations = {} + + if "task_started" in run_log_params: + if "task_started" in run_log and "task_received" in run_log: + durations["time_queue"] = ( + run_log["task_started"] - run_log["task_received"] + ).total_seconds() + + if "task_finished" in run_log_params: + if "task_finished" in run_log and "task_started" in run_log: + durations["time_execution"] = ( + run_log["task_finished"] - run_log["task_started"] + ).total_seconds() + if "task_finished" in run_log and "task_received" in run_log: + durations["time_total"] = ( + run_log["task_finished"] - run_log["task_received"] + ).total_seconds() + + if durations: + document = db_utils.upsert_fields_in_root_object( + collection=self.collection, + task_id=self.task_id, + root="api.run_log", + **durations, + ) + + # Update state + if state: + try: + document = db_utils.update_run_state( + collection=self.collection, + task_id=self.task_id, + state=state, + ) + except PyMongoError as exc: + logger.exception( + "Database error. Could not update log information for task" + f" '{self.task_id}'. Original error message:" + f" {type(exc).__name__}: {exc}" + ) + raise + + # Log info message + if document: + logger.info( + f"State of run '{document['run_id']}' (task id:" + f" '{self.task_id}') changed to '{state}'." 
+            )
+
+        return document
+
+    def run_workflow(self):
+        """Initiate workflow run."""
+        self.trigger_task_start_events()
+        proc = subprocess.Popen(  # pylint: disable=consider-using-with
+            self.command_list,
+            cwd=self.tmp_dir,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            universal_newlines=True,
+        )
+        # Parse output in real-time
+        cwl_log_processor = CWLLogProcessor(
+            tes_config=self.tes_config, collection=self.collection
+        )
+        log, tes_ids = cwl_log_processor.process_cwl_logs(
+            self.task,
+            stream=proc.stdout,
+            token=self.token,
+        )
+        returncode = proc.wait()
+        self.trigger_task_end_events(
+            token=self.token, returncode=returncode, log=log, tes_ids=tes_ids
+        )
diff --git a/cwl_wes/utils/__init__.py b/cwl_wes/utils/__init__.py
new file mode 100644
index 0000000..5bf53ce
--- /dev/null
+++ b/cwl_wes/utils/__init__.py
@@ -0,0 +1 @@
+"""cwl-WES utilities."""
diff --git a/cwl_wes/utils/controllers.py b/cwl_wes/utils/controllers.py
new file mode 100644
index 0000000..09a98b0
--- /dev/null
+++ b/cwl_wes/utils/controllers.py
@@ -0,0 +1,50 @@
+"""Controller utilities."""
+
+import logging
+from typing import Dict, Optional
+
+from connexion.exceptions import Forbidden
+from flask import Config
+from pymongo.collection import Collection
+
+from cwl_wes.exceptions import WorkflowNotFound
+
+logger = logging.getLogger(__name__)
+
+
+def get_document_if_allowed(
+    config: Config,
+    run_id: str,
+    projection: Dict,
+    user_id: Optional[str],
+) -> Dict:
+    """Get document from database, if allowed.
+
+    Args:
+        config: Flask configuration object.
+        run_id: Workflow run ID.
+        projection: Projection for database query.
+        user_id: User ID.
+
+    Raises:
+        WorkflowNotFound: If workflow run is not found.
+        Forbidden: If user is not allowed to access workflow run.
+
+    Returns:
+        Document from database.
+    """
+    collection_runs: Collection = (
+        config.foca.db.dbs["cwl-wes-db"].collections["runs"].client
+    )
+    document = collection_runs.find_one(
+        filter={"run_id": run_id},
+        projection=projection,
+    )
+
+    if document is None:
+        raise WorkflowNotFound
+
+    if document["user_id"] != user_id:
+        raise Forbidden
+
+    return document
diff --git a/cwl_wes/utils/db.py b/cwl_wes/utils/db.py
new file mode 100644
index 0000000..65c3863
--- /dev/null
+++ b/cwl_wes/utils/db.py
@@ -0,0 +1,178 @@
+"""Utility functions for database access."""
+
+import logging
+from typing import Any, List, Mapping, Optional
+
+from bson.objectid import ObjectId
+from pymongo import collection as Collection
+from pymongo.collection import ReturnDocument
+from pymongo.errors import PyMongoError
+
+# Get logger instance
+logger = logging.getLogger(__name__)
+
+
+def update_run_state(
+    collection: Collection, task_id: str, state: str = "UNKNOWN"
+) -> Optional[Mapping[Any, Any]]:
+    """Update state of workflow run and return updated document."""
+    return collection.find_one_and_update(
+        {"task_id": task_id},
+        {"$set": {"api.state": state}},
+        return_document=ReturnDocument.AFTER,
+    )
+
+
+def upsert_fields_in_root_object(
+    collection: Collection, task_id: str, root: str, **kwargs
+) -> Optional[Mapping[Any, Any]]:
+    """Insert or update fields in(to) the same root (object) field.
+
+    Args:
+        collection: MongoDB collection.
+        task_id: Task identifier of workflow run.
+        root: Root field name.
+        **kwargs: Key-value pairs of fields to insert/update.
+
+    Returns:
+        Inserted/updated document, or `None` if database operation failed.
+    """
+    return collection.find_one_and_update(
+        {"task_id": task_id},
+        {
+            "$set": {
+                ".".join([root, key]): value for (key, value) in kwargs.items()
+            }
+        },
+        return_document=ReturnDocument.AFTER,
+    )
+
+
+def update_tes_task_state(
+    collection: Collection, task_id: str, tes_id: str, state: str
+) -> Optional[Mapping[Any, Any]]:
+    """Update field 'state' in TES task log and return updated document.
+
+    Args:
+        collection: MongoDB collection.
+        task_id: Task identifier of workflow run.
+        tes_id: Identifier of TES task.
+        state: New state of TES task.
+
+    Returns:
+        Updated document, or `None` if database operation failed.
+    """
+    return collection.find_one_and_update(
+        {"task_id": task_id, "api.task_logs": {"$elemMatch": {"id": tes_id}}},
+        {"$set": {"api.task_logs.$.state": state}},
+        return_document=ReturnDocument.AFTER,
+    )
+
+
+def append_to_tes_task_logs(
+    collection: Collection,
+    task_id: str,
+    tes_log: Mapping,
+) -> Optional[Mapping[Any, Any]]:
+    """Append task log to TES task logs.
+
+    Args:
+        collection: MongoDB collection.
+        task_id: Task identifier of workflow run.
+        tes_log: Task log to append.
+
+    Returns:
+        Updated document, or `None` if database operation failed.
+    """
+    return collection.find_one_and_update(
+        {"task_id": task_id},
+        {"$push": {"api.task_logs": tes_log}},
+        return_document=ReturnDocument.AFTER,
+    )
+
+
+def find_tes_task_ids(collection: Collection, run_id: str) -> List:
+    """Get list of TES task ids associated with a run of interest.
+
+    Args:
+        collection: MongoDB collection.
+        run_id: Run identifier.
+
+    Returns:
+        List of TES task ids.
+    """
+    return collection.distinct("api.task_logs.id", {"run_id": run_id})
+
+
+def set_run_state(
+    collection: Collection,
+    run_id: str,
+    task_id: Optional[str] = None,
+    state: str = "UNKNOWN",
+) -> None:
+    """Set/update state of run associated with Celery task.
+
+    Args:
+        collection: MongoDB collection.
+        run_id: Run identifier.
+        task_id: Task identifier of workflow run.
+        state: New state of workflow run.
+    """
+    if not task_id:
+        document = collection.find_one(
+            filter={"run_id": run_id},
+            projection={
+                "task_id": True,
+                "_id": False,
+            },
+        )
+        _task_id = document["task_id"]
+    else:
+        _task_id = task_id
+    try:
+        document = update_run_state(
+            collection=collection,
+            task_id=_task_id,
+            state=state,
+        )
+    except PyMongoError as exc:
+        logger.exception(
+            f"Database error. Could not update state of run '{run_id}' (task "
+            f"id: '{_task_id}') to state '{state}'. Original error message: "
+            f"{type(exc).__name__}: {exc}"
+        )
+    finally:
+        if document:
+            logger.info(
+                f"State of run '{run_id}' (task id: '{_task_id}') changed to: "
+                f"{state}."
+            )
+
+
+def find_one_latest(collection: Collection) -> Optional[Mapping[Any, Any]]:
+    """Find newest object.
+
+    Returns:
+        Object stripped of object id, or `None` if no object exists.
+    """
+    try:
+        return (
+            collection.find({}, {"_id": False})
+            .sort([("_id", -1)])
+            .limit(1)
+            .next()
+        )
+    except StopIteration:
+        return None
+
+
+def find_id_latest(collection: Collection) -> Optional[ObjectId]:
+    """Find identifier of newest object.
+
+    Returns:
+        Object identifier, or `None` if no object exists.
+ """ + try: + return collection.find().sort([("_id", -1)]).limit(1).next()["_id"] + except StopIteration: + return None diff --git a/cwl_wes/ga4gh/wes/endpoints/utils/drs.py b/cwl_wes/utils/drs.py similarity index 79% rename from cwl_wes/ga4gh/wes/endpoints/utils/drs.py rename to cwl_wes/utils/drs.py index 8e15d23..cac98ef 100644 --- a/cwl_wes/ga4gh/wes/endpoints/utils/drs.py +++ b/cwl_wes/utils/drs.py @@ -5,18 +5,18 @@ import logging import os import re -from requests.exceptions import ConnectionError import sys -from typing import (Iterator, List, Match, Optional) +from typing import Iterator, List, Match, Optional from drs_cli.client import DRSClient -from drs_cli.errors import (InvalidResponseError, InvalidURI) +from drs_cli.errors import InvalidResponseError, InvalidURI from drs_cli.models import Error from werkzeug.exceptions import ( BadRequest, InternalServerError, ) +# pragma pylint: disable=too-many-arguments # Get logger instance logger = logging.getLogger(__name__) @@ -30,8 +30,10 @@ def translate_drs_uris( base_path: Optional[str] = None, use_http: bool = False, ) -> None: - """Replace hostname-based DRS URIs with access links either in a file or, - recursively, in all files of a directory. + """Replace hostname-based DRS URIs with access links. + + Replacement takes place either in a file or, recursively, in all files of a + directory. For hostname-based DRS URIs, cf. https://ga4gh.github.io/data-repository-service-schemas/preview/develop/docs/#_hostname_based_drs_uris @@ -51,16 +53,20 @@ def translate_drs_uris( documentation/specification. """ # define regex for identifying DRS URIs - _RE_DOMAIN_PART = r'[a-z0-9]([a-z0-9-]{1,61}[a-z0-9]?)?' - _RE_DOMAIN = rf"({_RE_DOMAIN_PART}\.)+{_RE_DOMAIN_PART}\.?" - _RE_OBJECT_ID = rf"(?Pdrs:\/\/{_RE_DOMAIN}\/\S+)" + re_domain_part = r"[a-z0-9]([a-z0-9-]{1,61}[a-z0-9]?)?" + re_domain = rf"({re_domain_part}\.)+{re_domain_part}\.?" + re_object_id = rf"(?Pdrs:\/\/{re_domain}\/\S+)" # get absolute paths of file or directory (including subdirectories) logger.debug(f"Collecting file(s) for provided path '{path}'...") - files = abs_paths( - dir=path, - file_ext=file_types, - ) if os.path.isdir(path) else [path] + files = ( + abs_paths( + root_dir=path, + file_ext=file_types, + ) + if os.path.isdir(path) + else [path] + ) # replace any DRS URIs in any file in place for _file in files: @@ -69,10 +75,10 @@ def translate_drs_uris( for line in _f: sys.stdout.write( re.sub( - pattern=_RE_OBJECT_ID, + pattern=re_object_id, repl=partial( get_replacement_string, - ref='drs_uri', + ref="drs_uri", supported_access_methods=supported_access_methods, port=port, base_path=base_path, @@ -84,20 +90,20 @@ def translate_drs_uris( def abs_paths( - dir: str, + root_dir: str, file_ext: List[str], ) -> Iterator[str]: - """Yields absolute paths of files with the indicated file extensions in - specified directory and subdirectories. + """Get absolute paths of files in directory and subdirectories. Arguments: dir: Directory to search files in. - file_ext: List of file extensions for files to return. + file_ext: Limit results to files having either of the indicated + extensions. Returns: Generator yielding absolute file paths. 
""" - for dirpath, _, files in os.walk(dir): + for dirpath, _, files in os.walk(root_dir): for _file in files: if _file.endswith(tuple(file_ext)): yield os.path.abspath(os.path.join(dirpath, _file)) @@ -111,7 +117,7 @@ def get_replacement_string( base_path: Optional[str] = None, use_http: bool = False, ) -> str: - """Helper function to get string replacement string. + """Get string replacement string helper function. Args: match: Match object from `re.sub()` call @@ -146,7 +152,8 @@ def get_access_url_from_drs( base_path: Optional[str] = None, use_http: bool = False, ) -> str: - """ + """Get access URL from DRS URI. + Arguments: drs_uri: A DRS URI pointing to a DRS object. supported_access_methods: List of access methods/file transfer @@ -178,38 +185,35 @@ def get_access_url_from_drs( base_path=base_path, use_http=use_http, ) - except InvalidURI: + except InvalidURI as exc: logger.error(f"The provided DRS URI '{drs_uri}' is invalid.") - raise BadRequest + raise BadRequest from exc # get DRS object try: - object = client.get_object( - object_id=drs_uri - ) - except (ConnectionError, InvalidResponseError): + obj = client.get_object(object_id=drs_uri) + except (ConnectionError, InvalidResponseError) as exc: logger.error(f"Could not connect to DRS host for DRS URI '{drs_uri}'.") - raise InternalServerError - if isinstance(object, Error): - if object.status_code == 404: + raise InternalServerError from exc + if isinstance(obj, Error): + if obj.status_code == 404: logger.error(f"Could not access DRS host for DRS URI '{drs_uri}'.") raise BadRequest - # TODO: handle 401 & 403 - else: - logger.error(f"DRS returned error: {object}'.") - raise InternalServerError + logger.error(f"DRS returned error: {obj}'.") + raise InternalServerError # get access methods and access method types/protocols - available_methods = object.access_methods + available_methods: Optional[List] = obj.access_methods + assert available_methods is not None available_types = [m.type.value for m in available_methods] # iterate through supported methods by order of preference - # TODO: add support for access URL headers for supported_method in supported_access_methods: try: access_url = str( - available_methods - [available_types.index(supported_method)].access_url.url + available_methods[ + available_types.index(supported_method) + ].access_url.url ) logger.info( f"Resolved DRS URI '{drs_uri}' to access link '{access_url}'." 
diff --git a/cwl_wes/version.py b/cwl_wes/version.py new file mode 100644 index 0000000..18e5ee6 --- /dev/null +++ b/cwl_wes/version.py @@ -0,0 +1,3 @@ +"""Single source of truth for package version.""" + +__version__ = "0.16.0" diff --git a/cwl_wes/worker.py b/cwl_wes/worker.py new file mode 100644 index 0000000..2d8e301 --- /dev/null +++ b/cwl_wes/worker.py @@ -0,0 +1,9 @@ +"""Celery worker entry point.""" + +from foca.foca import Foca + +foca = Foca( + config_file="config.yaml", + custom_config_model="cwl_wes.custom_config.CustomConfig", +) +celery_app = foca.create_celery_app() diff --git a/cwl_wes/wsgi.py b/cwl_wes/wsgi.py index 4c5ade2..10b4251 100644 --- a/cwl_wes/wsgi.py +++ b/cwl_wes/wsgi.py @@ -1,3 +1,5 @@ -from cwl_wes.app import run_server +"""WSGI entry point.""" -app, config = run_server() +from cwl_wes.app import init_app + +app = init_app() diff --git a/deployment/templates/wes/wes-deployment.yaml b/deployment/templates/wes/wes-deployment.yaml index 93ca230..1a2ac37 100644 --- a/deployment/templates/wes/wes-deployment.yaml +++ b/deployment/templates/wes/wes-deployment.yaml @@ -26,7 +26,7 @@ spec: imagePullPolicy: Always workingDir: '/app/cwl_wes' command: [ 'gunicorn' ] - args: [ '--log-level', 'debug', '-c', 'config.py', 'wsgi:app', '--reload', '--reload-extra-file', '{{ .Values.extra_config.folder }}/{{ .Values.extra_config.file }}' ] + args: [ '--log-level', 'debug', '-c', 'gunicorn.py', 'wsgi:app', '--reload', '--reload-extra-file', '{{ .Values.extra_config.folder }}/{{ .Values.extra_config.file }}' ] env: - name: WES_CONFIG value: {{ .Values.extra_config.folder }}/{{ .Values.extra_config.file }} diff --git a/deployment/values.yaml b/deployment/values.yaml index f6a8e07..b7916a0 100644 --- a/deployment/values.yaml +++ b/deployment/values.yaml @@ -9,7 +9,7 @@ storageAccessMode: ReadWriteOnce # mongodb-pvc.yaml/rabbitmq-pvc.yaml, change t extra_config: folder: /etc/app_config - file: app_config.yaml + file: config.yaml autocert: createJob: "true" # actually create autocert cronjob, for K8S with autocert installed set to "false" @@ -88,8 +88,8 @@ wes: broker_port: 5672 result_backend: 'rpc://' include: - - cwl_wes.tasks.tasks.run_workflow - - cwl_wes.tasks.tasks.cancel_run + - cwl_wes.tasks.run_workflow + - cwl_wes.tasks.cancel_run monitor: timeout: 0.1 message_maxsize: 16777216 @@ -130,7 +130,6 @@ wes: default_value: '5' tags: known_tes_endpoints: 'https://tes.tsi.ebi.ac.uk/|https://tes-dev.tsi.ebi.ac.uk/|https://csc-tesk.c03.k8s-popup.csc.fi/|https://tesk.c01.k8s-popup.csc.fi/' - app_version: 0.15.0 # TES server tes: diff --git a/docker-compose.yaml b/docker-compose.yaml index cdae71b..e52b483 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -2,28 +2,28 @@ version: '3.6' services: wes: - image: elixircloud/cwl-wes:latest + image: docker.io/elixircloud/cwl-wes:latest build: context: . 
dockerfile: Dockerfile restart: unless-stopped links: - mongodb - command: bash -c "cd /app/cwl_wes; gunicorn -c config.py wsgi:app" + command: bash -c "cd /app/cwl_wes; gunicorn -c gunicorn.py wsgi:app" volumes: - ../data/cwl_wes:/data ports: - "8080:8080" wes-worker: - image: elixircloud/cwl-wes:latest + image: docker.io/elixircloud/cwl-wes:latest restart: unless-stopped depends_on: - wes links: - mongodb - rabbitmq - command: bash -c "cd /app/cwl_wes; celery -A celery_worker worker -E --loglevel=info" + command: bash -c "cd /app/cwl_wes; celery -A worker worker -E --loglevel=info" volumes: - ../data/cwl_wes:/data @@ -37,7 +37,7 @@ services: - "5672:5672" mongodb: - image: mongo:3.2 + image: mongo:3.6 restart: unless-stopped volumes: - ../data/cwl_wes/db:/data/db diff --git a/pylintrc b/pylintrc new file mode 100644 index 0000000..e3c9c44 --- /dev/null +++ b/pylintrc @@ -0,0 +1,4 @@ +[MESSAGES CONTROL] +disable=W0511,W1201,W1202,W1203 +#extension-pkg-white-list= +#ignored-classes= diff --git a/requirements.txt b/requirements.txt index 894004b..48f48e3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,7 @@ --e git+https://github.com/uniqueg/cwl-tes.git@57a193cabab2444bc9b661f83011837bd5ed571a#egg=cwl-tes -cwltool==1.0.20181217162649 -drs-cli==0.2.3 -foca==0.7.0 -gunicorn==19.9.0 -py-tes==0.4.2 -python-dateutil==2.6.1 -ruamel.yaml==0.15.51 \ No newline at end of file +foca~=0.12.0 +#cwl-tes @ git+https://github.com/ohsu-comp-bio/cwl-tes.git@7b44cb1825a302bb7eccb3f2d91dc233adc0e32f#egg=cwl-tes +cwl-tes==0.3.0 +drs-cli~=0.2.3 +gunicorn~=19.9.0 +py-tes~=0.4.2 +importlib-metadata==4.13.0 diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100644 index 0000000..58ebd76 --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1,5 @@ +black~=22.12.0 +flake8~=5.0.4 +flake8-docstrings~=1.6.0 +mypy~=0.991 +pylint~=2.15.9 diff --git a/setup.py b/setup.py index 058e2df..7860021 100644 --- a/setup.py +++ b/setup.py @@ -1,30 +1,41 @@ -from setuptools import (setup, find_packages) +"""Package setup.""" -with open('README.md', 'r') as fh: - long_description = fh.read() +from pathlib import Path +from setuptools import setup, find_packages + +root_dir = Path(__file__).parent.resolve() + +with open(root_dir / "cwl_wes" / "version.py", encoding="utf-8") as _file: + exec(_file.read()) # pylint: disable=exec-used + +with open(root_dir / "README.md", encoding="utf-8") as _file: + LONG_DESCRIPTION = _file.read() + +with open(root_dir / "requirements.txt", encoding="utf-8") as _file: + INSTALL_REQUIRES = _file.read().splitlines() setup( - name='cwl-wes', - version='0.15.0', - author='Elixir Cloud & AAI', - author_email='alexander.kanitz@alumni.ethz.ch', - description='Flask- and MongoDB-powered GA4GH WES server', - long_description=long_description, + name="cwl-wes", + version=__version__, # noqa: F821 # pylint: disable=undefined-variable + author="Elixir Cloud & AAI", + author_email="alexander.kanitz@alumni.ethz.ch", + description="Flask- and MongoDB-powered GA4GH WES server", + long_description=LONG_DESCRIPTION, long_description_content_type="text/markdown", - license='Apache License 2.0', - url='https://github.com/elixir-cloud-aai/cwl-WES.git', + license="Apache License 2.0", + url="https://github.com/elixir-cloud-aai/cwl-WES.git", packages=find_packages(), keywords=( - 'ga4gh wes workflow elixir rest restful api app server openapi ' - 'swagger mongodb python flask' + "ga4gh wes workflow elixir rest restful api app server openapi " + "swagger mongodb python flask" ), classifiers=[ - 
'License :: OSI Approved :: Apache Software License', - 'Development Status :: 3 - Alpha', - 'Intended Audience :: Science/Research', - 'Topic :: Scientific/Engineering :: Bio-Informatics', - 'Natural Language :: English', - 'Programming Language :: Python :: 3.7', + "License :: OSI Approved :: Apache Software License", + "Development Status :: 3 - Alpha", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Natural Language :: English", + "Programming Language :: Python :: 3.7", ], - install_requires=['connexion', 'Flask-Cors', 'Flask-PyMongo'], + install_requires=INSTALL_REQUIRES, ) diff --git a/test-http-call.bash b/test-http-call.bash deleted file mode 100644 index d676c8e..0000000 --- a/test-http-call.bash +++ /dev/null @@ -1,5 +0,0 @@ -CODE=$(curl --write-out '%{http_code}' --output /dev/null --silent localhost:8080/ga4gh/wes/v1/runs) -if [ $CODE != "200" ] -then - exit 1; -fi \ No newline at end of file diff --git a/tests/integration_tests.sh b/tests/integration_tests.sh new file mode 100755 index 0000000..b4f5f0a --- /dev/null +++ b/tests/integration_tests.sh @@ -0,0 +1,182 @@ +#!/usr/bin/env bash + +set -euo pipefail + +WES_ROOT="http://localhost:8080/ga4gh/wes/v1" + +# GET /service-info +ENDPOINT="/service-info" +METHOD="GET" +EXPECTED_CODE="200" +echo -n "Testing '$METHOD $ENDPOINT' | Expecting: $EXPECTED_CODE | Got: " +RESPONSE_CODE=$(curl \ + --silent \ + --write-out "%{http_code}" \ + --output "/dev/null" \ + --request "$METHOD" \ + --header "Accept: application/json" \ + "${WES_ROOT}${ENDPOINT}" \ +) +echo -n "$RESPONSE_CODE | Result: " +test $RESPONSE_CODE = $EXPECTED_CODE && echo "PASSED" || (echo "FAILED" && exit 1) + +# GET /runs +ENDPOINT="/runs" +METHOD="GET" +EXPECTED_CODE="200" +echo -n "Testing '$METHOD $ENDPOINT' | Expecting: $EXPECTED_CODE | Got: " +RESPONSE_CODE=$(curl \ + --silent \ + --write-out "%{http_code}" \ + --output "/dev/null" \ + --request "$METHOD" \ + --header "Accept: application/json" \ + "${WES_ROOT}${ENDPOINT}" \ +) +echo -n "$RESPONSE_CODE | Result: " +test $RESPONSE_CODE = $EXPECTED_CODE && echo "PASSED" || (echo "FAILED" && exit 1) + +# GET /runs/{run_id} 404 +RUN_ID_INVALID="INVALID_ID" +ENDPOINT="/runs/$RUN_ID_INVALID" +METHOD="GET" +EXPECTED_CODE="404" +echo -n "Testing '$METHOD $ENDPOINT' | Expecting: $EXPECTED_CODE | Got: " +RESPONSE_CODE=$(curl \ + --silent \ + --write-out "%{http_code}" \ + --output "/dev/null" \ + --request "$METHOD" \ + --header "Accept: application/json" \ + "${WES_ROOT}${ENDPOINT}" \ +) +echo -n "$RESPONSE_CODE | Result: " +test $RESPONSE_CODE = $EXPECTED_CODE && echo "PASSED" || (echo "FAILED" && exit 1) + +# GET /runs/{run_id}/status 404 +RUN_ID_INVALID="INVALID_ID" +ENDPOINT="/runs/$RUN_ID_INVALID/status" +METHOD="GET" +EXPECTED_CODE="404" +echo -n "Testing '$METHOD $ENDPOINT' | Expecting: $EXPECTED_CODE | Got: " +RESPONSE_CODE=$(curl \ + --silent \ + --write-out "%{http_code}" \ + --output "/dev/null" \ + --request "$METHOD" \ + --header "Accept: application/json" \ + "${WES_ROOT}${ENDPOINT}" \ +) +echo -n "$RESPONSE_CODE | Result: " +test $RESPONSE_CODE = $EXPECTED_CODE && echo "PASSED" || (echo "FAILED" && exit 1) + +# POST /runs 200 +ENDPOINT="/runs" +METHOD="POST" +EXPECTED_CODE="200" +echo -n "Testing '$METHOD $ENDPOINT' | Expecting: $EXPECTED_CODE | Got: " +RESPONSE_CODE=$(curl \ + --silent \ + --write-out '%{http_code}' \ + --output /dev/null \ + --request "$METHOD" \ + --header "Accept: application/json" \ + --header "Content-Type: multipart/form-data" \ + 
--form workflow_params='{"input":{"class":"File","path":"https://raw.githubusercontent.com/uniqueg/cwl-example-workflows/master/hashsplitter-workflow.cwl"}}' \
+    --form workflow_type="CWL" \
+    --form workflow_type_version="v1.0" \
+    --form workflow_url="https://github.com/uniqueg/cwl-example-workflows/blob/master/hashsplitter-workflow.cwl" \
+    "${WES_ROOT}${ENDPOINT}"
+)
+echo -n "$RESPONSE_CODE | Result: "
+test $RESPONSE_CODE = $EXPECTED_CODE && echo "PASSED" || (echo "FAILED" && exit 1)
+
+# Fetch latest run identifier
+ENDPOINT="/runs"
+METHOD="GET"
+echo -n "Fetching run identifier | Identifier: "
+RUN_ID_COMPLETE=$(curl \
+    --silent \
+    --request "$METHOD" \
+    --header "Accept: application/json" \
+    "${WES_ROOT}${ENDPOINT}" \
+    | jq .runs[0].run_id \
+    | tr -d '"' \
+)
+echo -n "$RUN_ID_COMPLETE | Result: "
+test $RUN_ID_COMPLETE != "null" && echo "PASSED" || (echo "FAILED" && exit 1)
+
+# GET /runs/{run_id} 200
+ENDPOINT="/runs/$RUN_ID_COMPLETE"
+METHOD="GET"
+EXPECTED_CODE="200"
+echo -n "Testing '$METHOD $ENDPOINT' | Expecting: $EXPECTED_CODE | Got: "
+RESPONSE_CODE=$(curl \
+    --silent \
+    --write-out "%{http_code}" \
+    --output "/dev/null" \
+    --request "$METHOD" \
+    --header "Accept: application/json" \
+    "${WES_ROOT}${ENDPOINT}" \
+)
+echo -n "$RESPONSE_CODE | Result: "
+test $RESPONSE_CODE = $EXPECTED_CODE && echo "PASSED" || (echo "FAILED" && exit 1)
+
+# GET /runs/{run_id}/status 200
+ENDPOINT="/runs/$RUN_ID_COMPLETE/status"
+METHOD="GET"
+EXPECTED_CODE="200"
+echo -n "Testing '$METHOD $ENDPOINT' | Expecting: $EXPECTED_CODE | Got: "
+RESPONSE_CODE=$(curl \
+    --silent \
+    --write-out "%{http_code}" \
+    --output "/dev/null" \
+    --request "$METHOD" \
+    --header "Accept: application/json" \
+    "${WES_ROOT}${ENDPOINT}" \
+)
+echo -n "$RESPONSE_CODE | Result: "
+test $RESPONSE_CODE = $EXPECTED_CODE && echo "PASSED" || (echo "FAILED" && exit 1)
+
+# POST /runs 200
+ENDPOINT="/runs"
+METHOD="POST"
+EXPECTED_CODE="200"
+echo -n "Testing '$METHOD $ENDPOINT' | Expecting: $EXPECTED_CODE | Got: "
+RESPONSE_CODE=$(curl \
+    --silent \
+    --write-out '%{http_code}' \
+    --output /dev/null \
+    --request "$METHOD" \
+    --header "Accept: application/json" \
+    --header "Content-Type: multipart/form-data" \
+    --form workflow_params='{"input":{"class":"File","path":"https://raw.githubusercontent.com/uniqueg/cwl-example-workflows/master/hashsplitter-workflow.cwl"}}' \
+    --form workflow_type="CWL" \
+    --form workflow_type_version="v1.0" \
+    --form workflow_url="https://github.com/uniqueg/cwl-example-workflows/blob/master/hashsplitter-workflow.cwl" \
+    "${WES_ROOT}${ENDPOINT}"
+)
+echo -n "$RESPONSE_CODE | Result: "
+test $RESPONSE_CODE = $EXPECTED_CODE && echo "PASSED" || (echo "FAILED" && exit 1)
+
+# Fetch latest run identifier
+ENDPOINT="/runs"
+METHOD="GET"
+echo -n "Fetching run identifier | Identifier: "
+RUN_ID_CANCEL=$(curl \
+    --silent \
+    --request "$METHOD" \
+    --header "Accept: application/json" \
+    "${WES_ROOT}${ENDPOINT}" \
+    | jq .runs[0].run_id \
+    | tr -d '"' \
+)
+echo -n "$RUN_ID_CANCEL | Result: "
+test $RUN_ID_CANCEL != "null" && echo "PASSED" || (echo "FAILED" && exit 1)
+
+# TODO
+# CANCEL /runs/{run_id} 200
+# Check that status changed to CANCELING
+# Sleep 3-5 min
+# Check that run with $RUN_ID_COMPLETE has status COMPLETE
+# Check that run with $RUN_ID_CANCEL has status CANCELED
diff --git a/tests/run_tests.sh b/tests/run_tests.sh
deleted file mode 100755
index f2b90af..0000000
--- a/tests/run_tests.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/env bash
-
-cwl-tes --tes 
https://tes-dev.tsi.ebi.ac.uk/ cwl/tools/echo.cwl cwl/tools/echo-job.json - -cwl-tes --tes https://tes-dev.tsi.ebi.ac.uk/ cwl/tools/sleep.cwl cwl/tools/sleep-job.json - -## Post tests - -# Post: sleep command -#curl -X POST --header 'Content-Type: multipart/form-data' --header 'Accept: application/json' -F workflow_params=tests%2Fcwl%2Ftools%2Fsleep-job.yml -F workflow_type=cwl -F workflow_type_version=v1.0 -F tags=empty -F workflow_engine_parameters=empty -F workflow_url=tests%2Fcwl%2Ftools%2Fsleep.cwl -F workflow_attachment=empty 'http://localhost:7777/ga4gh/wes/v1/runs' - -# Post: echo command -#curl -X POST --header 'Content-Type: multipart/form-data' --header 'Accept: application/json' -F workflow_params=tests%2Fcwl%2Ftools%2Fecho-job.yml -F workflow_type=cwl -F workflow_type_version=v1.0 -F tags=empty -F workflow_engine_parameters=empty -F workflow_url=tests%2Fcwl%2Ftools%2Fecho.cwl -F workflow_attachment=empty 'http://localhost:7777/ga4gh/wes/v1/runs'
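
Reviewer note: with this patch, the event-driven `TaskMonitor` (and its custom
`task-tes-task-update` events) is gone; run state is now written to the `runs`
collection by `WorkflowRunManager` from inside the Celery task itself. Below is
a minimal sketch of how a controller might dispatch a run under the new layout.
It is an illustration only: the command, paths and `task_id` value are
hypothetical and not taken from this patch.

    # sketch.py - illustrative only; assumes a run document already exists
    # in the `runs` collection with a matching `task_id`
    from cwl_wes.tasks.run_workflow import task__run_workflow

    task__run_workflow.apply_async(
        kwargs={
            # cwl-tes invocation assembled by the controller (hypothetical)
            "command_list": [
                "cwl-tes",
                "--tes", "https://tes.example.org/",
                "workflow.cwl", "inputs.json",
            ],
            "tmp_dir": "/data/runs/EXAMPLE",  # run working directory
            "token": None,  # OAuth2 token, if authorization is required
        },
        task_id="EXAMPLE-TASK-ID",  # ties the Celery task to the run document
    )

`WorkflowRunManager` looks the run up via `self.task.request.id`, so the
`task_id` passed at dispatch time must match the `task_id` stored with the run
(see `trigger_task_start_events` above).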