From 1dce160669f8008e060a9a96f08588b5fb8bf03f Mon Sep 17 00:00:00 2001 From: Joe Taylor Date: Mon, 12 Jun 2017 13:08:02 +0200 Subject: [PATCH 01/18] moved user credentials for pypi to environment variables --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 418b40860..20b2cbc9c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -36,9 +36,9 @@ script: # Deploy to pypi automatically from tagged releases on the stable branch deploy: provider: pypi - user: nikkiray + user: $PYPI_USER password: - secure: YN2r3txhbQI+hZUhUrKy4/dANHKTOz+fzLdHTwhjtPEjTUZvcmxhx582qc4rmPO1qvKH+bzjq2YhhO0J+uN7PmYAvMGPDu1Cjn46GiDogfq3C2+vkM+iovXmXXW+/pd5GRSD2I0+P7s3z1BG2iMwHXrynlxCa9mDApN4kJvEs98Z8SlpUpsOSvv/BhMTMaS1BXUR14ZDedvwK7YJmUbCfdnHufT1T8egRqxbwVyFXQujLpXCv1XDo0mNYjYMjh6DKkn/loT9ZAFSpNYFPdf/ljZIaWbNEqbJ//xXqStW4ix8dVgItN2sNJXPoEAKKptofqzGmmevph0FwBO0aeNmy+nV0tZHmzGk24ofJhjdYuwJTeKSYJBrK0Hye7sQV19G7rba9ZdMp8fO/pLEW6d6g20tABrLxJDtPM+dCL8Tqhy+G0XTY5lKC3x9o+RldGrJCdecL1g4G05DCNeA4YeEdn3/dKt9JjlSXIxwWAFGXhQQtpY3GBKknDIW5gvdxcIk/ktLg80M7IZ5vd/6urN63jGmffawiMJ2Fv+Gx4c1Twm9CA0H8yKH2fV5mepFplpYUPkFjCNP8P5P6VyePSVFa2Re4+UXgzncoupDhG/FDW+skvqRk3S+ga68cNSzDKi2WcdTpRLhS7bvb8yHzshZ2JBMko06mJtpoKfZ8tI8iZg= + secure: $PYPI_PASS_SECURE on: tags: true distributions: sdist bdist_wheel From df0c150bf88b028a8878479ddc33a576367a5d96 Mon Sep 17 00:00:00 2001 From: Joe Taylor Date: Mon, 12 Jun 2017 13:16:17 +0200 Subject: [PATCH 02/18] added requirements.txt to pull requirements from config.py --- requirements.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..cd8c47d3d --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +--index-url https://pypi.python.org/simple/ + +-e . From b6ce7cc653c33d89b4e10cd3fdfa7ae8cd4a7837 Mon Sep 17 00:00:00 2001 From: Joe Taylor Date: Mon, 12 Jun 2017 19:40:12 +0200 Subject: [PATCH 03/18] convered to LF otherwise tests will fail to see YAML header --- knowledge_repo/templates/knowledge_template.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/knowledge_repo/templates/knowledge_template.md b/knowledge_repo/templates/knowledge_template.md index 5ee4132e2..a306b7cba 100644 --- a/knowledge_repo/templates/knowledge_template.md +++ b/knowledge_repo/templates/knowledge_template.md @@ -78,12 +78,12 @@ the growing library of work. ```python import numpy as np - + x = np.linspace(0, 3*np.pi, 500) plot_data = dict() plot_data["x"] = x plot_data["y"] = np.sin(x**2) - + from ggplot import * ggplot(aes(x='date', y='beef'), data=meat) + \ geom_point(color='lightblue') + \ @@ -131,6 +131,3 @@ one person, and whether someone else's work can be linked. Put all the stuff here that is not necessary for supporting the points above. Good place for documentation without distraction. 
- - - From c79f961002cba43a13f8891864d2a60e5a767784 Mon Sep 17 00:00:00 2001 From: Joe Taylor Date: Mon, 12 Jun 2017 19:45:54 +0200 Subject: [PATCH 04/18] added scripts for docker and docker-compose --- Dockerfile | 41 +++++++++++++++++++++++++++++++++++++++++ docker-compose.yml | 10 ++++++++++ 2 files changed, 51 insertions(+) create mode 100644 Dockerfile create mode 100644 docker-compose.yml diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..4a6f29b73 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,41 @@ +# Using officially supported Ubuntu version for Travis CI +FROM ubuntu:14.04 + +ARG TRAVIS_PYTHON_VERSION + +# Install required Ubuntu packages +RUN apt-get update +RUN apt-get install -y wget +RUN apt-get install -y git + +# Download appropriate version of Miniconda +RUN if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh; \ +else wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; fi + +# Install Miniconda +RUN bash miniconda.sh -b -p /miniconda +ENV PATH=/miniconda/bin:${PATH} +RUN hash -r + +# Set up conda package installer +RUN conda config --set always_yes yes --set changeps1 no +RUN conda update -q conda + +# Useful for debugging any issues with conda +RUN conda info -a + +# Install R +RUN conda install -c r r +RUN conda install -c r r-knitr + +# Set the application directory +WORKDIR /app + +# Copy our code from the current folder to /app inside the container +ADD . /app + +# Install python dependencies +RUN pip install -r requirements.txt + +# Ready dependencies to use IpynbFormat instances +RUN pip install --ignore-installed --upgrade nbformat nbconvert[execute] traitlets diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 000000000..8a8de4080 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,10 @@ +version: '2' +services: + web: + build: + context: . + args: + TRAVIS_PYTHON_VERSION: "${TRAVIS_PYTHON_VERSION}" + dockerfile: Dockerfile + ports: + - "80:80" From 450a622f6ef3744fe26d69f7501dfc827bbfbd33 Mon Sep 17 00:00:00 2001 From: Joe Taylor Date: Mon, 12 Jun 2017 19:46:31 +0200 Subject: [PATCH 05/18] converted Travis build-test config to use new Dockerfile --- .travis.yml | 85 ++++++++++++++++++++++++----------------------------- 1 file changed, 39 insertions(+), 46 deletions(-) diff --git a/.travis.yml b/.travis.yml index 20b2cbc9c..052c11312 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,46 +1,39 @@ -language: python -python: - - "2.7" - - "3.4" - -before_install: - # We do this conditionally because it saves us some downloading if the - # version is the same. - - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then - wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh; - else - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; - fi - - bash miniconda.sh -b -p $HOME/miniconda - - export PATH="$HOME/miniconda/bin:$PATH" - - hash -r - - conda config --set always_yes yes --set changeps1 no - - conda update -q conda - # Useful for debugging any issues with conda - - conda info -a - # Replace dep1 dep2 ... with your dependencies - - conda install -c r r - - conda install -c r r-knitr -install: - - pip install autopep8 pep8 - - pip install .[all] - -# Cache isn't working yet because problems with expiration header. Disable for now because it's breaking builds. 
-# cache: pip - -before_script: - - "pep8 --exclude knowledge_repo/app/migrations,build,deploy,kube --ignore=E501 ." -script: - - bash run_tests.sh - -# Deploy to pypi automatically from tagged releases on the stable branch -deploy: - provider: pypi - user: $PYPI_USER - password: - secure: $PYPI_PASS_SECURE - on: - tags: true - distributions: sdist bdist_wheel - repo: airbnb/knowledge-repo - condition: $TRAVIS_PYTHON_VERSION = "3.4" +language: python +python: + - "2.7" + - "3.4" + +before_install: + # build docker image - this image contains none of the testing dependencies so is releasable if the tests pass + - docker-compose build +install: + # create a new container for testing with bash running in the background + - docker run -d -t --name knowledgerepo_test knowledgerepo_web bash + # install python testing dependencies + - docker exec knowledgerepo_test pip install autopep8 pep8 nose coverage bs4 + +# Cache isn't working yet because problems with expiration header. Disable for now because it's breaking builds. +# cache: pip + +before_script: + # make sure test .py files are nonexecutable so nose will find them + - docker exec knowledgerepo_test bash -c 'chmod a-x /app/tests/*.py' + - "docker exec knowledgerepo_test pep8 --exclude knowledge_repo/app/migrations,build,deploy,kube --ignore=E501 ." +script: + # run testing suite on the test container + - docker exec knowledgerepo_test bash run_tests.sh + # kill and remove the test container + - docker kill knowledgerepo_test + - docker rm knowledgerepo_test + +# Deploy to pypi automatically from tagged releases on the stable branch +deploy: + provider: pypi + user: $PYPI_USER + password: + secure: $PYPI_PASS_SECURE + on: + tags: true + distributions: sdist bdist_wheel + repo: airbnb/knowledge-repo + condition: $TRAVIS_PYTHON_VERSION = "3.4" From c21c1bf6eb9ca5656b19648b142dfd320bc3695b Mon Sep 17 00:00:00 2001 From: Joe Taylor Date: Mon, 12 Jun 2017 19:49:21 +0200 Subject: [PATCH 06/18] added docker service to Travis config --- .travis.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.travis.yml b/.travis.yml index 052c11312..2aa010a55 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,13 @@ +sudo: required + language: python python: - "2.7" - "3.4" +services: + - docker + before_install: # build docker image - this image contains none of the testing dependencies so is releasable if the tests pass - docker-compose build From 2511e8b53f69e42343aabdf65f660a0e5d8e88e4 Mon Sep 17 00:00:00 2001 From: Joe Taylor Date: Tue, 13 Jun 2017 10:17:44 +0200 Subject: [PATCH 07/18] added docker image deploy to Travis config --- .travis.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.travis.yml b/.travis.yml index 2aa010a55..d52d38917 100644 --- a/.travis.yml +++ b/.travis.yml @@ -31,6 +31,10 @@ script: - docker kill knowledgerepo_test - docker rm knowledgerepo_test +after_success: + - docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD" + - docker push ${DOCKER_REPO}:${TRAVIS_BRANCH} + # Deploy to pypi automatically from tagged releases on the stable branch deploy: provider: pypi From aea8095915df72c03d6a3738f1b8edaa52f5614c Mon Sep 17 00:00:00 2001 From: Joe Taylor Date: Tue, 13 Jun 2017 10:38:42 +0200 Subject: [PATCH 08/18] Made pypi repo configurable --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index d52d38917..3efc50c57 100644 --- a/.travis.yml +++ b/.travis.yml @@ -44,5 +44,5 @@ deploy: on: tags: true distributions: sdist bdist_wheel - repo: 
airbnb/knowledge-repo + repo: $PYPI_REPO condition: $TRAVIS_PYTHON_VERSION = "3.4" From d318bd395618a586e013e19fa44b06d6db33cf8a Mon Sep 17 00:00:00 2001 From: Joe Taylor Date: Tue, 13 Jun 2017 14:59:38 +0200 Subject: [PATCH 09/18] Added docker container for running system tests and created test stub in ./system_tests/run_tests.sh --- .travis.yml | 24 ++++++++-------- Dockerfile | 19 +++++++++++-- Dockerfile.systest | 12 ++++++++ docker-compose.yml | 10 ++++++- requirements.txt | 59 +++++++++++++++++++++++++++++++++++++-- system_tests/run_tests.sh | 12 ++++++++ 6 files changed, 119 insertions(+), 17 deletions(-) create mode 100644 Dockerfile.systest create mode 100644 system_tests/run_tests.sh diff --git a/.travis.yml b/.travis.yml index 3efc50c57..6ce97c8c5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,28 +12,30 @@ before_install: # build docker image - this image contains none of the testing dependencies so is releasable if the tests pass - docker-compose build install: - # create a new container for testing with bash running in the background - - docker run -d -t --name knowledgerepo_test knowledgerepo_web bash + # create web app and system-under-test containers + - docker-compose up -d # install python testing dependencies - - docker exec knowledgerepo_test pip install autopep8 pep8 nose coverage bs4 + - docker exec knowledgerepo_web_1 pip install autopep8 pep8 nose coverage bs4 # Cache isn't working yet because problems with expiration header. Disable for now because it's breaking builds. # cache: pip before_script: # make sure test .py files are nonexecutable so nose will find them - - docker exec knowledgerepo_test bash -c 'chmod a-x /app/tests/*.py' - - "docker exec knowledgerepo_test pep8 --exclude knowledge_repo/app/migrations,build,deploy,kube --ignore=E501 ." + - docker exec knowledgerepo_web_1 bash -c 'chmod a-x /app/tests/*.py' + - "docker exec knowledgerepo_web_1 pep8 --exclude knowledge_repo/app/migrations,build,deploy,kube --ignore=E501 ." script: - # run testing suite on the test container - - docker exec knowledgerepo_test bash run_tests.sh - # kill and remove the test container - - docker kill knowledgerepo_test - - docker rm knowledgerepo_test + # run unit tests + - docker exec knowledgerepo_web_1 bash run_tests.sh + # run system tests + - docker exec knowledgerepo_sut_1 bash run_tests.sh + # kill and remove the containers + - docker-compose down after_success: - docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD" - - docker push ${DOCKER_REPO}:${TRAVIS_BRANCH} + - docker tag knowledgerepo_web ${DOCKER_REPO}-python{$TRAVIS_PYTHON_VERSION}:${TRAVIS_BRANCH} + - docker push ${DOCKER_REPO}-python{$TRAVIS_PYTHON_VERSION}:${TRAVIS_BRANCH} # Deploy to pypi automatically from tagged releases on the stable branch deploy: diff --git a/Dockerfile b/Dockerfile index 4a6f29b73..ca69c5f06 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,6 +2,7 @@ FROM ubuntu:14.04 ARG TRAVIS_PYTHON_VERSION +ARG PORT # Install required Ubuntu packages RUN apt-get update @@ -31,11 +32,25 @@ RUN conda install -c r r-knitr # Set the application directory WORKDIR /app +# Install python dependencies - do this before adding rest of code to allow docker to cache this step +ADD ./requirements.txt /app/requirements.txt +RUN pip install -r requirements.txt + # Copy our code from the current folder to /app inside the container ADD . 
/app -# Install python dependencies -RUN pip install -r requirements.txt +# Run project installation scripts +RUN python setup.py develop # Ready dependencies to use IpynbFormat instances RUN pip install --ignore-installed --upgrade nbformat nbconvert[execute] traitlets + +# Set up to use a new empty repo until configured otherwise +RUN ./scripts/knowledge_repo --repo ./default_repo init +ENV KNOWLEDGE_REPO=./default_repo + +EXPOSE ${PORT} +ENV PORT=${PORT} + +# Deploy via gunicorn as standard startup command +CMD ["bash", "-c", "./scripts/knowledge_repo deploy --port ${PORT}"] diff --git a/Dockerfile.systest b/Dockerfile.systest new file mode 100644 index 000000000..b2e6ca11c --- /dev/null +++ b/Dockerfile.systest @@ -0,0 +1,12 @@ + +# this Dockerfile builds a container from which system tests can be run + +FROM ubuntu:14.04 + +RUN apt-get update && apt-get install -yq curl && apt-get clean + +WORKDIR /app + +ADD ./system_tests/run_tests.sh /app/run_tests.sh + +CMD ["bash"] diff --git a/docker-compose.yml b/docker-compose.yml index 8a8de4080..4962ff60d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,6 +5,14 @@ services: context: . args: TRAVIS_PYTHON_VERSION: "${TRAVIS_PYTHON_VERSION}" + PORT: "${PORT}" dockerfile: Dockerfile ports: - - "80:80" + - "${PORT}:${PORT}" + sut: + build: + context: . + dockerfile: Dockerfile.systest + tty: true + links: + - web diff --git a/requirements.txt b/requirements.txt index cd8c47d3d..bb881eaf1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,56 @@ ---index-url https://pypi.python.org/simple/ - --e . + --index-url https://pypi.python.org/simple/ + alembic==0.9.2 + asn1crypto==0.22.0 + bleach==2.0.0 + blinker==1.4 + cffi==1.10.0 + click==6.7 + conda==4.3.21 + cryptography==1.8.1 + decorator==4.0.11 + entrypoints==0.2.3 + enum34==1.1.6 + Flask==0.12.2 + Flask-Mail==0.9.1 + Flask-Migrate==2.0.4 + Flask-Script==2.0.5 + Flask-SQLAlchemy==2.2 + future==0.16.0 + gitdb2==2.0.2 + GitPython==2.1.5 + gunicorn==19.7.1 + html5lib==0.999999999 + idna==2.5 + inflection==0.3.1 + ipython-genutils==0.2.0 + itsdangerous==0.24 + Jinja2==2.9.6 + jsonschema==2.6.0 + jupyter-client==5.0.1 + jupyter-core==4.3.0 + Mako==1.0.6 + Markdown==2.6.8 + MarkupSafe==1.0 + mistune==0.7.4 + nbconvert==5.2.1 + nbformat==4.3.0 + packaging==16.8 + pandocfilters==1.4.1 + pycosat==0.6.2 + pycparser==2.17 + Pygments==2.2.0 + pyOpenSSL==17.0.0 + pyparsing==2.1.4 + python-dateutil==2.6.0 + python-editor==1.0.3 + PyYAML==3.12 + pyzmq==16.0.2 + requests==2.14.2 + six==1.10.0 + smmap2==2.0.3 + SQLAlchemy==1.1.10 + tabulate==0.7.7 + testpath==0.3.1 + traitlets==4.3.2 + webencodings==0.5.1 + Werkzeug==0.12.2 diff --git a/system_tests/run_tests.sh b/system_tests/run_tests.sh new file mode 100644 index 000000000..526acf390 --- /dev/null +++ b/system_tests/run_tests.sh @@ -0,0 +1,12 @@ + +# This is a stub to be replaced with more sophisticated system tests + +sleep 5 +# check that GET /feed serves something +if curl web/feed | grep -q 'Served with by Knowledge Repo'; then + echo "Tests passed!" + exit 0 +else + echo "Tests failed!" 
+ exit 1 +fi From 3d4c421262e71e9f21c9c6958179d78194f474e6 Mon Sep 17 00:00:00 2001 From: Joe Taylor Date: Tue, 13 Jun 2017 15:08:18 +0200 Subject: [PATCH 10/18] Added default value for PORT environment variable in Travis config --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index 6ce97c8c5..98dab90b5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,6 +8,9 @@ python: services: - docker +env: + - PORT=80 + before_install: # build docker image - this image contains none of the testing dependencies so is releasable if the tests pass - docker-compose build From 27e962fe2173c39ab0c105a39484c0dd7e2a07ea Mon Sep 17 00:00:00 2001 From: Joe Taylor Date: Tue, 13 Jun 2017 15:16:39 +0200 Subject: [PATCH 11/18] correct typo in docker deploy --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 98dab90b5..4b2a313ec 100644 --- a/.travis.yml +++ b/.travis.yml @@ -37,8 +37,8 @@ script: after_success: - docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD" - - docker tag knowledgerepo_web ${DOCKER_REPO}-python{$TRAVIS_PYTHON_VERSION}:${TRAVIS_BRANCH} - - docker push ${DOCKER_REPO}-python{$TRAVIS_PYTHON_VERSION}:${TRAVIS_BRANCH} + - docker tag knowledgerepo_web ${DOCKER_REPO}-python${TRAVIS_PYTHON_VERSION}:${TRAVIS_BRANCH} + - docker push ${DOCKER_REPO}-python${TRAVIS_PYTHON_VERSION}:${TRAVIS_BRANCH} # Deploy to pypi automatically from tagged releases on the stable branch deploy: From 0970eb60c30979b6c5146b128d09a62bcce6153a Mon Sep 17 00:00:00 2001 From: Joe Taylor Date: Tue, 13 Jun 2017 15:40:53 +0200 Subject: [PATCH 12/18] omit docker image tags for successful builds on the master branch, pushing to :latest instead --- .travis.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4b2a313ec..fdb6a4ed8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -36,9 +36,15 @@ script: - docker-compose down after_success: + # deploy successfully built images to docker hub - docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD" - - docker tag knowledgerepo_web ${DOCKER_REPO}-python${TRAVIS_PYTHON_VERSION}:${TRAVIS_BRANCH} - - docker push ${DOCKER_REPO}-python${TRAVIS_PYTHON_VERSION}:${TRAVIS_BRANCH} + # omit docker image tags for successful builds on the master branch, pushing to :latest instead + - if [ "$TRAVIS_BRANCH" == "master" ]; + then export DOCKER_REPO_FULLNAME=${DOCKER_REPO}_python${TRAVIS_PYTHON_VERSION}; + else export DOCKER_REPO_FULLNAME=${DOCKER_REPO}_python${TRAVIS_PYTHON_VERSION}:${TRAVIS_BRANCH}; + fi + - docker tag knowledgerepo_web $DOCKER_REPO_FULLNAME + - docker push $DOCKER_REPO_FULLNAME # Deploy to pypi automatically from tagged releases on the stable branch deploy: From 14463114936b5b199ad0346a7b2271c423e27878 Mon Sep 17 00:00:00 2001 From: Joe Taylor Date: Tue, 13 Jun 2017 15:50:07 +0200 Subject: [PATCH 13/18] omit docker image tags for successful builds on the master branch, pushing to :latest instead --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index fdb6a4ed8..0f1112acf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,8 +40,8 @@ after_success: - docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD" # omit docker image tags for successful builds on the master branch, pushing to :latest instead - if [ "$TRAVIS_BRANCH" == "master" ]; - then export DOCKER_REPO_FULLNAME=${DOCKER_REPO}_python${TRAVIS_PYTHON_VERSION}; - else export 
DOCKER_REPO_FULLNAME=${DOCKER_REPO}_python${TRAVIS_PYTHON_VERSION}:${TRAVIS_BRANCH}; + then export DOCKER_REPO_FULLNAME=${DOCKER_REPO}-python${TRAVIS_PYTHON_VERSION}; + else export DOCKER_REPO_FULLNAME=${DOCKER_REPO}-python${TRAVIS_PYTHON_VERSION}:${TRAVIS_BRANCH}; fi - docker tag knowledgerepo_web $DOCKER_REPO_FULLNAME - docker push $DOCKER_REPO_FULLNAME From 1d86f525a6f752b4c55247610cda054997a6981f Mon Sep 17 00:00:00 2001 From: Joe Taylor Date: Tue, 13 Jun 2017 16:16:32 +0200 Subject: [PATCH 14/18] Update Quickstart instructions to include Docker --- README.md | 808 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 413 insertions(+), 395 deletions(-) diff --git a/README.md b/README.md index 3d995fa0d..0a68d6c9f 100644 --- a/README.md +++ b/README.md @@ -1,395 +1,413 @@ -# The Knowledge Repository (BETA) -[![Build Status](https://travis-ci.org/airbnb/knowledge-repo.svg?branch=master)](https://travis-ci.org/airbnb/knowledge-repo) -[![Windows Build Status](https://ci.appveyor.com/api/projects/status/t88a27n099oqnbsw/branch/master?svg=true&pendingText=Windows%20build%20pending...&passingText=Windows%20build%20passing&failingText=Windows%20build%20failing)](https://ci.appveyor.com/project/matthewwardrop/knowledge-repo) -[![PyPI version](https://badge.fury.io/py/knowledge-repo.svg)](https://badge.fury.io/py/knowledge-repo) -[![Python](https://img.shields.io/pypi/pyversions/knowledge-repo.svg?maxAge=2592000)](https://pypi.python.org/pypi/knowledge-repo) - -The Knowledge Repository project is focused on facilitating the sharing of knowledge between data scientists and other technical roles using data formats and tools that make sense in these professions. It provides various data stores (and utilities to manage them) for "knowledge posts", with a particular focus on notebooks (R Markdown and Jupyter / iPython Notebook) to better promote reproducible research. - -Check out this [Medium Post](https://medium.com/airbnb-engineering/scaling-knowledge-at-airbnb-875d73eff091) for the inspiration for the project. - -**Note:** The Knowledge Repository is a work in progress. There are lots of code cleanups and feature extensions TBD. Your assistance and involvement is more than encouraged. - -![](https://cloud.githubusercontent.com/assets/20175104/18972198/116861be-864d-11e6-9850-5a6cdad7ce54.png) - -![](https://cloud.githubusercontent.com/assets/20175104/18972218/264f4c00-864d-11e6-8153-3e9833563784.png) - -## Quickstart - -1\. Install the knowledge-repo tooling -``` -pip install --upgrade knowledge-repo -``` - -To install dependencies for iPython notebook, PDF uploading, and local development, use `pip install --upgrade knowledge-repo[all]` - -2\. Initialize a knowledge repository - your posts will get added here -``` -knowledge_repo --repo ./example_repo init -``` -3\. Create a post template - -for Rmd: -``` -knowledge_repo --repo ./example_repo create Rmd example_post.Rmd -``` - -for ipynb -``` -knowledge_repo --repo ./example_repo create ipynb example_post.ipynb -``` -4\. Edit the notebook file `example_post.ipynb` or `example_post.Rmd` as you normally would. - - -5\. Add your post to the repo with path `project/example` -``` -knowledge_repo --repo ./example_repo add example_post.Rmd -p project/example_rmd -knowledge_repo --repo ./example_repo add example_post.ipynb -p project/example_ipynb -``` -6\. 
Preview the added post -``` -knowledge_repo --repo ./example_repo preview project/example_rmd -#or -knowledge_repo --repo ./example_repo preview project/example_ipynb -``` - -### Feedback for Beta - -The Knowledge Repo is currently in a public beta, and we are rolling it out to more people to get feedback. In particular, we'd love to hear about the following: - - - How easy is it to set up the git knowledge post repository? - - How easy is it to set up the web application, and make it live internally within your organization? - - Where are the gaps in our documentation that we should fill in to assist others in understanding the system? - - At a higher level, are there any blockers or barriers to setting up the Knowledge Repo in your organization? - -### Known Issues - -Here's a running list of known issues we are working on: - - - The in-app webeditor needs refactoring to: - - Rely completely on KnowledgePost objects instead of interacting with db records - - Trigger "save" actions when necessary - - Allow for image uploading - - The Python configuration for git knowledge repositories currently reads directly out of the `master` branch, allowing (depending on your organization's git policy) a malicious user to commit arbitrary code into the master branch, which then gets run on client and server machines during interactions with the git repository using the inbuilt knowledge repository abstractions. - -## Introduction - -Knowledge posts are a general markdown format that is automatically generated from the following common formats: - - - Jupyter/Ipython notebooks - - Rmd notebooks - - Markdown files - -The Jupyter, Rmd, and Markdown files are required to have a specific set of yaml style headers which are used to organize and discover research: - -``` ---- -title: I Found that Lemurs Do Funny Dances -authors: -- sally_smarts -- wesley_wisdom -tags: -- knowledge -- example -created_at: 2016-06-29 -updated_at: 2016-06-30 -tldr: This is short description of the content and findings of the post. ---- -``` - -*See a full description of headers [further below](https://github.com/airbnb/knowledge-repo#post-headers)* - -Users add these notebooks/files to the knowledge repository through the `knowledge_repo` tool, as described below; which allows them to be rendered and curated in the knowledge repository's web app. - -If your favourite format is missing, we welcome contributions; and are happy to work with you to get it supported. See the "Contributing" section below to see how to add support for more formats. - -Note that the web application can live on top of multiple Knowledge Repo backends. Supported types so far are: - - - Git Repo + Remote Git Hosting Service (Primary Use Case) - - Web Application SQL db - -## Getting started -There are two repositories associated with the Knowledge Repository project. -1. This repository, which will be installed first. This is referred to as the knowledge repository tooling. -2. A knowledge data repository, which is created second. This is where the knowledge posts are stored. 
- -### Installation -To install the knowledge repository tooling (and all its dependencies), simply run: - -`pip install --upgrade "knowledge-repo[all]"` - -You can also skip installing dependencies which are only required in special cases by replacing `all` with one or more of the following (separated by commas): -- `ipynb` : Installs the dependencies required for adding/converting Jupyter notebook files -- `pdf` : Installs the dependencies required for uploading PDFs using the web editor -- `dev`: Installs the dependencies required for doing development, including running the tests - -The `knowledge_repo` script is the one that is used for all of the following actions. It requires the `--repo` flag to be passed to it, with the location of the knowledge data repository. - -You can drop the `--repo` option by setting the `$KNOWLEDGE_REPO` environment variable with the location of the knowledge data repo in your bash/zsh/shell configuration. In bash, this would be done as such: -``` -export KNOWLEDGE_REPO=repo_path -``` - -### Setup of the knowledge data repositories -There are two different ways to do this, depending on whether your organization already has a knowledge data repository or not: - -#### Your organization already has a knowledge data repository setup -If your organization already has a knowledge data repository setup, check it out onto your computer as you normally would; for example: - -`git clone git@example.com:example_data_repo.git` - -Running this same script if a repo already exists at `` will allow you to update it to be a knowledge data repository. This is useful if you are starting a repository on a remote service like GitHub, as this allows you to clone the remote repository as per normal; run this script; and then push the initialization back into the remote service using `git push`. - -#### Your organization does not have knowledge data repository setup -The following command will create a new repository at `` -``` -knowledge_repo --repo init -``` - -If you are hosting this repository on a remote service like Github, and you've created the knowledge data repository using the `init` flag, you must push that to that remote service in order for the later commands to work. On Git, this can be done by creating the remote repository through Git and then running - -``` -git remote add origin url_of_the_repository -git push -u origin master -``` - -For more details about the structure of a knowledge repository, see the technical details section below. - -### Configuration - -There are two types of configuration files, one for knowledge-data git repos that holds posts, and another for the web application. - -#### Knowledge Data Git Repo Configuration - -When running `knowledge_repo init` to make a folder a knowledge-data git repo, a `.knowledge_repo_config` file will be created in the folder. The file will be a copy of the default repo configuration file located [here](https://github.com/airbnb/knowledge-repo/blob/master/knowledge_repo/config_defaults.py). - -This configuration file will allow you to: - - - Add postprocessors to post contributions from the repo. (see the `postprocessors` array of functions) - - Add rules for which subdirectories posts can be added to. (see the `path_parse()` function) - - Check and manage the format of author names at contribution time - - Add logic to `username_parse()` to check post author names and raise exceptions when they don't match - - Add logic to `username_to_name()` to manage how user/author names are displayed, ex. 
"sally_smarts" --> "Sally Smarts" - - Add logic to `username_to_email()` to manage how user/author names are matched to emails, ex. "sally_smarts" --> "sally.smarts@mycompany.com" - -See the file itself for more detail. - -#### Knowledge Web Application Configuration - -Specify a configuration file when running the web application by adding the flag `--config path/to/config_file.py`. An example configuration file is provided [here](https://github.com/airbnb/knowledge-repo/blob/master/resources/server_config.py). - -This configuration file lets you specify details specific to the web server. For instance, one can specify the database connection string or the request header that contains usernames. See the file itself for more detail. - -## Writing Knowledge Posts - -### TLDR Guide For Contributing - -If you have already set up your system as described below, here is a snapshot of the commands you need to run to upload your knowledge post stored in ~/Documents/my_post.Rmd. For Jupyter / iPython Notebooks, the commands are the same, replacing all instances of `Rmd` with `ipynb`. It assumes you have configured the KNOWLEDGE_REPO environment variable to point to your local copy of the knowledge repository. The code is written for producing and contributing an ipynb file to make the examples clear, R Markdown files are run by using `Rmd` in place of `ipynb` in each command. - -1. `knowledge_repo create Rmd ~/Documents/my_post.Rmd`, which creates a template with required yaml headers. Templates can also be downloaded by clicking "Write a Post!" the web application. *Make sure your post has these headers with correct values for your post* -2. Do your work in the generated my_post.Rmd file. *Make sure the post runs through from start to finish before attempting to add to the Knowledge Repo!* -3. `knowledge_repo add ~/Documents/my_post.Rmd [-p projects/test_project] [--update]` -4. `knowledge_repo preview projects/test_project` -5. `knowledge_repo submit projects/test_project` -6. From your remote git hosting service, request a review for merging the post. (ie. open a pull request on Github) -7. After it has been reviewed, merge it in to the master branch. - -### Full Guide for Contributing: - -#### Creating knowledge -Once the knowledge data repository has been initialized, it is possible to start adding posts. Each post in the knowledge repository requires a specific header format, used for metadata formatting. -To create a new post using a provided template, which has both the header information and example content, run the following command: -``` -knowledge_repo --repo create {ipynb, Rmd, md} filename -``` - -The first argument indicates the type of the file that you want created, while the second argument indicates where the file should be created. - -If the knowledge data repository is created at `knowledge_data_repo`, running -``` -knowledge_repo --repo knowledge_data_repo create md ~/Documents/my_first_knowledge_post.md -``` -will create a file, `~/Documents/my_first_knowledge_post.md`, the contents of which will be the boilerplate template of the knowledge post. - -The help menu for this command (and all following commands) can be reached by adding the `-h` flag, `knowledge_repo --repo create -h`. - -Alternatively, by going to the `/create` route in the webapp, you can click the button for whichever template you would like to have, -and that will download the correct template. 
- -#### Adding knowledge -Once you've finished writing a post, the next step is to add it to the knowledge data repository. -To do this, run the following command: -``` -knowledge_repo --repo add [-p ] -``` - -Using the example from above, if we wanted to add the post `~/Documents/my_first_knowledge_post.md` to `knowledge_data_repo`, -we would run: -``` -knowledge_repo --repo knowledge_data_repo add ~/Documents/my_first_knowledge_post.md -p projects/test_knowledge -``` - -The `-p` flag specifies the location of the post in the knowledge data repository - in this case, `knowledge_data_repo/projects/test_knowledge`. -The `-p` flag does not need to be specified if `path` is included in the header of the knowledge post. - -#### Updating knowledge -To update an existing knowledge post, pass the `--update` flag to the `add` command. This will allow the add operation to override exiting knowledge posts. -``` -knowledge_repo --repo add --update -``` - -#### Previewing Knowledge -If you would like to see how the post would render on the web app before submitting the post for review, run the following command: -``` -knowledge_repo --repo preview -``` - -In the case from above, we would run: -``` -knowledge_repo --repo knowledge_data_repo preview projects/test_knowledge -``` - -There are other arguments that can be passed to this command, adding the `-h` flag shows them all along with further information about them. - -#### Submitting knowledge -After running the add command, two things should have happened: -1. A new folder should have been created at the path specified in the add command, which ends in `.kp`. This is added automatically to indicate that the folder is a knowledge post. -2. This folder will have been committed to the repository on the branch named `/path_in_add_command` - -Running the example command: `knowledge_repo --repo knowledge_data_repo add ~/Documents/my_first_knowledge_post.md -p projects/test_knowledge`, we would have seen: -1. A new folder: `knowledge_data_repo/projects/test_knowledge.kp` which was committed on -2. A branch (that you are now on), called `knowledge_data_repo/projects/test_knowledge` - -To submit this post for review, simply run the command: -``` -knowledge_repo --repo submit -``` - -In this case, we would run: -``` -knowledge_repo --repo knowledge_data_repo submit knowledge_data_repo/projects/test_knowledge.kp -``` - -### Post Headers - -Here is a full list of headers used in the YAML section of knowledge posts: - -|header |required |purpose |example | -|:--------------|:--------|:----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------| -|title |required |String at top of post |title: This post proves that 2+2=4 | -|authors |required |User entity that wrote the post in organization specified format |authors:
- kanye_west
- beyonce_knowles | -|tags |required |Topics, projects, or any other uniting principle across posts |tags:
- hiphop
- yeezy | -|created_at |required |Date when post was written |created_at: 2016-04-03 | -|updated_at |optional |Date when post was last updated |created_at: 2016-10-10 | -|tldr |required |Summary of post takeaways that will be visible in /feed |tldr: I'ma let you finish, but Beyonce had one of the best videos of all time! | -|path |optional |Instead of specifying post path in the CLI, specify with this post header |path: projects/path/to/post/on/repo | -|thumbnail |optional |Specify which image is shown in /feed |thumbnail: 3 OR thumbnail: http://cdn.pcwallart.com/images/giraffe-tongue-wallpaper-1.jpg | -|private |optional |If included, post is only visible to authors and editors set in repo configuration |private: true | -|allowed_groups |optional |If the post is private, specify additional users or groups who can see the post |allowed_groups: ['jay_z', 'taylor_swift', 'rap_community'] | - -### Handling Images - -The knowledge repo's default behavior is to add the markdown's contents as is to your knowledge post git repository. If you do not have git LFS set up, it may be in your interest to have these images hosted on some type of cloud storage, so that pulling the repo locally isn't cumbersome. - -To add support for pushing images to cloud storage, we provide a [postprocessor](https://github.com/airbnb/knowledge-repo/blob/master/resources/extract_images_to_s3.py). This file needs one line to be configured for your organization's cloud storage. Once configured, the postprocessor's registry key can be added to the knowledge git repository's configuration file as a postprocessor. - -### Managing and Viewing Tags - -Tags are one of the main organizing principles of the knowledge repo web app, and it is important to have high integrity on these tag names. For example, having one post tagged as "my_project" and another as "my-project" or "my_projects" prevents these posts from being organized together. - -We currently have two ways to maintain tag integrity: - - - Browse the `/cluster?group_by=tags` endpoint to find and match existing tags. - - After contributing, organize tags in batch with the `/batch_tags` end point. - -## Running the web app - -Running the web app allows you to locally view all the knowledge posts in the repository, or to serve it for others to view. It is also useful when developing on the web app. - -### Running the development server - -Running the web app in development/local/private mode is as simple as running: - -`knowledge_repo --repo runserver` - -Supported options are `--port` and `--dburi` which respectively change the local port on which the server is running, and the sqlalchemy uri where the database can be found and/or initiated. The default port is 7000, and the default dburi is `sqlite:////tmp/knowledge.db`. If the database does not exist, it is created (if that is possible) and initialised. Database migrations are automatic (unless disabled to prevent accidental data loss), but can be performed manually using: - -`knowledge_repo --repo db_upgrade --dburi ` - -### Running the Web App on Multiple Repos - -The web application can be run on top of multiple knowledge repo backends. To do this, include each repo with a name and path, prefixed by --repo. 
For example: - -`knowledge_repo --repo {git}/path/to/git/repo --repo {webposts}sqlite:////tmp/dbrepo.db:mypostreftable runserver` - -If including a dbrepo, add the name of the dbrepo to the `WEB_EDITOR_PREFIXES` in the server config, and add it as config when running the app: - -`knowledge_repo --repo {git}/path/to/git/repo --repo {webposts}sqlite:////tmp/dbrepo.db:mypostreftable runserver --config resources/server_config.py` - -Note that this is required for the web application's internal post writing UI. - -### Deploying the web app - -Deploying the web app is much like running the development server, except that the web app is deployed on top of gunicorn. It also allows for enabling server-side components such as sending emails to subscribed users. - -Deploying is as simple as: -`knowledge_repo --repo deploy` - -or if using multiple repos: -`knowledge_repo --repo {git}/path/to/git/repo --repo {webposts}sqlite:////tmp/dbrepo.db:mypostreftable deploy --config resources/server_config.py` - -Supported options are `--port`, `--dburi`,`--workers`, `--timeout` and `--config`. The `--config` option allows you to specify a python config file from which to load the extended configuration. A template config file is provided in `resources/server_config.py`. The `--port` and `--dburi` options are as before, with the `--workers` and `--timeout` options specifying the number of threads to use when serving through gunicorn, and the timeout after which the threads are presumed to have died, and will be restarted. - -## Contributing - -We would love to work with you to create the best knowledge repository software possible. If you have ideas or would like to have your own code included, add an issue or pull request and we will review it. - -### Adding new filetype support - -Support for conversion of a particular filetype to a knowledge post is added by writing a new `KnowledgePostConverter` object. Each converter should live in its own file in `knowledge_repo/converters`. Refer to the implementation for ipynb, Rmd, and md for more details. If your conversion is site-specific, you can define these subclasses in `.knowledge_repo_config`, whereupon they will be picked up by the conversion code. - -### Adding extra structure and/or verifications to the knowledge post conversion process - -When a KnowledgePost is constructed by converting from support filetypes, the resulting post is then passed through a series of postprocessors (defined in `knowledge_repo/postprocessors`). This allows one to modify the knowledge post, upload images to remote storage facilities (such as S3), and/or verify some additional structure of the knowledge posts. As above, defining or importing these classes in `.knowledge_repo_config.py` allows for postprocessors to be used locally. - -### More - -Is the Knowledge Repository missing something else that you would like to see? Let us know, and we'll see if we cannot help you. - -## Technical Details - -### What is a Knowledge Repository - -A knowledge repository is a virtual filesystem (such as a git repository or database). A GitKnowledgeRepository, for example, has the following structure: - - - + .git # The git repository metadata - + .resources # A folder into which the knowledge_repo repository is checked out (as a git submodule) - - .knowledge_repo_config.py # Local configuration for this knowledge repository - - - -The use of a git submodule to checkout the knowledge_repo into `.resources` allows use to ensure that the client and server are using the same version of the code. 
When one uses the `knowledge_repo` script, it actually passes the options to the version of the `knowledge_repo` script in `.resources/scripts/knowledge_repo`. Thus, updating the version of knowledge_repo used by client and server alike is as simple as changing which revision is checked out by git submodule in the usual way. That is: - - pushd .resources - git pull - git checkout / - popd - git commit -a -m 'Updated version of the knowledge_repo' - git push - -Then, all users and servers associated with this repository will be updated to the new version. This prevents version mismatches between client and server, and all users of the repository. - -In development, it is often useful to disable this chaining. To use the local code instead of the code in the checked out knowledge repository, pass the `--dev` option as: - -`knowledge_repo --repo --dev ...` - -### What is a Knowledge Post? - -A knowledge post is a directory, with the following structure: - - - - knowledge.md - + images/* [Optional] - + orig_src/* [Optional; stores the original converted file] - -Images are automatically extracted from the local paths on your computer, and placed into images. `orig_src` contains the file(s) from which the knowledge post was converted from. +# The Knowledge Repository (BETA) +[![Build Status](https://travis-ci.org/airbnb/knowledge-repo.svg?branch=master)](https://travis-ci.org/airbnb/knowledge-repo) +[![Windows Build Status](https://ci.appveyor.com/api/projects/status/t88a27n099oqnbsw/branch/master?svg=true&pendingText=Windows%20build%20pending...&passingText=Windows%20build%20passing&failingText=Windows%20build%20failing)](https://ci.appveyor.com/project/matthewwardrop/knowledge-repo) +[![PyPI version](https://badge.fury.io/py/knowledge-repo.svg)](https://badge.fury.io/py/knowledge-repo) +[![Python](https://img.shields.io/pypi/pyversions/knowledge-repo.svg?maxAge=2592000)](https://pypi.python.org/pypi/knowledge-repo) + +The Knowledge Repository project is focused on facilitating the sharing of knowledge between data scientists and other technical roles using data formats and tools that make sense in these professions. It provides various data stores (and utilities to manage them) for "knowledge posts", with a particular focus on notebooks (R Markdown and Jupyter / iPython Notebook) to better promote reproducible research. + +Check out this [Medium Post](https://medium.com/airbnb-engineering/scaling-knowledge-at-airbnb-875d73eff091) for the inspiration for the project. + +**Note:** The Knowledge Repository is a work in progress. There are lots of code cleanups and feature extensions TBD. Your assistance and involvement is more than encouraged. + +![](https://cloud.githubusercontent.com/assets/20175104/18972198/116861be-864d-11e6-9850-5a6cdad7ce54.png) + +![](https://cloud.githubusercontent.com/assets/20175104/18972218/264f4c00-864d-11e6-8153-3e9833563784.png) + +## Quickstart + +1\. Install the knowledge-repo tooling +``` +pip install --upgrade knowledge-repo +``` + +To install dependencies for iPython notebook, PDF uploading, and local development, use `pip install --upgrade knowledge-repo[all]` + +2\. Initialize a knowledge repository - your posts will get added here +``` +knowledge_repo --repo ./example_repo init +``` +3\. Create a post template + +for Rmd: +``` +knowledge_repo --repo ./example_repo create Rmd example_post.Rmd +``` + +for ipynb +``` +knowledge_repo --repo ./example_repo create ipynb example_post.ipynb +``` +4\. 
Edit the notebook file `example_post.ipynb` or `example_post.Rmd` as you normally would. + + +5\. Add your post to the repo with path `project/example` +``` +knowledge_repo --repo ./example_repo add example_post.Rmd -p project/example_rmd +knowledge_repo --repo ./example_repo add example_post.ipynb -p project/example_ipynb +``` +6\. Preview the added post +``` +knowledge_repo --repo ./example_repo preview project/example_rmd +#or +knowledge_repo --repo ./example_repo preview project/example_ipynb +``` + +### Docker images + +Docker images of the latest build are available at Docker Hub and can be downloaded as follows: + +Python 2.7: +``` +docker pull airbnb/knowledge-repo-python2.7 +docker run -d -p 80:80 airbnb/knowledge-repo-python2.7 +``` + +Python 3.4: +``` +docker pull airbnb/knowledge-repo-python3.4 +docker run -d -p 80:80 airbnb/knowledge-repo-python3.4 +``` + +A Dockerfile has been provided if you wish to build a Docker image yourself. + +### Feedback for Beta + +The Knowledge Repo is currently in a public beta, and we are rolling it out to more people to get feedback. In particular, we'd love to hear about the following: + + - How easy is it to set up the git knowledge post repository? + - How easy is it to set up the web application, and make it live internally within your organization? + - Where are the gaps in our documentation that we should fill in to assist others in understanding the system? + - At a higher level, are there any blockers or barriers to setting up the Knowledge Repo in your organization? + +### Known Issues + +Here's a running list of known issues we are working on: + + - The in-app webeditor needs refactoring to: + - Rely completely on KnowledgePost objects instead of interacting with db records + - Trigger "save" actions when necessary + - Allow for image uploading + - The Python configuration for git knowledge repositories currently reads directly out of the `master` branch, allowing (depending on your organization's git policy) a malicious user to commit arbitrary code into the master branch, which then gets run on client and server machines during interactions with the git repository using the inbuilt knowledge repository abstractions. + +## Introduction + +Knowledge posts are a general markdown format that is automatically generated from the following common formats: + + - Jupyter/Ipython notebooks + - Rmd notebooks + - Markdown files + +The Jupyter, Rmd, and Markdown files are required to have a specific set of yaml style headers which are used to organize and discover research: + +``` +--- +title: I Found that Lemurs Do Funny Dances +authors: +- sally_smarts +- wesley_wisdom +tags: +- knowledge +- example +created_at: 2016-06-29 +updated_at: 2016-06-30 +tldr: This is short description of the content and findings of the post. +--- +``` + +*See a full description of headers [further below](https://github.com/airbnb/knowledge-repo#post-headers)* + +Users add these notebooks/files to the knowledge repository through the `knowledge_repo` tool, as described below; which allows them to be rendered and curated in the knowledge repository's web app. + +If your favourite format is missing, we welcome contributions; and are happy to work with you to get it supported. See the "Contributing" section below to see how to add support for more formats. + +Note that the web application can live on top of multiple Knowledge Repo backends. 
Supported types so far are: + + - Git Repo + Remote Git Hosting Service (Primary Use Case) + - Web Application SQL db + +## Getting started +There are two repositories associated with the Knowledge Repository project. +1. This repository, which will be installed first. This is referred to as the knowledge repository tooling. +2. A knowledge data repository, which is created second. This is where the knowledge posts are stored. + +### Installation +To install the knowledge repository tooling (and all its dependencies), simply run: + +`pip install --upgrade "knowledge-repo[all]"` + +You can also skip installing dependencies which are only required in special cases by replacing `all` with one or more of the following (separated by commas): +- `ipynb` : Installs the dependencies required for adding/converting Jupyter notebook files +- `pdf` : Installs the dependencies required for uploading PDFs using the web editor +- `dev`: Installs the dependencies required for doing development, including running the tests + +The `knowledge_repo` script is the one that is used for all of the following actions. It requires the `--repo` flag to be passed to it, with the location of the knowledge data repository. + +You can drop the `--repo` option by setting the `$KNOWLEDGE_REPO` environment variable with the location of the knowledge data repo in your bash/zsh/shell configuration. In bash, this would be done as such: +``` +export KNOWLEDGE_REPO=repo_path +``` + +### Setup of the knowledge data repositories +There are two different ways to do this, depending on whether your organization already has a knowledge data repository or not: + +#### Your organization already has a knowledge data repository setup +If your organization already has a knowledge data repository setup, check it out onto your computer as you normally would; for example: + +`git clone git@example.com:example_data_repo.git` + +Running this same script if a repo already exists at `` will allow you to update it to be a knowledge data repository. This is useful if you are starting a repository on a remote service like GitHub, as this allows you to clone the remote repository as per normal; run this script; and then push the initialization back into the remote service using `git push`. + +#### Your organization does not have knowledge data repository setup +The following command will create a new repository at `` +``` +knowledge_repo --repo init +``` + +If you are hosting this repository on a remote service like Github, and you've created the knowledge data repository using the `init` flag, you must push that to that remote service in order for the later commands to work. On Git, this can be done by creating the remote repository through Git and then running + +``` +git remote add origin url_of_the_repository +git push -u origin master +``` + +For more details about the structure of a knowledge repository, see the technical details section below. + +### Configuration + +There are two types of configuration files, one for knowledge-data git repos that holds posts, and another for the web application. + +#### Knowledge Data Git Repo Configuration + +When running `knowledge_repo init` to make a folder a knowledge-data git repo, a `.knowledge_repo_config` file will be created in the folder. The file will be a copy of the default repo configuration file located [here](https://github.com/airbnb/knowledge-repo/blob/master/knowledge_repo/config_defaults.py). 
+ +This configuration file will allow you to: + + - Add postprocessors to post contributions from the repo. (see the `postprocessors` array of functions) + - Add rules for which subdirectories posts can be added to. (see the `path_parse()` function) + - Check and manage the format of author names at contribution time + - Add logic to `username_parse()` to check post author names and raise exceptions when they don't match + - Add logic to `username_to_name()` to manage how user/author names are displayed, ex. "sally_smarts" --> "Sally Smarts" + - Add logic to `username_to_email()` to manage how user/author names are matched to emails, ex. "sally_smarts" --> "sally.smarts@mycompany.com" + +See the file itself for more detail. + +#### Knowledge Web Application Configuration + +Specify a configuration file when running the web application by adding the flag `--config path/to/config_file.py`. An example configuration file is provided [here](https://github.com/airbnb/knowledge-repo/blob/master/resources/server_config.py). + +This configuration file lets you specify details specific to the web server. For instance, one can specify the database connection string or the request header that contains usernames. See the file itself for more detail. + +## Writing Knowledge Posts + +### TLDR Guide For Contributing + +If you have already set up your system as described below, here is a snapshot of the commands you need to run to upload your knowledge post stored in ~/Documents/my_post.Rmd. For Jupyter / iPython Notebooks, the commands are the same, replacing all instances of `Rmd` with `ipynb`. It assumes you have configured the KNOWLEDGE_REPO environment variable to point to your local copy of the knowledge repository. The code is written for producing and contributing an ipynb file to make the examples clear, R Markdown files are run by using `Rmd` in place of `ipynb` in each command. + +1. `knowledge_repo create Rmd ~/Documents/my_post.Rmd`, which creates a template with required yaml headers. Templates can also be downloaded by clicking "Write a Post!" the web application. *Make sure your post has these headers with correct values for your post* +2. Do your work in the generated my_post.Rmd file. *Make sure the post runs through from start to finish before attempting to add to the Knowledge Repo!* +3. `knowledge_repo add ~/Documents/my_post.Rmd [-p projects/test_project] [--update]` +4. `knowledge_repo preview projects/test_project` +5. `knowledge_repo submit projects/test_project` +6. From your remote git hosting service, request a review for merging the post. (ie. open a pull request on Github) +7. After it has been reviewed, merge it in to the master branch. + +### Full Guide for Contributing: + +#### Creating knowledge +Once the knowledge data repository has been initialized, it is possible to start adding posts. Each post in the knowledge repository requires a specific header format, used for metadata formatting. +To create a new post using a provided template, which has both the header information and example content, run the following command: +``` +knowledge_repo --repo create {ipynb, Rmd, md} filename +``` + +The first argument indicates the type of the file that you want created, while the second argument indicates where the file should be created. 
+ +If the knowledge data repository is created at `knowledge_data_repo`, running +``` +knowledge_repo --repo knowledge_data_repo create md ~/Documents/my_first_knowledge_post.md +``` +will create a file, `~/Documents/my_first_knowledge_post.md`, the contents of which will be the boilerplate template of the knowledge post. + +The help menu for this command (and all following commands) can be reached by adding the `-h` flag, `knowledge_repo --repo create -h`. + +Alternatively, by going to the `/create` route in the webapp, you can click the button for whichever template you would like to have, +and that will download the correct template. + +#### Adding knowledge +Once you've finished writing a post, the next step is to add it to the knowledge data repository. +To do this, run the following command: +``` +knowledge_repo --repo add [-p ] +``` + +Using the example from above, if we wanted to add the post `~/Documents/my_first_knowledge_post.md` to `knowledge_data_repo`, +we would run: +``` +knowledge_repo --repo knowledge_data_repo add ~/Documents/my_first_knowledge_post.md -p projects/test_knowledge +``` + +The `-p` flag specifies the location of the post in the knowledge data repository - in this case, `knowledge_data_repo/projects/test_knowledge`. +The `-p` flag does not need to be specified if `path` is included in the header of the knowledge post. + +#### Updating knowledge +To update an existing knowledge post, pass the `--update` flag to the `add` command. This will allow the add operation to override exiting knowledge posts. +``` +knowledge_repo --repo add --update +``` + +#### Previewing Knowledge +If you would like to see how the post would render on the web app before submitting the post for review, run the following command: +``` +knowledge_repo --repo preview +``` + +In the case from above, we would run: +``` +knowledge_repo --repo knowledge_data_repo preview projects/test_knowledge +``` + +There are other arguments that can be passed to this command, adding the `-h` flag shows them all along with further information about them. + +#### Submitting knowledge +After running the add command, two things should have happened: +1. A new folder should have been created at the path specified in the add command, which ends in `.kp`. This is added automatically to indicate that the folder is a knowledge post. +2. This folder will have been committed to the repository on the branch named `/path_in_add_command` + +Running the example command: `knowledge_repo --repo knowledge_data_repo add ~/Documents/my_first_knowledge_post.md -p projects/test_knowledge`, we would have seen: +1. A new folder: `knowledge_data_repo/projects/test_knowledge.kp` which was committed on +2. A branch (that you are now on), called `knowledge_data_repo/projects/test_knowledge` + +To submit this post for review, simply run the command: +``` +knowledge_repo --repo submit +``` + +In this case, we would run: +``` +knowledge_repo --repo knowledge_data_repo submit knowledge_data_repo/projects/test_knowledge.kp +``` + +### Post Headers + +Here is a full list of headers used in the YAML section of knowledge posts: + +|header |required |purpose |example | +|:--------------|:--------|:----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------| +|title |required |String at top of post |title: This post proves that 2+2=4 | +|authors |required |User entity that wrote the post in organization specified format |authors:
- kanye_west
- beyonce_knowles | +|tags |required |Topics, projects, or any other uniting principle across posts |tags:
- hiphop
- yeezy |
|created_at |required |Date when post was written |created_at: 2016-04-03 |
|updated_at |optional |Date when post was last updated |updated_at: 2016-10-10 |
|tldr |required |Summary of post takeaways that will be visible in /feed |tldr: I'ma let you finish, but Beyonce had one of the best videos of all time! |
|path |optional |Instead of specifying post path in the CLI, specify with this post header |path: projects/path/to/post/on/repo |
|thumbnail |optional |Specify which image is shown in /feed |thumbnail: 3 OR thumbnail: http://cdn.pcwallart.com/images/giraffe-tongue-wallpaper-1.jpg |
|private |optional |If included, post is only visible to authors and editors set in repo configuration |private: true |
|allowed_groups |optional |If the post is private, specify additional users or groups who can see the post |allowed_groups: ['jay_z', 'taylor_swift', 'rap_community'] |

### Handling Images

The knowledge repo's default behavior is to add the markdown's contents, including its images, as is to your knowledge post git repository. If you do not have git LFS set up, it may be in your interest to have these images hosted on some type of cloud storage, so that pulling the repository locally isn't cumbersome.

To add support for pushing images to cloud storage, we provide a [postprocessor](https://github.com/airbnb/knowledge-repo/blob/master/resources/extract_images_to_s3.py). This file needs one line to be configured for your organization's cloud storage. Once configured, the postprocessor's registry key can be added to the knowledge git repository's configuration file as a postprocessor.

### Managing and Viewing Tags

Tags are one of the main organizing principles of the knowledge repo web app, so it is important to maintain high integrity on tag names. For example, having one post tagged as "my_project" and another as "my-project" or "my_projects" prevents these posts from being organized together.

We currently have two ways to maintain tag integrity:

 - Browse the `/cluster?group_by=tags` endpoint to find and match existing tags.
 - After contributing, organize tags in batch with the `/batch_tags` endpoint.

## Running the web app

Running the web app allows you to locally view all the knowledge posts in the repository, or to serve it for others to view. It is also useful when developing on the web app.

### Running the development server

Running the web app in development/local/private mode is as simple as running:

`knowledge_repo --repo <repo_path> runserver`

Supported options are `--port` and `--dburi`, which respectively change the local port on which the server is running and the SQLAlchemy URI where the database can be found and/or initiated. The default port is 7000, and the default dburi is `sqlite:////tmp/knowledge.db`. If the database does not exist, it is created (if that is possible) and initialized. Database migrations are automatic (unless disabled to prevent accidental data loss), but can be performed manually using:

`knowledge_repo --repo <repo_path> db_upgrade --dburi <db_uri>`

### Running the Web App on Multiple Repos

The web application can be run on top of multiple knowledge repo backends. To do this, include each repo with a name and path, prefixed by `--repo`.
For example:

`knowledge_repo --repo {git}/path/to/git/repo --repo {webposts}sqlite:////tmp/dbrepo.db:mypostreftable runserver`

If including a dbrepo, add the name of the dbrepo to the `WEB_EDITOR_PREFIXES` in the server config, and pass that config when running the app:

`knowledge_repo --repo {git}/path/to/git/repo --repo {webposts}sqlite:////tmp/dbrepo.db:mypostreftable runserver --config resources/server_config.py`

Note that this is required for the web application's internal post-writing UI.

### Deploying the web app

Deploying the web app is much like running the development server, except that the web app is deployed on top of gunicorn. It also allows for enabling server-side components such as sending emails to subscribed users.

Deploying is as simple as:
`knowledge_repo --repo <repo_path> deploy`

or, if using multiple repos:
`knowledge_repo --repo {git}/path/to/git/repo --repo {webposts}sqlite:////tmp/dbrepo.db:mypostreftable deploy --config resources/server_config.py`

Supported options are `--port`, `--dburi`, `--workers`, `--timeout` and `--config`. The `--config` option allows you to specify a Python config file from which to load the extended configuration. A template config file is provided in `resources/server_config.py`. The `--port` and `--dburi` options are as before, with the `--workers` and `--timeout` options specifying the number of worker threads to use when serving through gunicorn, and the timeout after which workers are presumed to have died and are restarted.

## Contributing

We would love to work with you to create the best knowledge repository software possible. If you have ideas or would like to have your own code included, add an issue or pull request and we will review it.

### Adding new filetype support

Support for conversion of a particular filetype to a knowledge post is added by writing a new `KnowledgePostConverter` object. Each converter should live in its own file in `knowledge_repo/converters`. Refer to the implementations for ipynb, Rmd, and md for more details. If your conversion is site-specific, you can define these subclasses in `.knowledge_repo_config.py`, whereupon they will be picked up by the conversion code.

### Adding extra structure and/or verifications to the knowledge post conversion process

When a KnowledgePost is constructed by converting from supported filetypes, the resulting post is then passed through a series of postprocessors (defined in `knowledge_repo/postprocessors`). This allows one to modify the knowledge post, upload images to remote storage facilities (such as S3), and/or verify some additional structure of the knowledge posts. As above, defining or importing these classes in `.knowledge_repo_config.py` allows postprocessors to be used locally.

### More

Is the Knowledge Repository missing something else that you would like to see? Let us know, and we'll see if we can help you.

## Technical Details

### What is a Knowledge Repository?

A knowledge repository is a virtual filesystem (such as a git repository or database). A GitKnowledgeRepository, for example, has the following structure:

    + .git # The git repository metadata
    + .resources # A folder into which the knowledge_repo repository is checked out (as a git submodule)
    - .knowledge_repo_config.py # Local configuration for this knowledge repository
    - <knowledge posts>

The use of a git submodule to check out the knowledge_repo into `.resources` allows us to ensure that the client and server are using the same version of the code.
When one uses the `knowledge_repo` script, it actually passes the options to the version of the `knowledge_repo` script in `.resources/scripts/knowledge_repo`. Thus, updating the version of knowledge_repo used by client and server alike is as simple as changing which revision is checked out by git submodule in the usual way. That is: + + pushd .resources + git pull + git checkout / + popd + git commit -a -m 'Updated version of the knowledge_repo' + git push + +Then, all users and servers associated with this repository will be updated to the new version. This prevents version mismatches between client and server, and all users of the repository. + +In development, it is often useful to disable this chaining. To use the local code instead of the code in the checked out knowledge repository, pass the `--dev` option as: + +`knowledge_repo --repo --dev ...` + +### What is a Knowledge Post? + +A knowledge post is a directory, with the following structure: + + + - knowledge.md + + images/* [Optional] + + orig_src/* [Optional; stores the original converted file] + +Images are automatically extracted from the local paths on your computer, and placed into images. `orig_src` contains the file(s) from which the knowledge post was converted from. From 3cf5be52be2233eef4850fd7fd9f0f2b025e8c22 Mon Sep 17 00:00:00 2001 From: Joe Taylor Date: Thu, 22 Jun 2017 12:47:53 +0200 Subject: [PATCH 15/18] Remove conda dependency erroneously added to requirements.txt --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index bb881eaf1..93f0b4101 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,6 @@ blinker==1.4 cffi==1.10.0 click==6.7 - conda==4.3.21 cryptography==1.8.1 decorator==4.0.11 entrypoints==0.2.3 From bd894bff3414b72938ac0120cf6393455c5a1e5e Mon Sep 17 00:00:00 2001 From: zerogjoe Date: Thu, 6 Jul 2017 12:42:43 +0200 Subject: [PATCH 16/18] Bump Docker versions to 16.04 --- Dockerfile | 3 +-- Dockerfile.systest | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index ca69c5f06..4c5b11bad 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,4 @@ -# Using officially supported Ubuntu version for Travis CI -FROM ubuntu:14.04 +FROM ubuntu:16.04 ARG TRAVIS_PYTHON_VERSION ARG PORT diff --git a/Dockerfile.systest b/Dockerfile.systest index b2e6ca11c..fdca09724 100644 --- a/Dockerfile.systest +++ b/Dockerfile.systest @@ -1,7 +1,7 @@ # this Dockerfile builds a container from which system tests can be run -FROM ubuntu:14.04 +FROM ubuntu:16.04 RUN apt-get update && apt-get install -yq curl && apt-get clean From dc747b0cc32e17a401b7ade42d0e41f1f3344304 Mon Sep 17 00:00:00 2001 From: zerogjoe Date: Thu, 6 Jul 2017 13:00:05 +0200 Subject: [PATCH 17/18] Added bzip2 (not bundled in Ubuntu 16.04 image) --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 4c5b11bad..79795acf5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,7 @@ ARG PORT # Install required Ubuntu packages RUN apt-get update RUN apt-get install -y wget +RUN apt-get install -y bzip2 RUN apt-get install -y git # Download appropriate version of Miniconda From 87857aff8a66f75a167ab460d17e968e40e3681f Mon Sep 17 00:00:00 2001 From: zerogjoe Date: Thu, 6 Jul 2017 22:13:55 +0200 Subject: [PATCH 18/18] Fix locales in Dockerfiles --- Dockerfile | 8 ++++++++ Dockerfile.systest | 11 +++++++++++ 2 files changed, 19 insertions(+) diff --git a/Dockerfile b/Dockerfile index 79795acf5..bf699217c 100644 --- a/Dockerfile +++ 
b/Dockerfile @@ -9,6 +9,14 @@ RUN apt-get install -y wget RUN apt-get install -y bzip2 RUN apt-get install -y git +# Set the locale +RUN apt-get install -y locales +RUN sed -i -e 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && \ + locale-gen +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US:en +ENV LC_ALL en_US.UTF-8 + # Download appropriate version of Miniconda RUN if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh; \ else wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; fi diff --git a/Dockerfile.systest b/Dockerfile.systest index fdca09724..12e8b9bf8 100644 --- a/Dockerfile.systest +++ b/Dockerfile.systest @@ -3,6 +3,17 @@ FROM ubuntu:16.04 +# Set the locale +RUN apt-get update +RUN apt-get install -y locales +RUN sed -i -e 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && \ + locale-gen +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US:en +ENV LC_ALL en_US.UTF-8 + +ARG PORT + RUN apt-get update && apt-get install -yq curl && apt-get clean WORKDIR /app