This repository was archived by the owner on Apr 15, 2024. It is now read-only.
Changes from all commits
17 commits
0108361
Add unit tests for each model example and logic in build.sh to run th…
laguirre-cs Jun 15, 2023
ec6293a
Update model README and notebook README(s) to for currency - add miss…
laguirre-cs Jun 15, 2023
c9742b2
Replace scikit_0.23 with python in Scan Manager Config and add curren…
laguirre-cs Jun 15, 2023
c92c0b8
Fix broken link in azureml notebook
laguirre-cs Jun 20, 2023
427818f
Bump scan duration time to 2 hours for Fast Exp. Precalculate that ti…
laguirre-cs Jun 20, 2023
7a58d8c
Restructure run_test.sh script for Containerized Model Examples to ad…
laguirre-cs Jun 20, 2023
4697423
Run base tests for containerized model examples in pipeline
laguirre-cs Jun 20, 2023
f56f089
Renamed scikit_0.23_deployment.yml to python_deployment.yml
laguirre-cs Jun 20, 2023
9376623
Update precalculate timeout to be 3 hours and reduce scan timeout to …
laguirre-cs Jun 21, 2023
60d472e
Move minio check to only happen when running minikube examples
laguirre-cs Jun 21, 2023
6d900e3
Remove call to sh when calling generate.sh script - use bash instead …
laguirre-cs Jun 22, 2023
5184d6f
Install Model requirements before running containerized model examples
laguirre-cs Jun 22, 2023
7595f0c
Test containerized models first
laguirre-cs Jun 22, 2023
a06d945
Consolidate model dependency installation
laguirre-cs Jun 22, 2023
aad0d3f
Skip containerized model test script in pipeline - cannot run docker
laguirre-cs Jun 22, 2023
74c5211
Pin sklearn-pandas & xgboost versions - and be sure to activate conda…
laguirre-cs Jun 26, 2023
50649b9
Separate install of ML libraries for Models from packaging related Mo…
laguirre-cs Jun 27, 2023
69 changes: 60 additions & 9 deletions build.sh
@@ -11,11 +11,14 @@ function setGlobals() {
SKIP_TOOLKIT="${SKIP_TOOLKIT:-false}"
RUN_REMOTE_EXAMPLES="${RUN_REMOTE_EXAMPLES:-false}"
PYTHON_VERSION="3.8"
SK_PANDAS_VERSION="sklearn-pandas==2.2.0"
XGBOOST_VERSION="xgboost==1.7.2"
SCRIPT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 || exit ; pwd -P )"
ARTIFACTS_DIR="${SCRIPT_PATH}/artifacts"
TOOLKIT_PATH="${ARTIFACTS_DIR}/certifai_toolkit.zip"
TOOLKIT_WORK_DIR="${ARTIFACTS_DIR}/toolkit"
PACKAGES_DIR="${TOOLKIT_WORK_DIR}/packages"
CONTAINERIZED_EXAMPLES_DIR="${SCRIPT_PATH}/models/containerized_model/examples"
TEMPLATES_DIR="${SCRIPT_PATH}/models/containerized_model"
BASE_IMAGES_DIR="${SCRIPT_PATH}/models/containerized_model/base_images"
NOTEBOOK_DIR="${SCRIPT_PATH}/notebooks"
@@ -157,6 +160,10 @@ function _installModelRequirements() {
pip install -r "${TEMPLATES_DIR}/requirements.txt"
}

function _installLocalModelRequirements() {
pip install "$SK_PANDAS_VERSION" "$XGBOOST_VERSION"
}


function buildModelDeploymentImages() {
# Builds Docker images for the example Containerized Model Types (Scikit, H2O, Proxy, R). These images are used
@@ -292,11 +299,13 @@ function buildPredictionServiceBaseImages() {
echo "{\"python38\": \"${py38_image}\", \"python39\": \"${py39_image}\"}" > "${BASE_IMAGE_BUILD_REPORT_JSON}"
}

function test() {
function testAll() {
testMarkdownLinks
testModels
testNotebooks
testTutorials
# Requires Docker/Minikube - so skipped in pipeline
#testContainerizedModels
}

function testMarkdownLinks() {
@@ -328,12 +337,44 @@ function testMarkdownLinks() {
}

function testModels() {
echo "TODO: automate subset of model examples - "
# for each
# - train the models
# - start the app in one process,
# - run the test in another process
# - assert both processes exit successfully
MODELS_DIR="${SCRIPT_PATH}/models"
# run tests for each individual example
cd "$MODELS_DIR"/german_credit/
python -m unittest -v test.py

cd "$MODELS_DIR"/german_credit_pandas
python -m unittest -v test.py

cd "$MODELS_DIR"/income_prediction
python -m unittest -v test.py

cd "$MODELS_DIR"/iris
python -m unittest -v test.py

cd "$MODELS_DIR"/patient_readmission
python -m unittest -v test.py

# Go back to root directory
cd "$SCRIPT_PATH"

# TODO: Run other examples (see https://github.com/CognitiveScale/certifai/issues/4870)
# - h2o_dai_german_credit
# - h2o_dai_regression_auto_insurance
# - r-models
}

function testContainerizedModels() {
# run the base set of containerized model examples locally (with docker)
cd "$CONTAINERIZED_EXAMPLES_DIR"
TOOLKIT_PATH="$TOOLKIT_WORK_DIR" ./run_test.sh "local"

# TODO: Add 'RUN_H2O=true' to test other examples (see https://github.com/CognitiveScale/certifai/issues/4870)
# - h2o_dai_german_credit
# - h2o_dai_regression_auto_insurance
# - r-models

# Go back to root directory
cd "$SCRIPT_PATH"
}

function testTutorials() {
@@ -512,7 +553,7 @@ function _sagemakerNotebook() {
function _xgboostModel() {
# xgboost-model
cd "${NOTEBOOK_DIR}"
pip install xgboost
pip install "$XGBOOST_VERSION"
_runNotebookInPlace "${NOTEBOOK_DIR}/xgboost-model/xgboostDmatrixExample.ipynb"
}

@@ -522,7 +563,9 @@ function main() {
setGlobals
activateConda
installToolkit
test
_installModelRequirements
_installLocalModelRequirements
testAll
rm -rf "${TOOLKIT_WORK_DIR}"
;;
docker)
@@ -558,6 +601,14 @@ function main() {
activateConda
testMarkdownLinks
;;
models)
setGlobals
activateConda
installToolkit
_installModelRequirements
_installLocalModelRequirements
testModels
;;
notebook)
setGlobals
activateConda
2 changes: 2 additions & 0 deletions models/README.md
@@ -19,5 +19,7 @@ for detailed information about Cortex Certifai.
| [german_credit_pandas](./german_credit_pandas) | Illustrates using the Certifai Model SDK to run models in a service, where the models expect as input a [pandas DataFrame](https://pandas.pydata.org/) instead of a [numpy array](https://numpy.org/). | Binary classification | python | sklearn |
| [h2o_dai_auto_insurance](./h2o_dai_regression_auto_insurance) | Illustrates using the Certifai Model SDK to create a gunicorn prediction service from a regression H2O MOJO model, and scan it for trust scores. | Regression | python | H2O MOJO |
| [h2o_dai_german_credit](./h2o_dai_german_credit) | Illustrates using the Certifai Model SDK to create a development or gunicorn prediction service from a binary classification H2O MOJO, and scan it for trust scores or for explanations. | Binary classification | python | H2O MOJO |
| [income_prediction](./income_prediction) | Illustrates using the Certifai Model SDK to run a single binary-classification XGBoost model in a service, using a customized model wrapper. | Binary classification | python | sklearn <br /> xgboost |
| [iris](./iris) | Illustrates using the Certifai Model SDK to run a single multi-class model in a service, using a customized model wrapper. | Multi-class classification | python | sklearn <br /> xgboost |
| [patient_readmission](./patient_readmission) | Illustrates using the Certifai Model SDK to run a single binary-classification model in a service, using a customized model wrapper, for creation of fast (bulk) explanations. | Binary classification | python | sklearn <br /> xgboost |
| [r-models](https://github.com/CognitiveScale/cortex-certifai-examples/tree/master/models/r-models) | Illustrates running a R model in a service using plumber. | Binary classification | R | randomForest |
140 changes: 140 additions & 0 deletions models/base_test.py
@@ -0,0 +1,140 @@
import time
import contextlib
import subprocess
import tempfile
import unittest
from typing import Optional, Sequence


def capture_err_and_out(stderr, stdout):
if stderr is not None:
print("\n---------------------- (main) stderr: ----------------------")
print(stderr, end="")
print("\n------------------------------------------------------------\n")
if stdout is not None:
print("\n---------------------- (main) stdout: ----------------------")
print(stdout, end="")
print("\n------------------------------------------------------------")


def capture_output(stdout, stderr, limit=100):
count = 0
print("\n---------------------- (service) stdout: ----------------------\n")
with open(stdout, 'r+') as f:
for line in f:
if count > limit:
break
print(line, end="")
count += 1
print("\n------------------------------------------------------------\n")
print()

count = 0
print("\n---------------------- (service) stderr: ----------------------\n")
with open(stderr, 'r+') as f:
for line in f:
if count > limit:
break
print(line, end="")
count += 1
print("\n------------------------------------------------------------\n")


class ModelTest(unittest.TestCase):
"""Base class for testing Certifai Prediction Service Examples. Each example will typically include multiple
scenarios where:

1) a flask server is launched as a background process (via the Certifai Model SDK)
2) a Certifai Scan (or just a plain Python script) is launched in the foreground that calls (1)

Each process that is launched in the foreground, (2), is expected to complete with a 0 exit code. Each process
launched in the background, (1), is expected to run until it is explicitly killed.

The following functions should cover scenarios that run plain Python Scripts::

run_standalone_python_script(python_script)
run_python_app_test(model_app, python_script)

The following functions should cover scenarios that involve running a Certifai Scan::

run_model_and_definition_test('app_dtree.py', 'my-definition.yaml')
run_model_and_scan('app_dtree.py', 'my-definition.yaml')
run_model_and_explain('app_dtree.py', 'my-definition.yaml', fast=True)
"""
SLEEP_TIME = 5 # 5 seconds
TERMINATION_TIME = 5 # 5 seconds
DEFAULT_TEST_TIMEOUT = 2 * 60 # 2 minutes
DEFAULT_SCAN_TIMEOUT = 60 * 60 * 1 # 1 hour
PRECALCULATE_TIMEOUT = DEFAULT_SCAN_TIMEOUT * 3 # 3 hours
bg = None

def _run_in_foreground(self, command: Sequence[str], timeout: Optional[int] = None):
try:
# Run process and wait until it completes
process = subprocess.run(command, shell=False, capture_output=True, timeout=timeout, text=True)
process.check_returncode()
except subprocess.TimeoutExpired as te:
error = f"\nProcess did not finish within expected time (command={te.cmd}, timeout={te.timeout} seconds). Error: {str(te)}"
capture_err_and_out(te.stderr, te.stdout)
self.fail(error)
except subprocess.CalledProcessError as ce:
error = f"\nProcess finished with non-zero exit code (command={ce.cmd}, code={ce.returncode}). Error: {str(ce)}"
capture_err_and_out(ce.stderr, ce.stdout)
self.fail(error)

@contextlib.contextmanager
def _run_in_background(self, command: Sequence[str]):
with tempfile.NamedTemporaryFile(mode='w+') as stdout, tempfile.NamedTemporaryFile(mode='w+') as stderr:
try:
p = subprocess.Popen(command, shell=False, stdout=stdout, stderr=stderr, stdin=subprocess.DEVNULL,
close_fds=True, text=True)
yield
except Exception:
# WARNING: Killing the subprocess may not kill any workers spawned by the process (e.g. gunicorn!)
p.kill()
p.wait()
capture_output(stdout.name, stderr.name)
raise
Comment on lines +93 to +97

laguirre-cs (Contributor, Author) commented on Jun 20, 2023:
The shutdown functionality for the prediction services is kinda wonky right now, especially when testing. It seemed like the best way to handle it was by using Python's subprocess module & just killing the parent process (and for the sake of our examples, the apps run with the dev server so we never risk leaving zombie workers).

  • The builtin /shutdown endpoint only works for gunicorn servers (when production=True) - this is a known bug in the Model SDK package.
  • Not all tests will call the /shutdown endpoint - like when running python scans. Nor do we expect all the servers to be run with production=True
  • Python's kill() here will kill the process, but if you are using gunicorn, then the spawned workers are left as zombies

finally:
# WARNING: Killing the subprocess may not kill any workers spawned by the process (e.g. gunicorn!)
p.kill()
p.wait()

# Outward facing API

def run_python_app_test(self, model_app: str, test_script: str):
# Run a Python Model (flask app) in the background, give it a couple seconds to start up, before running test
with self._run_in_background(["python", model_app]):
time.sleep(self.SLEEP_TIME)
self._run_in_foreground(["python", test_script], timeout=self.DEFAULT_TEST_TIMEOUT)

def run_standalone_python_script(self, script: str):
# Run the standalone test script
self._run_in_foreground(["python", script], timeout=self.DEFAULT_SCAN_TIMEOUT)

def run_model_and_definition_test(self, model_app: str, definition: str):
# Run a Python Model (flask app) in the background, give it a couple seconds to start up, before running test
with self._run_in_background(["python", model_app]):
time.sleep(self.SLEEP_TIME)
self._run_in_foreground(["certifai", "definition-test", "-f", definition], timeout=self.DEFAULT_SCAN_TIMEOUT)

def run_model_and_scan(self, model_app: str, definition: str):
# Run a Python Model (flask app) in the background, give it a couple seconds to start up, before running test
with self._run_in_background(f"python {model_app}".split()):
time.sleep(self.SLEEP_TIME)
self._run_in_foreground(["certifai", "scan", "-f", definition], timeout=self.DEFAULT_SCAN_TIMEOUT)

def run_model_and_explain(self, model_app: str, definition: str, fast: bool = False):
# Run a Python Model (flask app) in the background, give it a couple seconds to start up.
with self._run_in_background(f"python {model_app}".split()):
time.sleep(self.SLEEP_TIME)
if fast:
# Run the precalculate step prior to the fast explain
pre_calc_command = ["certifai", "explain", "-f", definition, "--precalculate"]
self._run_in_foreground(pre_calc_command, timeout=self.PRECALCULATE_TIMEOUT)
command = ["certifai", "explain", "-f", definition, "--fast"]
else:
command = ["certifai", "explain", "-f", definition]
# Run the explanation scan
self._run_in_foreground(command, timeout=self.DEFAULT_SCAN_TIMEOUT)
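
The docstring above lists the outward-facing API of `ModelTest` (see also the `# Outward facing API` section). For illustration only, a per-example `test.py` of the kind `build.sh` invokes with `python -m unittest -v test.py` might subclass it as sketched below; the app filename and definition filename are taken from the docstring's examples, and the class name and import path are assumptions, not this PR's actual test files:

```python
# Hypothetical test.py sketch for one model example (file and class names are illustrative).
import unittest

from base_test import ModelTest  # assumes base_test.py is importable from the example directory


class ExampleModelTest(ModelTest):

    def test_definition(self):
        # Start the flask app in the background, then run `certifai definition-test -f my-definition.yaml`
        self.run_model_and_definition_test('app_dtree.py', 'my-definition.yaml')

    def test_fast_explain(self):
        # Start the app, run the precalculate step, then run a fast explanation scan
        self.run_model_and_explain('app_dtree.py', 'my-definition.yaml', fast=True)


if __name__ == '__main__':
    unittest.main()
```

Each test runs the prediction service in the background and requires the foreground scan or script to exit with code 0, per the base class.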

2 changes: 1 addition & 1 deletion models/containerized_model/README.md
@@ -242,7 +242,7 @@ Add respective cloud storage credentials and `MODEL_PATH` to `generated-containe
### Step 5 - Add extra-dependencies (optional)

The dependencies work out of the box with a standard scikit-learn model,
providing the model was trained with version 0.23.2 of scikit-learn. If
providing the model was trained with version `1.0.2` of scikit-learn. If
you are using a different version, you should update
`generated-container-model/requirements.txt`.
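
As an illustrative sketch only (the exact pins are not prescribed here and should match the versions your model was trained with), the updated `generated-container-model/requirements.txt` might look like:

```text
# Illustrative pins - match your model's training environment
scikit-learn==1.0.2
sklearn-pandas==2.2.0
xgboost==1.7.2
```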

6 changes: 4 additions & 2 deletions models/containerized_model/examples/README.md
@@ -30,12 +30,12 @@ The following files must exist in this folder:
* certifai_toolkit/ - certifai toolkit v1.3.6 or above

The current conda environment has been set up with:
* python 3.6 (if 3.7 or 3.8, update PYTHON_VERSION in `run_test.sh`)
* python 3.8 (otherwise, update PYTHON_VERSION in `run_test.sh`)
* pip install -U Jinja2

To train and test the models:
* Certifai toolkit installed in the current conda environment
* conda install -c conda-forge xgboost==1.2.0
* conda install -c conda-forge xgboost==1.7.2

To build/run the prediction services: Docker

@@ -70,3 +70,5 @@ storage by changing the environment.yml.

The tests exit on any error, printing out the prediction service log
and deleting the running prediction service container.

For more options, run: `sh run_test.sh -h`