From 0108361b293f16f8d0b2b9d59e1851c1a2c6c01a Mon Sep 17 00:00:00 2001 From: Luis Aguirre Date: Thu, 15 Jun 2023 16:40:06 -0500 Subject: [PATCH 01/17] Add unit tests for each model example and logic in build.sh to run them. Requires running apps WITHOUT gunicorn to avoid zombies --- build.sh | 39 ++++- models/base_test.py | 140 ++++++++++++++++++ models/german_credit/app_mlp_soft_scoring.py | 3 +- models/german_credit/composed_app.py | 1 + models/german_credit/test.py | 35 +++++ .../app_mlp_soft_scoring.py | 3 +- models/german_credit_pandas/test.py | 32 ++++ models/income_prediction/test.py | 23 +++ models/iris/test.py | 28 ++++ models/patient_readmission/app.py | 3 +- models/patient_readmission/test.py | 29 ++++ 11 files changed, 327 insertions(+), 9 deletions(-) create mode 100644 models/base_test.py create mode 100644 models/german_credit/test.py create mode 100644 models/german_credit_pandas/test.py create mode 100644 models/income_prediction/test.py create mode 100644 models/iris/test.py create mode 100644 models/patient_readmission/test.py diff --git a/build.sh b/build.sh index 6bd8dd43..e6a18ef0 100755 --- a/build.sh +++ b/build.sh @@ -328,12 +328,33 @@ function testMarkdownLinks() { } function testModels() { - echo "TODO: automate subset of model examples - " - # for each - # - train the models - # - start the app in one process, - # - run the test in another process - # - assert both processes exit successfully + MODELS_DIR="${SCRIPT_PATH}/models" + # run tests for each individual example + cd "$MODELS_DIR"/german_credit/ + python -m unittest -v test.py + + cd "$MODELS_DIR"/german_credit_pandas + pip install sklearn-pandas + python -m unittest -v test.py + + cd "$MODELS_DIR"/income_prediction + pip install xgboost + python -m unittest -v test.py + + cd "$MODELS_DIR"/iris + pip install xgboost + python -m unittest -v test.py + + cd "$MODELS_DIR"/patient_readmission + python -m unittest -v test.py + + # Go back to root directory + cd "$SCRIPT_PATH" + + # TODO: see https://github.com/CognitiveScale/certifai/issues/4870 + # - h2o_dai_german_credit + # - h2o_dai_regression_auto_insurance + # - r-models } function testTutorials() { @@ -558,6 +579,12 @@ function main() { activateConda testMarkdownLinks ;; + models) + setGlobals + activateConda + installToolkit + testModels + ;; notebook) setGlobals activateConda diff --git a/models/base_test.py b/models/base_test.py new file mode 100644 index 00000000..b9a89fcc --- /dev/null +++ b/models/base_test.py @@ -0,0 +1,140 @@ +import time +import contextlib +import subprocess +import tempfile +import unittest +from typing import Optional, Sequence + + +def capture_err_and_out(stderr, stdout): + if stderr is not None: + print("\n---------------------- (main) stderr: ----------------------") + print(stderr, end="") + print("\n------------------------------------------------------------\n") + if stdout is not None: + print("\n---------------------- (main) stdout: ----------------------") + print(stdout, end="") + print("\n------------------------------------------------------------") + + +def capture_output(stdout, stderr, limit=100): + count = 0 + print("\n---------------------- (service) stdout: ----------------------\n") + with open(stdout, 'r+') as f: + for line in f: + if count > limit: + break + print(line, end="") + limit += 1 + print("\n------------------------------------------------------------\n") + print() + + count = 0 + print("\n---------------------- (service) stderr: ----------------------\n") + with open(stderr, 'r+') as f: + for line in f: + if count > limit: + break + print(line, end="") + limit += 1 + print("\n------------------------------------------------------------\n") + + +class ModelTest(unittest.TestCase): + """Base class for testing Certifai Prediction Service Examples. Each example will typically include multiple + scenarios where: + + 1) a flask server is launched as a background process (via the Certifai Model SDK) + 2) a Certifai Scan is launched (or just a plain Python script) is launched in the foreground that calls (1) + + Each process that is launched in the foreground, (2), is expected to complete with a 0 exit code. Each process + launched in the background (1) are expected to be run until explicitly killed. + + The following functions should cover scenarios that run plain Python Scripts:: + + run_standalone_python_script(python_script) + run_python_app_test(model_app, python_script) + + The following functions should cover scenarios that involve running a Certifai Scan:: + + run_model_and_definition_test('app_dtree.py', 'my-definition.yaml') + run_model_and_scan('app_dtree.py', 'my-definition.yaml') + run_model_and_explain('app_dtree.py', 'my-definition.yaml', fast=True) + """ + + SLEEP_TIME = 5 # 5 seconds + TERMINATION_TIME = 5 # 5 seconds + DEFAULT_TEST_TIMEOUT = 2 * 60 # 2 minutes + DEFAULT_SCAN_TIMEOUT = 60 * 60 # 1 hour + bg = None + + def _run_in_foreground(self, command: Sequence[str], timeout: Optional[int] = None): + try: + # Run process and wait until it completes + process = subprocess.run(command, shell=False, capture_output=True, timeout=timeout, text=True) + process.check_returncode() + except subprocess.TimeoutExpired as te: + error = f"\nProcess did not finish within expected time (command={te.cmd}, timeout={te.timeout} seconds). Error: {str(te)}" + capture_err_and_out(te.stderr, te.stdout) + self.fail(error) + except subprocess.CalledProcessError as ce: + error = f"\nProcess finished with non-zero exit code (command={ce.cmd}, code={ce.returncode}). Error: {str(ce)}" + capture_err_and_out(ce.stderr, ce.stdout) + self.fail(error) + + @contextlib.contextmanager + def _run_in_background(self, command: Sequence[str]): + with tempfile.NamedTemporaryFile(mode='w+') as stdout, tempfile.NamedTemporaryFile(mode='w+') as stderr: + try: + p = subprocess.Popen(command, shell=False, stdout=stdout, stderr=stderr, stdin=subprocess.DEVNULL, + close_fds=True, text=True) + yield + except Exception: + # WARNING: Killing the subprocess may not kill any workers spawned by the process (e.g. gunicorn!) + p.kill() + p.wait() + capture_output(stdout.name, stderr.name) + raise + finally: + # WARNING: Killing the subprocess may not kill any workers spawned by the process (e.g. gunicorn!) + p.kill() + p.wait() + + # Outward facing API + + def run_python_app_test(self, model_app: str, test_script: str): + # Run a Python Model (flask app) in the background, give it a couple seconds to start up, before running test + with self._run_in_background(["python", model_app]): + time.sleep(self.SLEEP_TIME) + self._run_in_foreground(["python", test_script], timeout=self.DEFAULT_TEST_TIMEOUT) + + def run_standalone_python_script(self, script: str): + # Run the standalone test script + self._run_in_foreground(["python", script], timeout=self.DEFAULT_SCAN_TIMEOUT) + + def run_model_and_definition_test(self, model_app: str, definition: str): + # Run a Python Model (flask app) in the background, give it a couple seconds to start up, before running test + with self._run_in_background(["python", model_app]): + time.sleep(self.SLEEP_TIME) + self._run_in_foreground(["certifai", "definition-test", "-f", definition], timeout=self.DEFAULT_SCAN_TIMEOUT) + + def run_model_and_scan(self, model_app: str, definition: str): + # Run a Python Model (flask app) in the background, give it a couple seconds to start up, before running test + with self._run_in_background(f"python {model_app}".split()): + time.sleep(self.SLEEP_TIME) + self._run_in_foreground(["certifai", "scan", "-f", definition], timeout=self.DEFAULT_SCAN_TIMEOUT) + + def run_model_and_explain(self, model_app: str, definition: str, fast: bool = False): + # Run a Python Model (flask app) in the background, give it a couple seconds to start up. + with self._run_in_background(f"python {model_app}".split()): + time.sleep(self.SLEEP_TIME) + if fast: + # Run the precalculate step prior to the fast explain + pre_calc_command = ["certifai", "explain", "-f", definition, "--precalculate"] + self._run_in_foreground(pre_calc_command, timeout=self.DEFAULT_SCAN_TIMEOUT) + command = ["certifai", "explain", "-f", definition, "--fast"] + else: + command = ["certifai", "explain", "-f", definition] + # Run the explanation scan + self._run_in_foreground(command, timeout=self.DEFAULT_SCAN_TIMEOUT) + diff --git a/models/german_credit/app_mlp_soft_scoring.py b/models/german_credit/app_mlp_soft_scoring.py index 29395a72..e2444cdc 100644 --- a/models/german_credit/app_mlp_soft_scoring.py +++ b/models/german_credit/app_mlp_soft_scoring.py @@ -24,4 +24,5 @@ endpoint_url='/german_credit_mlp/predict') # to start production ready gunicorn server use `production=True` -app.run(production=True) +#app.run(production=True) +app.run() diff --git a/models/german_credit/composed_app.py b/models/german_credit/composed_app.py index b9eb15c3..febced39 100644 --- a/models/german_credit/composed_app.py +++ b/models/german_credit/composed_app.py @@ -40,4 +40,5 @@ composed_app.add_wrapped_model('/german_credit_logit', logit_app) composed_app.add_wrapped_model('/german_credit_svm', svm_app) composed_app.add_wrapped_model('/german_credit_mlp', mlp_app) +#composed_app.run(production=True) composed_app.run() diff --git a/models/german_credit/test.py b/models/german_credit/test.py new file mode 100644 index 00000000..fb2d1c77 --- /dev/null +++ b/models/german_credit/test.py @@ -0,0 +1,35 @@ +import sys +import os +import unittest + +# Add the file to classpath for relative import +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +from base_test import ModelTest + + +class GermanCreditTest(ModelTest): + + def setUp(self): + self.run_standalone_python_script("train.py") + + def test_single_app(self): + self.run_python_app_test("app_dtree.py", "app_test.py") + + def test_composed_app(self): + self.run_python_app_test("composed_app.py", "composed_app_test.py") + + def test_trust_scan(self): + self.run_model_and_scan("composed_app.py", "german_credit_scanner_definition.yaml") + + def test_explain_scan(self): + self.run_standalone_python_script("explain.py") + + def test_soft_scoring_app(self): + self.run_python_app_test("app_mlp_soft_scoring.py", "app_mlp_soft_scoring_test.py") + + def test_soft_scoring_scan(self): + self.run_model_and_scan("app_mlp_soft_scoring.py", "german_credit_shap_explanation_scanner_definition.yaml") + + +if __name__ == '__main__': + unittest.main() diff --git a/models/german_credit_pandas/app_mlp_soft_scoring.py b/models/german_credit_pandas/app_mlp_soft_scoring.py index 3138b759..7219df0b 100644 --- a/models/german_credit_pandas/app_mlp_soft_scoring.py +++ b/models/german_credit_pandas/app_mlp_soft_scoring.py @@ -25,4 +25,5 @@ endpoint_url='/german_credit_mlp/predict', pandas_kwargs={'columns': columns}) # to start production ready gunicorn server use `production=True` -app.run(production=True) +#app.run(production=True) +app.run() diff --git a/models/german_credit_pandas/test.py b/models/german_credit_pandas/test.py new file mode 100644 index 00000000..e06fd4f0 --- /dev/null +++ b/models/german_credit_pandas/test.py @@ -0,0 +1,32 @@ +import sys +import os +import unittest + +# Add the file to classpath for relative import +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +from base_test import ModelTest + + +class GermanCreditPandasTest(ModelTest): + + def setUp(self): + self.run_standalone_python_script("train_pandas.py") + + def test_single_app(self): + self.run_python_app_test("app_dtree.py", "app_test.py") + + def test_composed_app(self): + self.run_python_app_test("composed_app.py", "composed_app_test.py") + + def test_trust_scan(self): + self.run_model_and_scan("composed_app.py", "german_credit_scanner_definition.yaml") + + def test_soft_scoring_app(self): + self.run_python_app_test("app_mlp_soft_scoring.py", "app_mlp_soft_scoring_test.py") + + def test_soft_scoring_scan(self): + self.run_model_and_scan("app_mlp_soft_scoring.py", "german_credit_shap_explanation_scanner_definition.yaml") + + +if __name__ == '__main__': + unittest.main() diff --git a/models/income_prediction/test.py b/models/income_prediction/test.py new file mode 100644 index 00000000..70a6411c --- /dev/null +++ b/models/income_prediction/test.py @@ -0,0 +1,23 @@ +import sys +import os +import unittest + +# Add the file to classpath for relative import +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +from base_test import ModelTest + + +class IncomePredictionTest(ModelTest): + + def setUp(self): + self.run_standalone_python_script("train.py") + + def test_single_app(self): + self.run_python_app_test("app_xgb.py", "app_test.py") + + def test_explain_scan(self): + self.run_model_and_scan("app_xgb.py", "income_explain_definition.yaml") + + +if __name__ == '__main__': + unittest.main() diff --git a/models/iris/test.py b/models/iris/test.py new file mode 100644 index 00000000..c0096c0b --- /dev/null +++ b/models/iris/test.py @@ -0,0 +1,28 @@ +import sys +import os +import unittest + +# Add the file to classpath for relative import +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +from base_test import ModelTest + + +class IrisTest(ModelTest): + + def setUp(self): + # Train all models at the start of each test - possibly excessive, but gives full flexibility to run + # a single test in isolation + self.run_standalone_python_script("train.py") + + def test_single_app(self): + self.run_python_app_test("app_svm.py", "app_test.py") + + def test_single_app_xgboost(self): + self.run_python_app_test("app_xgb.py", "app_test.py") + + def test_scan(self): + self.run_model_and_scan("app_svm.py", "iris_scanner_definition.yaml") + + +if __name__ == '__main__': + unittest.main() diff --git a/models/patient_readmission/app.py b/models/patient_readmission/app.py index 2fc314de..29b667d8 100644 --- a/models/patient_readmission/app.py +++ b/models/patient_readmission/app.py @@ -14,4 +14,5 @@ encoder = saved.get('encoder', None) app = SimpleModelWrapper(model=model, encoder=encoder) -app.run(production=True) +#app.run(production=True) +app.run() diff --git a/models/patient_readmission/test.py b/models/patient_readmission/test.py new file mode 100644 index 00000000..970650d4 --- /dev/null +++ b/models/patient_readmission/test.py @@ -0,0 +1,29 @@ +import sys +import os +import unittest + +# Add the file to classpath for relative import +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +from base_test import ModelTest + + +class PatientReadmissionTest(ModelTest): + + def setUp(self): + self.run_standalone_python_script("train.py") + + def test_single_app(self): + self.run_python_app_test("app.py", "app_test.py") + + def test_definition_test(self): + self.run_model_and_definition_test("app.py", "explain_def.yml") + + def test_fast_explain(self): + self.run_model_and_explain("app.py", "explain_def.yml", fast=True) + + def test_traditional_explain(self): + self.run_model_and_explain("app.py", "explain_def.yml", fast=False) + + +if __name__ == '__main__': + unittest.main() From ec6293ad7472ceca8ea612562fe8ce1f363cdc53 Mon Sep 17 00:00:00 2001 From: Luis Aguirre Date: Thu, 15 Jun 2023 16:54:35 -0500 Subject: [PATCH 02/17] Update model README and notebook README(s) to for currency - add missing exampels, fix broken link, remove beta flag warning --- models/README.md | 2 ++ models/patient_readmission/README.md | 6 ------ notebooks/target_encoded/dataset_generation/README.md | 2 +- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/models/README.md b/models/README.md index 6bf68a3f..80de67f3 100644 --- a/models/README.md +++ b/models/README.md @@ -19,5 +19,7 @@ for detailed information about Cortex Certifai. | [german_credit_pandas](./german_credit_pandas) | Illustrates using the Certifai Model SDK to run a models in a service, where the models expect as input a [pandas DataFrame](https://pandas.pydata.org/) instead of a [numpy array](https://numpy.org/). | Binary classification | python | sklearn | | [h2o_dai_auto_insurance](./h2o_dai_regression_auto_insurance) | Illustrates using the Certifai Model SDK to create a gunicorn prediction service from a regression H2O MOJO model, and scan it for trust scores. | Regression | python | H2O MOJO | | [h2o_dai_german_credit](./h2o_dai_german_credit) | Illustrates using the Certifai Model SDK to create a development or gunicorn prediction service from a binary classification H2O MOJO, and scan it for trust scores or for explanations. | Binary classification | python | H2O MOJO | +| [income_prediction](./income_prediction) | Illustrates using the Certifai Model SDK to run a single binary-classification XGBoost model in a service, using a customized model wrapper. | Binary classification | python | sklearn
xgboost | | [iris](./iris) | Illustrates using the Certifai Model SDK to run a single multi-class model in a service, using a customized model wrapper. | Multi-class classification | python | sklearn
xgboost | +| [patient_readmission](./patient_readmission) | Illustrates using the Certifai Model SDK to run a single binary-classification model in a service, using a customized model wrapper, for creation of fast (bulk) explanations. | Binary classification | python | sklearn
xgboost | | [r-models](https://github.com/CognitiveScale/cortex-certifai-examples/tree/master/models/r-models) | Illustrates running a R model in a service using plumber. | Binary classification | R | randomForest | diff --git a/models/patient_readmission/README.md b/models/patient_readmission/README.md index f10a0cee..07d80bf7 100644 --- a/models/patient_readmission/README.md +++ b/models/patient_readmission/README.md @@ -11,12 +11,6 @@ Specifically, it shows how to use the Certifai toolkit to: * generate explanations for the model's predictions using the CLI * set up fast-explanation, for generating large numbers of explanations at scale -*Note*: Fast explanation is currently a beta feature. To enable it you will need to -edit your `~/.certifai/certifai_conf.ini` file to add: -``` -[scanner] -support_fast_explanations_beta = True -``` ## Wrap a single model as a service 1. Make sure you have activated your Certifai toolkit environment: diff --git a/notebooks/target_encoded/dataset_generation/README.md b/notebooks/target_encoded/dataset_generation/README.md index 6301ae1c..520abfa6 100644 --- a/notebooks/target_encoded/dataset_generation/README.md +++ b/notebooks/target_encoded/dataset_generation/README.md @@ -3,7 +3,7 @@ ## Notebooks -- `german_credit_multiclass_dataset_generation`: Creates a neural network model with soft-scoring to transform binary outcome-labels (loan granted/loan denied (1/2) ) [German-Credit-Dataset](https://archive.ics.uci.edu/ml/datasets/Statlog+%28German+Credit+Data%29) into multiclass problem with three outcome-labels (loan granted/loan denied/further inspection (1/2/3) ) and saves the dataset as csv +- `german_credit_multiclass_dataset_generation`: Creates a neural network model with soft-scoring to transform binary outcome-labels (loan granted/loan denied (1/2) ) [German-Credit-Dataset](https://archive.ics.uci.edu/dataset/144/statlog+german+credit+data) into multiclass problem with three outcome-labels (loan granted/loan denied/further inspection (1/2/3) ) and saves the dataset as csv - `german_credit_multiclass_dataset_encoding`: Encodes the above generated dataset into target encoded and one-hot encoded feature columns and dumps the generated mappings to disk as json From c9742b2950f1813b99a8a585a209ba83d9de1862 Mon Sep 17 00:00:00 2001 From: Luis Aguirre Date: Thu, 15 Jun 2023 17:08:07 -0500 Subject: [PATCH 03/17] Replace scikit_0.23 with python in Scan Manager Config and add current base image - not permanent --- scan-manager/docs/README.md | 2 +- .../docs/setup_artifacts/deployment/config.yml | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/scan-manager/docs/README.md b/scan-manager/docs/README.md index 7a98eeba..8aa6ebe0 100644 --- a/scan-manager/docs/README.md +++ b/scan-manager/docs/README.md @@ -49,7 +49,7 @@ artifacts include deployment templates and .yaml config file(s); and are provide The `deployment` folder contains: - Deployment templates: `.yaml` templates that provide the configuration templates for deploying each of the specified model types on Kubernetes: - - `scikit_0.23`: Uses a `python3.8` base image with `scikit-learn v0.23` pre-installed. + - `python`: Uses a `python3.8` base image with `scikit-learn` pre-installed. - `h2o_mojo`: Uses a `python3.8` base image with `daimojo-2.4.8-cp36` whl pre-installed. - `r_model`: Uses `rocker/r-ver:latest` base image with `r-cran-randomforest` pre-installed. - `hosted_model`: Uses a `python3.8` base image for wrapping an already hosted model service. diff --git a/scan-manager/docs/setup_artifacts/deployment/config.yml b/scan-manager/docs/setup_artifacts/deployment/config.yml index f4ed1aa3..a1eeb0a5 100644 --- a/scan-manager/docs/setup_artifacts/deployment/config.yml +++ b/scan-manager/docs/setup_artifacts/deployment/config.yml @@ -1,11 +1,13 @@ -scikit_0.23: - deployment: scikit_0.23_deployment.yml +python: + deployment: python_deployment.yml default_base_image: name: python38_scikit - value: c12e/cortex-certifai-model-scikit:v4-1.3.16-25-g527dfe93 + value: c12e/cortex-certifai-model-scikit:base-py38-1.3.17-63-g2ecf19f0-d357535 available_base_images: - name: python38_scikit - value: c12e/cortex-certifai-model-scikit:v4-1.3.16-25-g527dfe93 + value: c12e/cortex-certifai-model-scikit:base-py38-1.3.17-63-g2ecf19f0-d357535 + - name: python39_scikit + value: c12e/cortex-certifai-model-scikit:base-py39-1.3.17-63-g2ecf19f0-d357535 h2o_mojo: deployment: h2o_mojo_deployment.yml default_base_image: From c92c0b8867b9f6abd7fff16dd944678a3bd51c59 Mon Sep 17 00:00:00 2001 From: Luis Aguirre Date: Tue, 20 Jun 2023 09:21:02 -0500 Subject: [PATCH 04/17] Fix broken link in azureml notebook --- models/containerized_model/README.md | 2 +- .../german_credit_azure_ml_demo.ipynb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/models/containerized_model/README.md b/models/containerized_model/README.md index 0d11219f..510857ea 100644 --- a/models/containerized_model/README.md +++ b/models/containerized_model/README.md @@ -242,7 +242,7 @@ Add respective cloud storage credentials and `MODEL_PATH` to `generated-containe ### Step 5 - Add extra-dependencies (optional) The dependencies work out of the box with a standard scikit-learn model, -providing the model was trained with version 0.23.2 of scikit-learn. If +providing the model was trained with a version `1.0.2` of scikit-learn. If you are using a different version, you should update `generated-container-model/requirements.txt`. diff --git a/notebooks/azureml_model_headers_demo/german_credit_azure_ml_demo.ipynb b/notebooks/azureml_model_headers_demo/german_credit_azure_ml_demo.ipynb index b9d21a21..dbb88867 100644 --- a/notebooks/azureml_model_headers_demo/german_credit_azure_ml_demo.ipynb +++ b/notebooks/azureml_model_headers_demo/german_credit_azure_ml_demo.ipynb @@ -21,7 +21,7 @@ "source": [ "## CONTENTS\n", "\n", - "1. In this tutorial we will create sklearn models to classify [german credit loan risk](https://archive.ics.uci.edu/ml/datasets/Statlog+%28German+Credit+Data%29) (predict whether loan will be granted or not)\n", + "1. In this tutorial we will create sklearn models to classify [german credit loan risk](https://archive.ics.uci.edu/dataset/144/statlog+german+credit+data) (predict whether loan will be granted or not)\n", "\n", "2. Register model and deploy as webservice in ACI (AZURE CONTAINER INSTANCE) with authentication\n", "\n", @@ -45,7 +45,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Training Scikit-learn models on [UCI German credit data](https://archive.ics.uci.edu/ml/datasets/Statlog+%28German+Credit+Data%29)" + "## Training Scikit-learn models on [UCI German credit data](https://archive.ics.uci.edu/dataset/144/statlog+german+credit+data)" ] }, { From 427818f5dcf87d83ce5b2962a4622557ea34ba6b Mon Sep 17 00:00:00 2001 From: Luis Aguirre Date: Tue, 20 Jun 2023 15:47:52 -0500 Subject: [PATCH 05/17] Bump scan duration time to 2 hours for Fast Exp. Precalculate that timed out --- models/base_test.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/models/base_test.py b/models/base_test.py index b9a89fcc..80d80a56 100644 --- a/models/base_test.py +++ b/models/base_test.py @@ -61,11 +61,10 @@ class ModelTest(unittest.TestCase): run_model_and_scan('app_dtree.py', 'my-definition.yaml') run_model_and_explain('app_dtree.py', 'my-definition.yaml', fast=True) """ - SLEEP_TIME = 5 # 5 seconds TERMINATION_TIME = 5 # 5 seconds - DEFAULT_TEST_TIMEOUT = 2 * 60 # 2 minutes - DEFAULT_SCAN_TIMEOUT = 60 * 60 # 1 hour + DEFAULT_TEST_TIMEOUT = 2 * 60 # 2 minutes + DEFAULT_SCAN_TIMEOUT = 60 * 60 * 2 # 2 hours bg = None def _run_in_foreground(self, command: Sequence[str], timeout: Optional[int] = None): From 7a58d8c13a8dfa19c9dadf559882edff6332f05e Mon Sep 17 00:00:00 2001 From: Luis Aguirre Date: Tue, 20 Jun 2023 15:59:11 -0500 Subject: [PATCH 06/17] Restructure run_test.sh script for Containerized Model Examples to add Help Message and skip H2O. Required bumping xgboost --- build.sh | 2 +- models/containerized_model/examples/README.md | 6 +- .../containerized_model/examples/run_test.sh | 317 +++++++++++------- .../templates/python/requirements.txt | 2 +- .../python_xgboost_dmatrix/requirements.txt | 2 +- 5 files changed, 207 insertions(+), 122 deletions(-) mode change 100644 => 100755 models/containerized_model/examples/run_test.sh diff --git a/build.sh b/build.sh index e6a18ef0..18588bf2 100755 --- a/build.sh +++ b/build.sh @@ -351,7 +351,7 @@ function testModels() { # Go back to root directory cd "$SCRIPT_PATH" - # TODO: see https://github.com/CognitiveScale/certifai/issues/4870 + # TODO: Run other examples (see https://github.com/CognitiveScale/certifai/issues/4870) # - h2o_dai_german_credit # - h2o_dai_regression_auto_insurance # - r-models diff --git a/models/containerized_model/examples/README.md b/models/containerized_model/examples/README.md index 4173c05f..86eee591 100644 --- a/models/containerized_model/examples/README.md +++ b/models/containerized_model/examples/README.md @@ -30,12 +30,12 @@ The following files must exist in this folder: * certifai_toolkit/ - certifai toolkit v1.3.6 or above The current conda environment has been setup with: -* python 3.6 (if 3.7 or 3.8, update PYTHON_VERSION in `run_test.sh`) +* python 3.8 (otherwise, update PYTHON_VERSION in `run_test.sh`) * pip install -U Jinja2 To train and test the models: * Certifai toolkit installed in the current conda environment -* conda install -c conda-forge xgboost==1.2.0 +* conda install -c conda-forge xgboost==1.7.2 To build/run the prediction services: Docker @@ -70,3 +70,5 @@ storage by changing the environment.yml. The tests exit on any error, printing out the prediction service log and deleting the running prediction service container. + +For more options, run: `sh run_test.sh -h` diff --git a/models/containerized_model/examples/run_test.sh b/models/containerized_model/examples/run_test.sh old mode 100644 new mode 100755 index 9e7be220..3425bcf2 --- a/models/containerized_model/examples/run_test.sh +++ b/models/containerized_model/examples/run_test.sh @@ -1,51 +1,68 @@ # -# Copyright (c) 2020. Cognitive Scale Inc. All rights reserved. +# Copyright (c) 2023. Cognitive Scale Inc. All rights reserved. # Licensed under CognitiveScale Example Code License https://github.com/CognitiveScale/cortex-certifai-examples/blob/master/LICENSE.md # #!/usr/bin/env bash set -e -target=${1:-local} +# - ask in Trusted AI chat if we still have an h2o license? -THIS_DIR="$( cd "$(dirname "$0")" >/dev/null 2>&1 || exit ; pwd -P )" -GEN_DIR="${THIS_DIR}/generated-container-model" -PYTHON_VERSION=${PYTHON_VERSION:-3.8} # or 3.7 -NAMESPACE=certifai-models +function set_globals() { + THIS_DIR="$( cd "$(dirname "$0")" >/dev/null 2>&1 || exit ; pwd -P )" + GEN_DIR="${THIS_DIR}/generated-container-model" + PYTHON_VERSION=${PYTHON_VERSION:-3.8} # or 3.7 + XGBOOST_VERSION="xgboost==1.7.2" + NAMESPACE=certifai-models + MODEL_DIR=${THIS_DIR}/../.. + RUN_H2O=${RUN_H2O-"false"} + MINIO="mc" + + # Default toolkit to './certifai_toolkit' + TOOLKIT_PATH="${TOOLKIT_PATH:-$THIS_DIR/certifai_toolkit}" + PACKAGES_DIR="${TOOLKIT_PATH}/packages" +} + +function check_minio_installed() { + if ! command -v $MINIO &> /dev/null + then + echo "'$MINIO' CLI could not be found! Install Minio Client: https://min.io/docs/minio/linux/reference/minio-mc.html" + exit 1 + fi +} function base_setup() { model_type=$1 image_name=$2 model_file=$3 echo "***Generating ${model_type}***" - rm -rf ${GEN_DIR} - sh ${THIS_DIR}/../generate.sh -i ${image_name}:latest -m ${model_type} -d ${GEN_DIR} + rm -rf "${GEN_DIR}" + sh "${THIS_DIR}/../generate.sh" -i "${image_name}":latest -m "${model_type}" -d "${GEN_DIR}" -t "$TOOLKIT_PATH" all_dir=${GEN_DIR}/packages/all - mkdir -p ${all_dir} - cp ${THIS_DIR}/certifai_toolkit/packages/all/cortex-certifai-common*.zip ${all_dir} - cp ${THIS_DIR}/certifai_toolkit/packages/all/cortex-certifai-model*.zip ${all_dir} + mkdir -p "${all_dir}" + cp "${PACKAGES_DIR}"/all/cortex-certifai-common*.zip "${all_dir}" + cp "${PACKAGES_DIR}"/all/cortex-certifai-model*.zip "${all_dir}" } function h2o_setup() { - base_setup $1 $2 pipeline.mojo - cp ${THIS_DIR}/license.txt ${GEN_DIR}/license/license.txt - cp ${THIS_DIR}/daimojo*linux_x86_64.whl ${GEN_DIR}/ext_packages/ + base_setup "$1" "$2" pipeline.mojo + cp "${THIS_DIR}"/license.txt "${GEN_DIR}"/license/license.txt + cp "${THIS_DIR}"/daimojo*linux_x86_64.whl "${GEN_DIR}"/ext_packages/ } -MODEL_DIR=${THIS_DIR}/../.. function train_models() { - (cd ${MODEL_DIR}/german_credit && python train.py) - cp ${MODEL_DIR}/german_credit/german_credit_dtree.pkl ${THIS_DIR}/sklearn_german_credit/model/model.pkl - (cd ${MODEL_DIR}/income_prediction && python train.py) - cp ${MODEL_DIR}/income_prediction/adult_income_xgb.pkl ${THIS_DIR}/xgboost_dmatrix_income/model/model.pkl - (cd ${MODEL_DIR}/iris && python train.py) - cp ${MODEL_DIR}/iris/iris_xgb.pkl ${THIS_DIR}/xgboost_iris/model/model.pkl + (cd "${MODEL_DIR}"/german_credit && python train.py) + cp "${MODEL_DIR}"/german_credit/models/german_credit_dtree.pkl "${THIS_DIR}"/sklearn_german_credit/model/model.pkl + (cd "${MODEL_DIR}"/income_prediction && python train.py) + cp "${MODEL_DIR}"/income_prediction/adult_income_xgb.pkl "${THIS_DIR}"/xgboost_dmatrix_income/model/model.pkl + (cd "${MODEL_DIR}"/iris && python train.py) + cp "${MODEL_DIR}"/iris/iris_xgb.pkl "${THIS_DIR}"/xgboost_iris/model/model.pkl } function python_setup() { - base_setup $1 $2 model.pkl + base_setup "$1" "$2" model.pkl } function wait_for() { @@ -54,19 +71,19 @@ function wait_for() { until [ $next_wait_time -eq 30 ] || $(command); do sleep $(( next_wait_time=next_wait_time+5 )) done - [ $next_wait_time -lt 30 ] + [ "$next_wait_time" -lt 30 ] } function end_prediction_service() { result=${1:-failed} - if [ ${result} = 'failed' ] + if [ "${result}" = 'failed' ] then echo "!!!TEST FAILED for ${name}!!!" fi echo "Removing running prediction service, if any" - if [ $target == "local" ]; then + if [ "$target" == "local" ]; then end_prediction_service_local - elif [ $target == "minikube" ]; then + elif [ "$target" == "minikube" ]; then end_prediction_service_minikube fi result=failed # set for next test, until it explicitly succeeds @@ -76,51 +93,56 @@ function end_prediction_service() { function end_prediction_service_local() { if [ -n "$container_id" ] then - if [ ${result} = 'failed' ] + if [ "${result}" = 'failed' ] then - docker logs ${container_id} + docker logs "${container_id}" fi - docker stop ${container_id} - docker rm -f ${container_id} + docker stop "${container_id}" + docker rm -f "${container_id}" fi unset container_id } function end_prediction_service_minikube() { - if [ ${result} = 'failed' ] + if [ "${result}" = 'failed' ] then - kubectl logs -l app=${resource_name} --namespace $NAMESPACE + kubectl logs -l app="${resource_name}" --namespace $NAMESPACE fi - kubectl delete service ${resource_name} --ignore-not-found --namespace $NAMESPACE - kubectl delete deployment ${resource_name} --ignore-not-found --namespace $NAMESPACE + kubectl delete service "${resource_name}" --ignore-not-found --namespace $NAMESPACE + kubectl delete deployment "${resource_name}" --ignore-not-found --namespace $NAMESPACE } function build() { image_name=$1 echo "***Building ${image_name}***" - sh ${GEN_DIR}/container_util.sh build + sh "${GEN_DIR}"/container_util.sh build } function minikube_setup() { eval $(minikube docker-env) # build images in shared registry # setup minio server on local port 9000 set +e - kubectl create namespace $NAMESPACE - kubectl apply -f ${THIS_DIR}/minikube/test-minio.yml - kubectl get svc test-minio 2>&1 > /dev/null - if [ "$?" -ne 0 ]; then + kubectl create namespace "$NAMESPACE" + kubectl apply -f "${THIS_DIR}"/minikube/test-minio.yml + sleep 5 # wait for service to come up + #kubectl get svc test-minio 2>&1 > /dev/null + #if [ "$?" -ne 0 ]; then + if kubectl get svc test-minio >> /dev/null; then kubectl expose deployment test-minio --type=LoadBalancer --port 9000 --target-port 9000 fi - echo "***Setting up minio command line (mc) and certifai bucket***" - mc config host list minikube 2>&1 > /dev/null + echo "***Setting up minio command line ($MINIO) and certifai bucket***" + $MINIO config host list minikube 2>&1 > /dev/null if [ "$?" -ne 0 ]; then - mc config host add minikube http://127.0.0.1:9000 minio minio123 + $MINIO config host add minikube http://127.0.0.1:9000 minio minio123 fi - mc ls minikube/certifai 2>&1 > /dev/null + $MINIO ls minikube/certifai 2>&1 > /dev/null if [ "$?" -ne 0 ]; then - mc mb minikube/certifai + $MINIO mb minikube/certifai + fi + + if [[ "$RUN_H2O" == "true" ]]; then + $MINIO cp "${THIS_DIR}"/license.txt minikube/certifai/files/license.txt fi - mc cp ${THIS_DIR}/license.txt minikube/certifai/files/license.txt set -e } @@ -128,15 +150,15 @@ function data_setup() { local_name=$1 model_file=$2 model_use_case_id=$3 - if [ $target == "local" ]; then + if [ "$target" == "local" ]; then return 0 fi echo "***Setting up prediction service data in minio for ${local_name}***" - mc config host add minikube http://127.0.0.1:9000 minio minio123 + $MINIO config host add minikube http://127.0.0.1:9000 minio minio123 model_data_path="minikube/certifai/${model_use_case_id}/models" local_path="${THIS_DIR}/${local_name}/model" - mc cp ${local_path}/${model_file} ${model_data_path}/${model_file} - mc cp ${local_path}/metadata.yml ${model_data_path}/metadata.yml + $MINIO cp "${local_path}"/"${model_file}" "${model_data_path}"/"${model_file}" + $MINIO cp "${local_path}"/metadata.yml "${model_data_path}"/metadata.yml } function run_and_test() { @@ -144,89 +166,150 @@ function run_and_test() { model_file=$2 image_name=$3 resource_name=$4 - if [ $target == "local" ]; then + if [ "$target" == "local" ]; then echo "***Running ${local_name}***" - container_id=$(docker run -d -p 8551:8551 -v ${THIS_DIR}/${local_name}/model:/tmp/model \ - -e MODEL_PATH=/tmp/model/${model_file} \ - -e METADATA_PATH=/tmp/model/metadata.yml -t ${image_name}) - elif [ $target == "minikube" ]; then + container_id=$(docker run -d -p 8551:8551 -v "${THIS_DIR}"/"${local_name}"/model:/tmp/model \ + -e MODEL_PATH=/tmp/model/"${model_file}" \ + -e METADATA_PATH=/tmp/model/metadata.yml -t "${image_name}") + elif [ "$target" == "minikube" ]; then echo "***Generating deployment definition for ${resource_name}***" - sh ${GEN_DIR}/config_deploy.sh -c ${THIS_DIR}/${local_name}/model/deployment_config.yml - kubectl apply -f ${GEN_DIR}/deployment.yml - kubectl wait --for=condition=ready --timeout=300s pod -l app=${resource_name} -n ${NAMESPACE} + sh "${GEN_DIR}"/config_deploy.sh -c "${THIS_DIR}"/"${local_name}"/model/deployment_config.yml + kubectl apply -f "${GEN_DIR}"/deployment.yml + kubectl wait --for=condition=ready --timeout=300s pod -l app="${resource_name}" -n ${NAMESPACE} # we need to delete the created service so expose can create it - kubectl delete svc ${resource_name} --ignore-not-found --namespace certifai-models - kubectl expose deployment ${resource_name} --type=LoadBalancer \ + kubectl delete svc "${resource_name}" --ignore-not-found --namespace certifai-models + kubectl expose deployment "${resource_name}" --type=LoadBalancer \ --port 8551 --target-port 8551 --namespace certifai-models fi # Wait until health endpoint is available wait_for 'curl -X GET http://127.0.0.1:8551/health' echo "***Testing ${local_name}***" - certifai scan -f ${THIS_DIR}/${local_name}/explain_def.yml + certifai scan -f "${THIS_DIR}"/"${local_name}"/explain_def.yml echo "***Successfully tested ${local_name}***" end_prediction_service succeeded } +function install_toolkit() { + # Install the toolkit, if its not already installed + if ! command -v certifai &> /dev/null + then + echo "Installing Certifai Toolkit!" + pip install "${THIS_DIR}"/certifai_toolkit/packages/all/* + pip install "${THIS_DIR}"/certifai_toolkit/packages/python"${PYTHON_VERSION}"/* + fi +} + + +function run_sklearn_model() { + # Predict service for Sklearn + python_setup python sklearn_predict + build sklearn_predict + data_setup sklearn_german_credit model.pkl test_german_credit + run_and_test sklearn_german_credit model.pkl sklearn_predict test-german-credit-dtree +} + +function run_xgboost_iris_model() { + # Predict service for XGBClassifier or XGBRegressor + python_setup python xgboost_predict + # Add xgboost to requirements + echo "\n$XGBOOST_VERSION\n" >> "${GEN_DIR}"/requirements.txt + build xgboost_predict + data_setup xgboost_iris model.pkl test_iris + run_and_test xgboost_iris model.pkl xgboost_predict test-iris-xgb-iris +} + +function run_xgboost_income_prediction() { + # Predict service for xgboost using DMatrix + python_setup python_xgboost_dmatrix xgboost_dmatrix_predict + build xgboost_dmatrix_predict + data_setup xgboost_dmatrix_income model.pkl test_income + run_and_test xgboost_dmatrix_income model.pkl xgboost_dmatrix_predict test-income-xgboost +} + +function run_h2o_models() { + # Predict service for H2O MOJO + h2o_setup h2o_mojo h2o_mojo_predict + build h2o_mojo_predict + for name in auto_insurance german_credit iris + do + local_name="h2o_${name}" + muc_id="test_${name}" + model_id="dai-mojo" + data_setup ${local_name} pipeline.mojo ${muc_id} + name_dashed=$(echo ${muc_id}-${model_id} | tr '_' '-') + run_and_test "${local_name}" pipeline.mojo h2o_mojo_predict "${name_dashed}" + done +} -# Install the toolkit, if its not already installed -if ! command -v certifai &> /dev/null -then - pip install ${THIS_DIR}/certifai_toolkit/packages/all/* - pip install ${THIS_DIR}/certifai_toolkit/packages/python${PYTHON_VERSION}/* -fi + +function printUsage() { + local prog_name + prog_name="$(basename "$0")" + + local usage + usage="$prog_name [-h|--help] [local | minikube] + +Test Containerized Model examples using the Certifai Model SDK. + +Options: + + local - Run local prediction service tests + + minikube - Run minikube prediction service tests. Requires Minikube to already be running. + +Environment Variables: + + RUN_H2O - if 'true', then h2o examples will be tested and the 'license.txt' must contain a valid h2o license. + Defaults to 'false'. + + TOOLKIT_PATH - The path to the (unzipped) Certifai Toolkit, defaults to: ./certifai_toolkit + +Examples: + + TOOLKIT_PATH=./toolkit ./run_test.sh local + + TOOLKIT_PATH=./toolkit ./run_test.sh minikube +" + echo "$usage" +} # # MAIN EXECUTION STARTS HERE # -if [ $target == "local" ]; then - echo "Running local prediction service tests" -elif [ $target == "minikube" ]; then - echo "Running minikube prediction service tests" - minikube_setup -else - echo "Invalid target environment" - exit 1 -fi - -set -exv -trap end_prediction_service EXIT - -# Predict service for H2O MOJO -h2o_setup h2o_mojo h2o_mojo_predict -build h2o_mojo_predict -for name in auto_insurance german_credit iris -do - local_name="h2o_${name}" - muc_id="test_${name}" - model_id="dai-mojo" - data_setup ${local_name} pipeline.mojo ${muc_id} - name_dashed=$(echo ${muc_id}-${model_id} | tr '_' '-') - run_and_test ${local_name} pipeline.mojo h2o_mojo_predict "${name_dashed}" -done - -train_models # For the non-H2O models -# -# Predict service for Sklearn -python_setup python sklearn_predict -build sklearn_predict -data_setup sklearn_german_credit model.pkl test_german_credit -run_and_test sklearn_german_credit model.pkl sklearn_predict test-german-credit-dtree - -# Predict service for XGBClassifier or XGBRegressor -python_setup python xgboost_predict -# Add xgboost to requirements -echo "\nxgboost==1.2.0\n" >> ${GEN_DIR}/requirements.txt -build xgboost_predict -data_setup xgboost_iris model.pkl test_iris -run_and_test xgboost_iris model.pkl xgboost_predict test-iris-xgb-iris -# -# -# Predict service for xgboost using DMatrix -python_setup python_xgboost_dmatrix xgboost_dmatrix_predict -build xgboost_dmatrix_predict -data_setup xgboost_dmatrix_income model.pkl test_income -run_and_test xgboost_dmatrix_income model.pkl xgboost_dmatrix_predict test-income-xgboost - -echo "***All tests completed successfully***" -trap - EXIT +function main() { + target=${1:-local} + set -exv + set_globals + install_toolkit + check_minio_installed + if [ "$target" == "local" ]; then + echo "Running local prediction service tests" + elif [ "$target" == "minikube" ]; then + echo "Running minikube prediction service tests" + minikube_setup + elif [ "$target" == "-h" ] || [ "$target" == "--help" ]; then + printUsage + exit 0 + else + echo "Invalid target environment" + printUsage + exit 1 + fi + + # catch any EXIT signals & clean prediction services + trap end_prediction_service EXIT + if [[ "$RUN_H2O" == "true" ]]; then + run_h2o_models + fi + # Train the non-H2O models + train_models + # Run the non-H2O models + run_sklearn_model + run_xgboost_iris_model + run_xgboost_income_prediction + echo "***All tests completed successfully***" + trap - EXIT +} + +main "$@" diff --git a/models/containerized_model/templates/python/requirements.txt b/models/containerized_model/templates/python/requirements.txt index e6153084..a66d672b 100644 --- a/models/containerized_model/templates/python/requirements.txt +++ b/models/containerized_model/templates/python/requirements.txt @@ -1,4 +1,4 @@ # add python pip install dependencies below pyyaml # required by prediction service - do not remove scikit-learn==1.0.2 -#xgboost==1.2.0 # uncomment if using xgboost and pin to same version as model +#xgboost==1.7.2 # uncomment if using xgboost and pin to same version as model diff --git a/models/containerized_model/templates/python_xgboost_dmatrix/requirements.txt b/models/containerized_model/templates/python_xgboost_dmatrix/requirements.txt index 1899193f..3a1724a7 100644 --- a/models/containerized_model/templates/python_xgboost_dmatrix/requirements.txt +++ b/models/containerized_model/templates/python_xgboost_dmatrix/requirements.txt @@ -1,3 +1,3 @@ # add python pip install dependencies below pyyaml # required by prediction service - do not remove -xgboost==1.2.0 # pin to match the environment in which the model was pickled +xgboost==1.7.2 # pin to match the environment in which the model was pickled From 46974230cc62e6885bc29f48ec93c0025acd414c Mon Sep 17 00:00:00 2001 From: Luis Aguirre Date: Tue, 20 Jun 2023 16:02:45 -0500 Subject: [PATCH 07/17] Run base tests for containerized model examples in pipeline --- build.sh | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/build.sh b/build.sh index 18588bf2..90b1916f 100755 --- a/build.sh +++ b/build.sh @@ -16,6 +16,7 @@ function setGlobals() { TOOLKIT_PATH="${ARTIFACTS_DIR}/certifai_toolkit.zip" TOOLKIT_WORK_DIR="${ARTIFACTS_DIR}/toolkit" PACKAGES_DIR="${TOOLKIT_WORK_DIR}/packages" + CONTAINERIZED_EXAMPLES_DIR="${SCRIPT_PATH}/models/containerized_model/examples" TEMPLATES_DIR="${SCRIPT_PATH}/models/containerized_model" BASE_IMAGES_DIR="${SCRIPT_PATH}/models/containerized_model/base_images" NOTEBOOK_DIR="${SCRIPT_PATH}/notebooks" @@ -292,9 +293,10 @@ function buildPredictionServiceBaseImages() { echo "{\"python38\": \"${py38_image}\", \"python39\": \"${py39_image}\"}" > "${BASE_IMAGE_BUILD_REPORT_JSON}" } -function test() { +function testAll() { testMarkdownLinks testModels + testContainerizedModels testNotebooks testTutorials } @@ -357,6 +359,20 @@ function testModels() { # - r-models } +function testContainerizedModels() { + # run base of set of containerized model examples locally (with docker) + cd "$CONTAINERIZED_EXAMPLES_DIR" + TOOLKIT_PATH="$TOOLKIT_WORK_DIR" ./run_test.sh "local" + + # TODO: Add 'RUN_H2O=true' to test other examples (see https://github.com/CognitiveScale/certifai/issues/4870) + # - h2o_dai_german_credit + # - h2o_dai_regression_auto_insurance + # - r-models + + # Go back to root directory + cd "$SCRIPT_PATH" +} + function testTutorials() { cd "${TUTORIALS_DIR}" _installAutomatedDeps @@ -543,7 +559,7 @@ function main() { setGlobals activateConda installToolkit - test + testAll rm -rf "${TOOLKIT_WORK_DIR}" ;; docker) @@ -584,6 +600,7 @@ function main() { activateConda installToolkit testModels + testContainerizedModels ;; notebook) setGlobals From f56f089d56c48629b918f6d024e64bf733fb6497 Mon Sep 17 00:00:00 2001 From: Luis Aguirre Date: Tue, 20 Jun 2023 16:44:28 -0500 Subject: [PATCH 08/17] Renamed scikit_0.23_deployment.yml to python_deployment.yml --- .../{scikit_0.23_deployment.yml => python_deployment.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scan-manager/docs/setup_artifacts/deployment/{scikit_0.23_deployment.yml => python_deployment.yml} (100%) diff --git a/scan-manager/docs/setup_artifacts/deployment/scikit_0.23_deployment.yml b/scan-manager/docs/setup_artifacts/deployment/python_deployment.yml similarity index 100% rename from scan-manager/docs/setup_artifacts/deployment/scikit_0.23_deployment.yml rename to scan-manager/docs/setup_artifacts/deployment/python_deployment.yml From 937662325bc85ccb40ffdbcd2576f6efa473868e Mon Sep 17 00:00:00 2001 From: Luis Aguirre Date: Wed, 21 Jun 2023 08:12:23 -0500 Subject: [PATCH 09/17] Update precalculate timeout to be 3 hours and reduce scan timeout to 1 hour --- models/base_test.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/models/base_test.py b/models/base_test.py index 80d80a56..8afcfa1f 100644 --- a/models/base_test.py +++ b/models/base_test.py @@ -63,8 +63,9 @@ class ModelTest(unittest.TestCase): """ SLEEP_TIME = 5 # 5 seconds TERMINATION_TIME = 5 # 5 seconds - DEFAULT_TEST_TIMEOUT = 2 * 60 # 2 minutes - DEFAULT_SCAN_TIMEOUT = 60 * 60 * 2 # 2 hours + DEFAULT_TEST_TIMEOUT = 2 * 60 # 2 minutes + DEFAULT_SCAN_TIMEOUT = 60 * 60 * 1 # 1 hour + PRECALCULATE_TIMEOUT = DEFAULT_SCAN_TIMEOUT * 3 # 3 hours bg = None def _run_in_foreground(self, command: Sequence[str], timeout: Optional[int] = None): @@ -130,7 +131,7 @@ def run_model_and_explain(self, model_app: str, definition: str, fast: bool = Fa if fast: # Run the precalculate step prior to the fast explain pre_calc_command = ["certifai", "explain", "-f", definition, "--precalculate"] - self._run_in_foreground(pre_calc_command, timeout=self.DEFAULT_SCAN_TIMEOUT) + self._run_in_foreground(pre_calc_command, timeout=self.PRECALCULATE_TIMEOUT) command = ["certifai", "explain", "-f", definition, "--fast"] else: command = ["certifai", "explain", "-f", definition] From 60d472eccdd58d2fa9c94a545e9d355922dcfc20 Mon Sep 17 00:00:00 2001 From: Luis Aguirre Date: Wed, 21 Jun 2023 13:28:13 -0500 Subject: [PATCH 10/17] Move minio check to only happen when running minikube examples --- models/containerized_model/examples/run_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/containerized_model/examples/run_test.sh b/models/containerized_model/examples/run_test.sh index 3425bcf2..b4ccf62e 100755 --- a/models/containerized_model/examples/run_test.sh +++ b/models/containerized_model/examples/run_test.sh @@ -282,11 +282,11 @@ function main() { set -exv set_globals install_toolkit - check_minio_installed if [ "$target" == "local" ]; then echo "Running local prediction service tests" elif [ "$target" == "minikube" ]; then echo "Running minikube prediction service tests" + check_minio_installed minikube_setup elif [ "$target" == "-h" ] || [ "$target" == "--help" ]; then printUsage From 6d900e32edb4c0b628a50c04dd21a3e8a0730baf Mon Sep 17 00:00:00 2001 From: Luis Aguirre Date: Thu, 22 Jun 2023 09:39:10 -0500 Subject: [PATCH 11/17] Remove call to sh when calling generate.sh script - use bash instead to support function() syntax --- models/containerized_model/examples/run_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/containerized_model/examples/run_test.sh b/models/containerized_model/examples/run_test.sh index b4ccf62e..1caa89a3 100755 --- a/models/containerized_model/examples/run_test.sh +++ b/models/containerized_model/examples/run_test.sh @@ -37,7 +37,7 @@ function base_setup() { model_file=$3 echo "***Generating ${model_type}***" rm -rf "${GEN_DIR}" - sh "${THIS_DIR}/../generate.sh" -i "${image_name}":latest -m "${model_type}" -d "${GEN_DIR}" -t "$TOOLKIT_PATH" + "${THIS_DIR}/../generate.sh" -i "${image_name}":latest -m "${model_type}" -d "${GEN_DIR}" -t "$TOOLKIT_PATH" all_dir=${GEN_DIR}/packages/all mkdir -p "${all_dir}" From 5184d6fb708c156916aeeebbf72280d4085e15d7 Mon Sep 17 00:00:00 2001 From: Luis Aguirre Date: Thu, 22 Jun 2023 16:20:20 -0500 Subject: [PATCH 12/17] Install Model requirements before running containerized model examples --- build.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/build.sh b/build.sh index 90b1916f..f486199f 100755 --- a/build.sh +++ b/build.sh @@ -559,6 +559,7 @@ function main() { setGlobals activateConda installToolkit + _installModelRequirements testAll rm -rf "${TOOLKIT_WORK_DIR}" ;; @@ -600,6 +601,7 @@ function main() { activateConda installToolkit testModels + _installModelRequirements testContainerizedModels ;; notebook) From 7595f0c31c52729e3a71e5d3d3132c36da1ff727 Mon Sep 17 00:00:00 2001 From: Luis Aguirre Date: Thu, 22 Jun 2023 16:20:55 -0500 Subject: [PATCH 13/17] Test containerized models first --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index f486199f..6a62d64f 100755 --- a/build.sh +++ b/build.sh @@ -295,8 +295,8 @@ function buildPredictionServiceBaseImages() { function testAll() { testMarkdownLinks - testModels testContainerizedModels + testModels testNotebooks testTutorials } From a06d9455f5882980901abe6f5d9aa1c2ad597465 Mon Sep 17 00:00:00 2001 From: Luis Aguirre Date: Thu, 22 Jun 2023 16:26:42 -0500 Subject: [PATCH 14/17] Consolidate model dependency installation --- build.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/build.sh b/build.sh index 6a62d64f..e0850029 100755 --- a/build.sh +++ b/build.sh @@ -155,6 +155,7 @@ function buildLocal() { } function _installModelRequirements() { + pip install sklearn-pandas xgboost pip install -r "${TEMPLATES_DIR}/requirements.txt" } @@ -336,15 +337,12 @@ function testModels() { python -m unittest -v test.py cd "$MODELS_DIR"/german_credit_pandas - pip install sklearn-pandas python -m unittest -v test.py cd "$MODELS_DIR"/income_prediction - pip install xgboost python -m unittest -v test.py cd "$MODELS_DIR"/iris - pip install xgboost python -m unittest -v test.py cd "$MODELS_DIR"/patient_readmission From aad0d3f7a862cb7ae2807fa8bd6652c5e30c2d0c Mon Sep 17 00:00:00 2001 From: Luis Aguirre Date: Thu, 22 Jun 2023 16:35:01 -0500 Subject: [PATCH 15/17] Skip containerized model test script in pipeline - cannot run docker --- build.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/build.sh b/build.sh index e0850029..b471d565 100755 --- a/build.sh +++ b/build.sh @@ -296,10 +296,11 @@ function buildPredictionServiceBaseImages() { function testAll() { testMarkdownLinks - testContainerizedModels testModels testNotebooks testTutorials + # Requires Docker/Minikube - so skipped in pipeline + #testContainerizedModels } function testMarkdownLinks() { @@ -598,9 +599,8 @@ function main() { setGlobals activateConda installToolkit - testModels _installModelRequirements - testContainerizedModels + testModels ;; notebook) setGlobals From 74c5211be946524a9ae40c2a0ca4283b3c7a892f Mon Sep 17 00:00:00 2001 From: Luis Aguirre Date: Mon, 26 Jun 2023 10:01:00 -0500 Subject: [PATCH 16/17] Pin sklearn-pandas & xgboost versions - and be sure to activate conda env before doing install in pipeline --- build.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/build.sh b/build.sh index b471d565..35733716 100755 --- a/build.sh +++ b/build.sh @@ -11,6 +11,8 @@ function setGlobals() { SKIP_TOOLKIT="${SKIP_TOOLKIT:-false}" RUN_REMOTE_EXAMPLES="${RUN_REMOTE_EXAMPLES:-false}" PYTHON_VERSION="3.8" + SK_PANDAS_VERSION="sklearn-pandas==2.2.0" + XGBOOST_VERSION="xgboost==1.7.2" SCRIPT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 || exit ; pwd -P )" ARTIFACTS_DIR="${SCRIPT_PATH}/artifacts" TOOLKIT_PATH="${ARTIFACTS_DIR}/certifai_toolkit.zip" @@ -155,7 +157,7 @@ function buildLocal() { } function _installModelRequirements() { - pip install sklearn-pandas xgboost + pip install "$SK_PANDAS_VERSION" "$XGBOOST_VERSION" pip install -r "${TEMPLATES_DIR}/requirements.txt" } @@ -548,7 +550,7 @@ function _sagemakerNotebook() { function _xgboostModel() { # xgboost-model cd "${NOTEBOOK_DIR}" - pip install xgboost + pip install "$XGBOOST_VERSION" _runNotebookInPlace "${NOTEBOOK_DIR}/xgboost-model/xgboostDmatrixExample.ipynb" } @@ -565,6 +567,7 @@ function main() { docker) setGlobals PUSH_IMAGES=true + activateConda extractToolkit _installModelRequirements buildModelDeploymentImages @@ -572,6 +575,7 @@ function main() { local-docker) setGlobals PUSH_IMAGES=false + activateConda extractToolkit _installModelRequirements buildModelDeploymentImages @@ -579,6 +583,7 @@ function main() { docker-builder) setGlobals PUSH_IMAGES=true + activateConda extractToolkit _installModelRequirements buildPredictionServiceBaseImages @@ -586,6 +591,7 @@ function main() { local-docker-builder) setGlobals PUSH_IMAGES=false + activateConda extractToolkit _installModelRequirements buildPredictionServiceBaseImages From 50649b9334e540e2a09a3fe1fe5800a5d73633c6 Mon Sep 17 00:00:00 2001 From: Luis Aguirre Date: Tue, 27 Jun 2023 09:28:15 -0500 Subject: [PATCH 17/17] Separate install of ML libraries for Models from packaging related Model requirements. Undo set up of conda for docker related commands --- build.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/build.sh b/build.sh index 35733716..264578e2 100755 --- a/build.sh +++ b/build.sh @@ -157,10 +157,13 @@ function buildLocal() { } function _installModelRequirements() { - pip install "$SK_PANDAS_VERSION" "$XGBOOST_VERSION" pip install -r "${TEMPLATES_DIR}/requirements.txt" } +function _installLocalModelRequirements() { + pip install "$SK_PANDAS_VERSION" "$XGBOOST_VERSION" +} + function buildModelDeploymentImages() { # Builds Docker images for the example Containerized Model Types (Scikit, H2O, Proxy, R). These are images are used @@ -561,13 +564,13 @@ function main() { activateConda installToolkit _installModelRequirements + _installLocalModelRequirements testAll rm -rf "${TOOLKIT_WORK_DIR}" ;; docker) setGlobals PUSH_IMAGES=true - activateConda extractToolkit _installModelRequirements buildModelDeploymentImages @@ -575,7 +578,6 @@ function main() { local-docker) setGlobals PUSH_IMAGES=false - activateConda extractToolkit _installModelRequirements buildModelDeploymentImages @@ -583,7 +585,6 @@ function main() { docker-builder) setGlobals PUSH_IMAGES=true - activateConda extractToolkit _installModelRequirements buildPredictionServiceBaseImages @@ -591,7 +592,6 @@ function main() { local-docker-builder) setGlobals PUSH_IMAGES=false - activateConda extractToolkit _installModelRequirements buildPredictionServiceBaseImages @@ -606,6 +606,7 @@ function main() { activateConda installToolkit _installModelRequirements + _installLocalModelRequirements testModels ;; notebook)