diff --git a/.dvc/.gitignore b/.dvc/.gitignore new file mode 100644 index 00000000..528f30c7 --- /dev/null +++ b/.dvc/.gitignore @@ -0,0 +1,3 @@ +/config.local +/tmp +/cache diff --git a/.dvc/config b/.dvc/config new file mode 100644 index 00000000..f9aae639 --- /dev/null +++ b/.dvc/config @@ -0,0 +1,10 @@ +[core] + remote = my_s3_remote_2 +['remote "myremote"'] + url = /tmp/dvc +['remote "localremote"'] + url = ../../../localremote +['remote "myremote-amazon"'] + url = ../arn:aws:s3:::bee-ml-dataset-jan-24 +['remote "my_s3_remote_2"'] + url = ../arn:aws:s3:::dvc-projects-research-mrbestnaija1 diff --git a/.dvc/plots/confusion.json b/.dvc/plots/confusion.json new file mode 100644 index 00000000..84ec022f --- /dev/null +++ b/.dvc/plots/confusion.json @@ -0,0 +1,107 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v5.json", + "data": { + "values": "" + }, + "title": "", + "facet": { + "field": "rev", + "type": "nominal" + }, + "spec": { + "transform": [ + { + "aggregate": [ + { + "op": "count", + "as": "xy_count" + } + ], + "groupby": [ + "", + "" + ] + }, + { + "impute": "xy_count", + "groupby": [ + "rev", + "" + ], + "key": "", + "value": 0 + }, + { + "impute": "xy_count", + "groupby": [ + "rev", + "" + ], + "key": "", + "value": 0 + }, + { + "joinaggregate": [ + { + "op": "max", + "field": "xy_count", + "as": "max_count" + } + ], + "groupby": [] + }, + { + "calculate": "datum.xy_count / datum.max_count", + "as": "percent_of_max" + } + ], + "encoding": { + "x": { + "field": "", + "type": "nominal", + "sort": "ascending", + "title": "" + }, + "y": { + "field": "", + "type": "nominal", + "sort": "ascending", + "title": "" + } + }, + "layer": [ + { + "mark": "rect", + "width": 300, + "height": 300, + "encoding": { + "color": { + "field": "xy_count", + "type": "quantitative", + "title": "", + "scale": { + "domainMin": 0, + "nice": true + } + } + } + }, + { + "mark": "text", + "encoding": { + "text": { + "field": "xy_count", + "type": "quantitative" + }, + 
"color": { + "condition": { + "test": "datum.percent_of_max > 0.5", + "value": "white" + }, + "value": "black" + } + } + } + ] + } +} diff --git a/.dvc/plots/confusion_normalized.json b/.dvc/plots/confusion_normalized.json new file mode 100644 index 00000000..92c77739 --- /dev/null +++ b/.dvc/plots/confusion_normalized.json @@ -0,0 +1,112 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v5.json", + "data": { + "values": "" + }, + "title": "", + "facet": { + "field": "rev", + "type": "nominal" + }, + "spec": { + "transform": [ + { + "aggregate": [ + { + "op": "count", + "as": "xy_count" + } + ], + "groupby": [ + "", + "" + ] + }, + { + "impute": "xy_count", + "groupby": [ + "rev", + "" + ], + "key": "", + "value": 0 + }, + { + "impute": "xy_count", + "groupby": [ + "rev", + "" + ], + "key": "", + "value": 0 + }, + { + "joinaggregate": [ + { + "op": "sum", + "field": "xy_count", + "as": "sum_y" + } + ], + "groupby": [ + "" + ] + }, + { + "calculate": "datum.xy_count / datum.sum_y", + "as": "percent_of_y" + } + ], + "encoding": { + "x": { + "field": "", + "type": "nominal", + "sort": "ascending", + "title": "" + }, + "y": { + "field": "", + "type": "nominal", + "sort": "ascending", + "title": "" + } + }, + "layer": [ + { + "mark": "rect", + "width": 300, + "height": 300, + "encoding": { + "color": { + "field": "percent_of_y", + "type": "quantitative", + "title": "", + "scale": { + "domain": [ + 0, + 1 + ] + } + } + } + }, + { + "mark": "text", + "encoding": { + "text": { + "field": "percent_of_y", + "type": "quantitative", + "format": ".2f" + }, + "color": { + "condition": { + "test": "datum.percent_of_y > 0.5", + "value": "white" + }, + "value": "black" + } + } + } + ] + } +} diff --git a/.dvc/plots/linear.json b/.dvc/plots/linear.json new file mode 100644 index 00000000..970dc929 --- /dev/null +++ b/.dvc/plots/linear.json @@ -0,0 +1,116 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v5.json", + "data": { + "values": "" + }, + "title": "", + 
"width": 300, + "height": 300, + "layer": [ + { + "encoding": { + "x": { + "field": "", + "type": "quantitative", + "title": "" + }, + "y": { + "field": "", + "type": "quantitative", + "title": "", + "scale": { + "zero": false + } + }, + "color": { + "field": "rev", + "type": "nominal" + } + }, + "layer": [ + { + "mark": "line" + }, + { + "selection": { + "label": { + "type": "single", + "nearest": true, + "on": "mouseover", + "encodings": [ + "x" + ], + "empty": "none", + "clear": "mouseout" + } + }, + "mark": "point", + "encoding": { + "opacity": { + "condition": { + "selection": "label", + "value": 1 + }, + "value": 0 + } + } + } + ] + }, + { + "transform": [ + { + "filter": { + "selection": "label" + } + } + ], + "layer": [ + { + "mark": { + "type": "rule", + "color": "gray" + }, + "encoding": { + "x": { + "field": "", + "type": "quantitative" + } + } + }, + { + "encoding": { + "text": { + "type": "quantitative", + "field": "" + }, + "x": { + "field": "", + "type": "quantitative" + }, + "y": { + "field": "", + "type": "quantitative" + } + }, + "layer": [ + { + "mark": { + "type": "text", + "align": "left", + "dx": 5, + "dy": -5 + }, + "encoding": { + "color": { + "type": "nominal", + "field": "rev" + } + } + } + ] + } + ] + } + ] +} diff --git a/.dvc/plots/scatter.json b/.dvc/plots/scatter.json new file mode 100644 index 00000000..6e8cf5b4 --- /dev/null +++ b/.dvc/plots/scatter.json @@ -0,0 +1,104 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v5.json", + "data": { + "values": "" + }, + "title": "", + "width": 300, + "height": 300, + "layer": [ + { + "encoding": { + "x": { + "field": "", + "type": "quantitative", + "title": "" + }, + "y": { + "field": "", + "type": "quantitative", + "title": "", + "scale": { + "zero": false + } + }, + "color": { + "field": "rev", + "type": "nominal" + } + }, + "layer": [ + { + "mark": "point" + }, + { + "selection": { + "label": { + "type": "single", + "nearest": true, + "on": "mouseover", + "encodings": [ + "x" + 
], + "empty": "none", + "clear": "mouseout" + } + }, + "mark": "point", + "encoding": { + "opacity": { + "condition": { + "selection": "label", + "value": 1 + }, + "value": 0 + } + } + } + ] + }, + { + "transform": [ + { + "filter": { + "selection": "label" + } + } + ], + "layer": [ + { + "encoding": { + "text": { + "type": "quantitative", + "field": "" + }, + "x": { + "field": "", + "type": "quantitative" + }, + "y": { + "field": "", + "type": "quantitative" + } + }, + "layer": [ + { + "mark": { + "type": "text", + "align": "left", + "dx": 5, + "dy": -5 + }, + "encoding": { + "color": { + "type": "nominal", + "field": "rev" + } + } + } + ] + } + ] + } + ] +} diff --git a/.dvc/plots/simple.json b/.dvc/plots/simple.json new file mode 100644 index 00000000..1cebce9b --- /dev/null +++ b/.dvc/plots/simple.json @@ -0,0 +1,31 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v5.json", + "data": { + "values": "" + }, + "title": "", + "width": 300, + "height": 300, + "mark": { + "type": "line" + }, + "encoding": { + "x": { + "field": "", + "type": "quantitative", + "title": "" + }, + "y": { + "field": "", + "type": "quantitative", + "title": "", + "scale": { + "zero": false + } + }, + "color": { + "field": "rev", + "type": "nominal" + } + } +} diff --git a/.dvc/plots/smooth.json b/.dvc/plots/smooth.json new file mode 100644 index 00000000..42b1ecff --- /dev/null +++ b/.dvc/plots/smooth.json @@ -0,0 +1,39 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v5.json", + "data": { + "values": "" + }, + "title": "", + "mark": { + "type": "line" + }, + "encoding": { + "x": { + "field": "", + "type": "quantitative", + "title": "" + }, + "y": { + "field": "", + "type": "quantitative", + "title": "", + "scale": { + "zero": false + } + }, + "color": { + "field": "rev", + "type": "nominal" + } + }, + "transform": [ + { + "loess": "", + "on": "", + "groupby": [ + "rev" + ], + "bandwidth": 0.3 + } + ] +} diff --git a/.dvcignore b/.dvcignore new file mode 100644 index 
00000000..51973055 --- /dev/null +++ b/.dvcignore @@ -0,0 +1,3 @@ +# Add patterns of files dvc should ignore, which could improve +# the performance. Learn more at +# https://dvc.org/doc/user-guide/dvcignore diff --git a/.env/my_key_github_ssh.pub b/.env/my_key_github_ssh.pub new file mode 100644 index 00000000..d90d5d56 --- /dev/null +++ b/.env/my_key_github_ssh.pub @@ -0,0 +1 @@ +#deleted the ssh key diff --git a/.github/workflows/cml.yaml b/.github/workflows/cml.yaml new file mode 100644 index 00000000..3536e220 --- /dev/null +++ b/.github/workflows/cml.yaml @@ -0,0 +1,63 @@ +name: CML Report +on: pull_request +jobs: + run: + runs-on: [ubuntu-latest] + steps: + - uses: iterative/setup-cml@v2 + - uses: iterative/setup-dvc@v1 + - uses: actions/checkout@v3 + with: + fetch-depth: 2 + # Needed for https://github.com/iterative/example-repos-dev/issues/225 + - name: Installs JSON5 + run: npm install -g json5 + - name: Generate metrics report + env: + REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + cml ci + if [ $GITHUB_REF = refs/heads/main ]; then + PREVIOUS_REF=HEAD~1 + else + PREVIOUS_REF=main + git fetch origin main:main + fi + + dvc pull eval + dvc plots diff $PREVIOUS_REF workspace \ + --show-vega --targets ROC | json5 > vega.json + vl2svg vega.json roc.svg + + dvc plots diff $PREVIOUS_REF workspace \ + --show-vega --targets Precision-Recall | json5 > vega.json + vl2svg vega.json prc.svg + + dvc plots diff $PREVIOUS_REF workspace \ + --show-vega --targets Confusion-Matrix | json5 > vega.json + vl2svg vega.json confusion.svg + + cp eval/plots/images/importance.png importance_workspace.png + + git checkout $PREVIOUS_REF -- dvc.lock + cp eval/plots/images/importance.png importance_previous.png + + dvc_report=$(dvc exp diff $PREVIOUS_REF --md) + + cat <<EOF > report.md + # CML Report + ## Plots + ![ROC](./roc.svg) + ![Precision-Recall](./prc.svg) + ![Confusion Matrix](./confusion.svg) + #### Feature Importance: ${PREVIOUS_REF} + ![Feature Importance: 
${PREVIOUS_REF}](./importance_previous.png) + #### Feature Importance: workspace + ![Feature Importance: workspace](./importance_workspace.png) + + ## Metrics and Params + ### ${PREVIOUS_REF} → workspace + ${dvc_report} + EOF + + cml comment create --publish --pr=false report.md diff --git a/.gitignore b/.gitignore index 186220be..9f5df164 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,55 @@ __pycache__ .ipynb_checkpoints # Venv -dvc-venv \ No newline at end of file +dvc-venv + +# ssh keys +env/* +.env/**/* +.env/* + +################################ + +# From gittemplate import + +# Compiled source # +################### +*.com +*.class +*.dll +*.exe +*.o +*.so + +# Packages # +############ +# it's better to unpack these files and commit the raw source +# git has its own built in compression methods +*.7z +*.dmg +*.gz +*.iso +*.jar +*.rar +*.tar +*.zip + +# Logs and databases # +###################### +*.log +*.sql +*.sqlite + +# OS generated files # +###################### +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db + +# DVc tracks # +/file.txt +/datadir +/file_data.txt diff --git a/README.md b/README.md index 5d3aa41d..41870ca4 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,8 @@ ### 1. 
Fork / Clone this repository ```bash -git clone https://gitlab.com/iterative.ai/cse/tutorials/course-ds-base.git +git clone https://github.com/mrbestnaija/gitworkflow-course-ds-base.git cd course-ds-base -git checkout step-1 ``` @@ -16,6 +15,7 @@ git checkout step-1 Create virtual environment named `dvc-venv` (you may use other name) ```bash python3 -m venv dvc-venv +echo "export PYTHONPATH=$PWD" >> dvc-venv/bin/activate source dvc-venv/bin/activate ``` Install python libraries @@ -31,9 +31,13 @@ Add Virtual Environment to Jupyter Notebook python -m ipykernel install --user --name=dvc-venv ``` -Configure ToC for jupyter notebook (optional) +Configure ToC for jupyter notebook (optional)/Install the python package ```bash + + +pip install jupyter_contrib_nbextensions + jupyter contrib nbextension install --user jupyter nbextension enable toc2/main ``` diff --git a/arn:aws:s3:::bee-ml-dataset-jan-24/01/05828cebc2a54ad9a65b6660f3209c b/arn:aws:s3:::bee-ml-dataset-jan-24/01/05828cebc2a54ad9a65b6660f3209c new file mode 100644 index 00000000..b1a2771f --- /dev/null +++ b/arn:aws:s3:::bee-ml-dataset-jan-24/01/05828cebc2a54ad9a65b6660f3209c @@ -0,0 +1,119 @@ +sepal_length,sepal_width,petal_length,petal_width,sepal_length_to_sepal_width,petal_length_to_petal_width,target +6.7,3.1,4.4,1.4,2.161290322580645,3.1428571428571432,1 +4.8,3.4,1.6,0.2,1.411764705882353,8.0,0 +4.4,3.2,1.3,0.2,1.375,6.5,0 +6.3,2.5,5.0,1.9,2.52,2.631578947368421,2 +6.4,3.2,4.5,1.5,2.0,3.0,1 +5.2,3.5,1.5,0.2,1.4857142857142858,7.5,0 +5.0,3.6,1.4,0.2,1.3888888888888888,6.999999999999999,0 +5.2,4.1,1.5,0.1,1.2682926829268295,15.0,0 +5.8,2.7,5.1,1.9,2.148148148148148,2.6842105263157894,2 +6.0,3.4,4.5,1.6,1.7647058823529411,2.8125,1 +6.7,3.1,4.7,1.5,2.161290322580645,3.1333333333333333,1 +5.4,3.9,1.3,0.4,1.3846153846153848,3.25,0 +5.4,3.7,1.5,0.2,1.4594594594594594,7.5,0 +5.5,2.4,3.7,1.0,2.291666666666667,3.7,1 +6.3,2.8,5.1,1.5,2.25,3.4,2 +6.4,3.1,5.5,1.8,2.064516129032258,3.055555555555556,2 
+6.6,3.0,4.4,1.4,2.2,3.1428571428571432,1 +7.2,3.6,6.1,2.5,2.0,2.44,2 +5.7,2.9,4.2,1.3,1.9655172413793105,3.230769230769231,1 +7.6,3.0,6.6,2.1,2.533333333333333,3.1428571428571423,2 +5.6,3.0,4.5,1.5,1.8666666666666665,3.0,1 +5.1,3.5,1.4,0.2,1.457142857142857,6.999999999999999,0 +7.7,2.8,6.7,2.0,2.7500000000000004,3.35,2 +5.8,2.7,4.1,1.0,2.148148148148148,4.1,1 +5.2,3.4,1.4,0.2,1.5294117647058825,6.999999999999999,0 +5.0,3.5,1.3,0.3,1.4285714285714286,4.333333333333334,0 +5.1,3.8,1.9,0.4,1.3421052631578947,4.749999999999999,0 +5.0,2.0,3.5,1.0,2.5,3.5,1 +6.3,2.7,4.9,1.8,2.333333333333333,2.7222222222222223,2 +4.8,3.4,1.9,0.2,1.411764705882353,9.499999999999998,0 +5.0,3.0,1.6,0.2,1.6666666666666667,8.0,0 +5.1,3.3,1.7,0.5,1.5454545454545454,3.4,0 +5.6,2.7,4.2,1.3,2.074074074074074,3.230769230769231,1 +5.1,3.4,1.5,0.2,1.5,7.5,0 +5.7,3.0,4.2,1.2,1.9,3.5000000000000004,1 +7.7,3.8,6.7,2.2,2.0263157894736845,3.0454545454545454,2 +4.6,3.2,1.4,0.2,1.4374999999999998,6.999999999999999,0 +6.2,2.9,4.3,1.3,2.137931034482759,3.3076923076923075,1 +5.7,2.5,5.0,2.0,2.28,2.5,2 +5.5,4.2,1.4,0.2,1.3095238095238095,6.999999999999999,0 +6.0,3.0,4.8,1.8,2.0,2.6666666666666665,2 +5.8,2.7,5.1,1.9,2.148148148148148,2.6842105263157894,2 +6.0,2.2,4.0,1.0,2.727272727272727,4.0,1 +5.4,3.0,4.5,1.5,1.8,3.0,1 +6.2,3.4,5.4,2.3,1.823529411764706,2.347826086956522,2 +5.5,2.3,4.0,1.3,2.391304347826088,3.0769230769230766,1 +5.4,3.9,1.7,0.4,1.3846153846153848,4.25,0 +5.0,2.3,3.3,1.0,2.173913043478261,3.3,1 +6.4,2.7,5.3,1.9,2.3703703703703702,2.7894736842105265,2 +5.0,3.3,1.4,0.2,1.5151515151515151,6.999999999999999,0 +5.0,3.2,1.2,0.2,1.5625,5.999999999999999,0 +5.5,2.4,3.8,1.1,2.291666666666667,3.454545454545454,1 +6.7,3.0,5.0,1.7,2.2333333333333334,2.9411764705882355,1 +4.9,3.1,1.5,0.2,1.5806451612903227,7.5,0 +5.8,2.8,5.1,2.4,2.071428571428572,2.125,2 +5.0,3.4,1.5,0.2,1.4705882352941178,7.5,0 +5.0,3.5,1.6,0.6,1.4285714285714286,2.666666666666667,0 +5.9,3.2,4.8,1.8,1.84375,2.6666666666666665,1 
+5.1,2.5,3.0,1.1,2.04,2.727272727272727,1 +6.9,3.2,5.7,2.3,2.15625,2.4782608695652177,2 +6.0,2.7,5.1,1.6,2.222222222222222,3.1875,1 +6.1,2.6,5.6,1.4,2.346153846153846,4.0,2 +7.7,3.0,6.1,2.3,2.566666666666667,2.6521739130434785,2 +5.5,2.5,4.0,1.3,2.2,3.0769230769230766,1 +4.4,2.9,1.4,0.2,1.517241379310345,6.999999999999999,0 +4.3,3.0,1.1,0.1,1.4333333333333331,11.0,0 +6.0,2.2,5.0,1.5,2.727272727272727,3.333333333333333,2 +7.2,3.2,6.0,1.8,2.25,3.333333333333333,2 +4.6,3.1,1.5,0.2,1.4838709677419353,7.5,0 +5.1,3.5,1.4,0.3,1.457142857142857,4.666666666666667,0 +4.4,3.0,1.3,0.2,1.4666666666666668,6.5,0 +6.3,2.5,4.9,1.5,2.52,3.266666666666667,1 +6.3,3.4,5.6,2.4,1.8529411764705883,2.333333333333333,2 +4.6,3.4,1.4,0.3,1.352941176470588,4.666666666666667,0 +6.8,3.0,5.5,2.1,2.2666666666666666,2.619047619047619,2 +6.3,3.3,6.0,2.5,1.9090909090909087,2.4,2 +4.7,3.2,1.3,0.2,1.46875,6.5,0 +6.1,2.9,4.7,1.4,2.103448275862069,3.3571428571428577,1 +6.5,2.8,4.6,1.5,2.321428571428572,3.0666666666666664,1 +6.2,2.8,4.8,1.8,2.2142857142857144,2.6666666666666665,2 +7.0,3.2,4.7,1.4,2.1875,3.3571428571428577,1 +6.4,3.2,5.3,2.3,2.0,2.3043478260869565,2 +5.1,3.8,1.6,0.2,1.3421052631578947,8.0,0 +6.9,3.1,5.4,2.1,2.2258064516129035,2.571428571428572,2 +5.9,3.0,4.2,1.5,1.9666666666666668,2.8000000000000003,1 +6.5,3.0,5.2,2.0,2.1666666666666665,2.6,2 +5.7,2.6,3.5,1.0,2.1923076923076925,3.5,1 +5.2,2.7,3.9,1.4,1.9259259259259256,2.785714285714286,1 +6.1,3.0,4.6,1.4,2.033333333333333,3.2857142857142856,1 +4.5,2.3,1.3,0.3,1.956521739130435,4.333333333333334,0 +6.6,2.9,4.6,1.3,2.2758620689655173,3.538461538461538,1 +5.5,2.6,4.4,1.2,2.1153846153846154,3.666666666666667,1 +5.3,3.7,1.5,0.2,1.4324324324324322,7.5,0 +5.6,3.0,4.1,1.3,1.8666666666666665,3.1538461538461533,1 +7.3,2.9,6.3,1.8,2.5172413793103448,3.5,2 +6.7,3.3,5.7,2.1,2.0303030303030303,2.7142857142857144,2 +5.1,3.7,1.5,0.4,1.3783783783783785,3.75,0 +4.9,2.4,3.3,1.0,2.041666666666667,3.3,1 +6.7,3.3,5.7,2.5,2.0303030303030303,2.28,2 
+7.2,3.0,5.8,1.6,2.4,3.625,2 +4.9,3.6,1.4,0.1,1.3611111111111112,13.999999999999998,0 +6.7,3.1,5.6,2.4,2.161290322580645,2.333333333333333,2 +4.9,3.0,1.4,0.2,1.6333333333333335,6.999999999999999,0 +6.9,3.1,4.9,1.5,2.2258064516129035,3.266666666666667,1 +7.4,2.8,6.1,1.9,2.6428571428571432,3.210526315789473,2 +6.3,2.9,5.6,1.8,2.1724137931034484,3.1111111111111107,2 +5.7,2.8,4.1,1.3,2.035714285714286,3.1538461538461533,1 +6.5,3.0,5.5,1.8,2.1666666666666665,3.055555555555556,2 +6.3,2.3,4.4,1.3,2.739130434782609,3.3846153846153846,1 +6.4,2.9,4.3,1.3,2.206896551724138,3.3076923076923075,1 +5.6,2.8,4.9,2.0,2.0,2.45,2 +5.9,3.0,5.1,1.8,1.9666666666666668,2.833333333333333,2 +5.4,3.4,1.7,0.2,1.5882352941176472,8.5,0 +6.1,2.8,4.0,1.3,2.1785714285714284,3.0769230769230766,1 +4.9,2.5,4.5,1.7,1.96,2.647058823529412,2 +5.8,4.0,1.2,0.2,1.45,5.999999999999999,0 +5.8,2.6,4.0,1.2,2.230769230769231,3.333333333333333,1 +7.1,3.0,5.9,2.1,2.3666666666666667,2.8095238095238098,2 diff --git a/arn:aws:s3:::bee-ml-dataset-jan-24/42/24576f0267bf88902f87f0f6200967 b/arn:aws:s3:::bee-ml-dataset-jan-24/42/24576f0267bf88902f87f0f6200967 new file mode 100644 index 00000000..c6c10dc2 --- /dev/null +++ b/arn:aws:s3:::bee-ml-dataset-jan-24/42/24576f0267bf88902f87f0f6200967 @@ -0,0 +1,151 @@ +sepal_length,sepal_width,petal_length,petal_width,target +5.1,3.5,1.4,0.2,0 +4.9,3.0,1.4,0.2,0 +4.7,3.2,1.3,0.2,0 +4.6,3.1,1.5,0.2,0 +5.0,3.6,1.4,0.2,0 +5.4,3.9,1.7,0.4,0 +4.6,3.4,1.4,0.3,0 +5.0,3.4,1.5,0.2,0 +4.4,2.9,1.4,0.2,0 +4.9,3.1,1.5,0.1,0 +5.4,3.7,1.5,0.2,0 +4.8,3.4,1.6,0.2,0 +4.8,3.0,1.4,0.1,0 +4.3,3.0,1.1,0.1,0 +5.8,4.0,1.2,0.2,0 +5.7,4.4,1.5,0.4,0 +5.4,3.9,1.3,0.4,0 +5.1,3.5,1.4,0.3,0 +5.7,3.8,1.7,0.3,0 +5.1,3.8,1.5,0.3,0 +5.4,3.4,1.7,0.2,0 +5.1,3.7,1.5,0.4,0 +4.6,3.6,1.0,0.2,0 +5.1,3.3,1.7,0.5,0 +4.8,3.4,1.9,0.2,0 +5.0,3.0,1.6,0.2,0 +5.0,3.4,1.6,0.4,0 +5.2,3.5,1.5,0.2,0 +5.2,3.4,1.4,0.2,0 +4.7,3.2,1.6,0.2,0 +4.8,3.1,1.6,0.2,0 +5.4,3.4,1.5,0.4,0 +5.2,4.1,1.5,0.1,0 +5.5,4.2,1.4,0.2,0 +4.9,3.1,1.5,0.2,0 
+5.0,3.2,1.2,0.2,0 +5.5,3.5,1.3,0.2,0 +4.9,3.6,1.4,0.1,0 +4.4,3.0,1.3,0.2,0 +5.1,3.4,1.5,0.2,0 +5.0,3.5,1.3,0.3,0 +4.5,2.3,1.3,0.3,0 +4.4,3.2,1.3,0.2,0 +5.0,3.5,1.6,0.6,0 +5.1,3.8,1.9,0.4,0 +4.8,3.0,1.4,0.3,0 +5.1,3.8,1.6,0.2,0 +4.6,3.2,1.4,0.2,0 +5.3,3.7,1.5,0.2,0 +5.0,3.3,1.4,0.2,0 +7.0,3.2,4.7,1.4,1 +6.4,3.2,4.5,1.5,1 +6.9,3.1,4.9,1.5,1 +5.5,2.3,4.0,1.3,1 +6.5,2.8,4.6,1.5,1 +5.7,2.8,4.5,1.3,1 +6.3,3.3,4.7,1.6,1 +4.9,2.4,3.3,1.0,1 +6.6,2.9,4.6,1.3,1 +5.2,2.7,3.9,1.4,1 +5.0,2.0,3.5,1.0,1 +5.9,3.0,4.2,1.5,1 +6.0,2.2,4.0,1.0,1 +6.1,2.9,4.7,1.4,1 +5.6,2.9,3.6,1.3,1 +6.7,3.1,4.4,1.4,1 +5.6,3.0,4.5,1.5,1 +5.8,2.7,4.1,1.0,1 +6.2,2.2,4.5,1.5,1 +5.6,2.5,3.9,1.1,1 +5.9,3.2,4.8,1.8,1 +6.1,2.8,4.0,1.3,1 +6.3,2.5,4.9,1.5,1 +6.1,2.8,4.7,1.2,1 +6.4,2.9,4.3,1.3,1 +6.6,3.0,4.4,1.4,1 +6.8,2.8,4.8,1.4,1 +6.7,3.0,5.0,1.7,1 +6.0,2.9,4.5,1.5,1 +5.7,2.6,3.5,1.0,1 +5.5,2.4,3.8,1.1,1 +5.5,2.4,3.7,1.0,1 +5.8,2.7,3.9,1.2,1 +6.0,2.7,5.1,1.6,1 +5.4,3.0,4.5,1.5,1 +6.0,3.4,4.5,1.6,1 +6.7,3.1,4.7,1.5,1 +6.3,2.3,4.4,1.3,1 +5.6,3.0,4.1,1.3,1 +5.5,2.5,4.0,1.3,1 +5.5,2.6,4.4,1.2,1 +6.1,3.0,4.6,1.4,1 +5.8,2.6,4.0,1.2,1 +5.0,2.3,3.3,1.0,1 +5.6,2.7,4.2,1.3,1 +5.7,3.0,4.2,1.2,1 +5.7,2.9,4.2,1.3,1 +6.2,2.9,4.3,1.3,1 +5.1,2.5,3.0,1.1,1 +5.7,2.8,4.1,1.3,1 +6.3,3.3,6.0,2.5,2 +5.8,2.7,5.1,1.9,2 +7.1,3.0,5.9,2.1,2 +6.3,2.9,5.6,1.8,2 +6.5,3.0,5.8,2.2,2 +7.6,3.0,6.6,2.1,2 +4.9,2.5,4.5,1.7,2 +7.3,2.9,6.3,1.8,2 +6.7,2.5,5.8,1.8,2 +7.2,3.6,6.1,2.5,2 +6.5,3.2,5.1,2.0,2 +6.4,2.7,5.3,1.9,2 +6.8,3.0,5.5,2.1,2 +5.7,2.5,5.0,2.0,2 +5.8,2.8,5.1,2.4,2 +6.4,3.2,5.3,2.3,2 +6.5,3.0,5.5,1.8,2 +7.7,3.8,6.7,2.2,2 +7.7,2.6,6.9,2.3,2 +6.0,2.2,5.0,1.5,2 +6.9,3.2,5.7,2.3,2 +5.6,2.8,4.9,2.0,2 +7.7,2.8,6.7,2.0,2 +6.3,2.7,4.9,1.8,2 +6.7,3.3,5.7,2.1,2 +7.2,3.2,6.0,1.8,2 +6.2,2.8,4.8,1.8,2 +6.1,3.0,4.9,1.8,2 +6.4,2.8,5.6,2.1,2 +7.2,3.0,5.8,1.6,2 +7.4,2.8,6.1,1.9,2 +7.9,3.8,6.4,2.0,2 +6.4,2.8,5.6,2.2,2 +6.3,2.8,5.1,1.5,2 +6.1,2.6,5.6,1.4,2 +7.7,3.0,6.1,2.3,2 +6.3,3.4,5.6,2.4,2 +6.4,3.1,5.5,1.8,2 +6.0,3.0,4.8,1.8,2 +6.9,3.1,5.4,2.1,2 
+6.7,3.1,5.6,2.4,2 +6.9,3.1,5.1,2.3,2 +5.8,2.7,5.1,1.9,2 +6.8,3.2,5.9,2.3,2 +6.7,3.3,5.7,2.5,2 +6.7,3.0,5.2,2.3,2 +6.3,2.5,5.0,1.9,2 +6.5,3.0,5.2,2.0,2 +6.2,3.4,5.4,2.3,2 +5.9,3.0,5.1,1.8,2 diff --git a/arn:aws:s3:::bee-ml-dataset-jan-24/5d/03a1564b3038fc35a842f8e4bde491 b/arn:aws:s3:::bee-ml-dataset-jan-24/5d/03a1564b3038fc35a842f8e4bde491 new file mode 100644 index 00000000..1f34b3b5 --- /dev/null +++ b/arn:aws:s3:::bee-ml-dataset-jan-24/5d/03a1564b3038fc35a842f8e4bde491 @@ -0,0 +1,151 @@ +sepal_length,sepal_width,petal_length,petal_width,sepal_length_to_sepal_width,petal_length_to_petal_width,target +5.1,3.5,1.4,0.2,1.457142857142857,6.999999999999999,0 +4.9,3.0,1.4,0.2,1.6333333333333335,6.999999999999999,0 +4.7,3.2,1.3,0.2,1.46875,6.5,0 +4.6,3.1,1.5,0.2,1.4838709677419353,7.5,0 +5.0,3.6,1.4,0.2,1.3888888888888888,6.999999999999999,0 +5.4,3.9,1.7,0.4,1.3846153846153848,4.25,0 +4.6,3.4,1.4,0.3,1.352941176470588,4.666666666666667,0 +5.0,3.4,1.5,0.2,1.4705882352941178,7.5,0 +4.4,2.9,1.4,0.2,1.517241379310345,6.999999999999999,0 +4.9,3.1,1.5,0.1,1.5806451612903227,15.0,0 +5.4,3.7,1.5,0.2,1.4594594594594594,7.5,0 +4.8,3.4,1.6,0.2,1.411764705882353,8.0,0 +4.8,3.0,1.4,0.1,1.5999999999999999,13.999999999999998,0 +4.3,3.0,1.1,0.1,1.4333333333333333,11.0,0 +5.8,4.0,1.2,0.2,1.45,5.999999999999999,0 +5.7,4.4,1.5,0.4,1.2954545454545454,3.75,0 +5.4,3.9,1.3,0.4,1.3846153846153848,3.25,0 +5.1,3.5,1.4,0.3,1.457142857142857,4.666666666666667,0 +5.7,3.8,1.7,0.3,1.5000000000000002,5.666666666666667,0 +5.1,3.8,1.5,0.3,1.3421052631578947,5.0,0 +5.4,3.4,1.7,0.2,1.5882352941176472,8.5,0 +5.1,3.7,1.5,0.4,1.3783783783783783,3.75,0 +4.6,3.6,1.0,0.2,1.2777777777777777,5.0,0 +5.1,3.3,1.7,0.5,1.5454545454545454,3.4,0 +4.8,3.4,1.9,0.2,1.411764705882353,9.499999999999998,0 +5.0,3.0,1.6,0.2,1.6666666666666667,8.0,0 +5.0,3.4,1.6,0.4,1.4705882352941178,4.0,0 +5.2,3.5,1.5,0.2,1.4857142857142858,7.5,0 +5.2,3.4,1.4,0.2,1.5294117647058825,6.999999999999999,0 +4.7,3.2,1.6,0.2,1.46875,8.0,0 
+4.8,3.1,1.6,0.2,1.5483870967741935,8.0,0 +5.4,3.4,1.5,0.4,1.5882352941176472,3.75,0 +5.2,4.1,1.5,0.1,1.2682926829268295,15.0,0 +5.5,4.2,1.4,0.2,1.3095238095238095,6.999999999999999,0 +4.9,3.1,1.5,0.2,1.5806451612903227,7.5,0 +5.0,3.2,1.2,0.2,1.5625,5.999999999999999,0 +5.5,3.5,1.3,0.2,1.5714285714285714,6.5,0 +4.9,3.6,1.4,0.1,1.3611111111111112,13.999999999999998,0 +4.4,3.0,1.3,0.2,1.4666666666666668,6.5,0 +5.1,3.4,1.5,0.2,1.5,7.5,0 +5.0,3.5,1.3,0.3,1.4285714285714286,4.333333333333334,0 +4.5,2.3,1.3,0.3,1.956521739130435,4.333333333333334,0 +4.4,3.2,1.3,0.2,1.375,6.5,0 +5.0,3.5,1.6,0.6,1.4285714285714286,2.666666666666667,0 +5.1,3.8,1.9,0.4,1.3421052631578947,4.749999999999999,0 +4.8,3.0,1.4,0.3,1.5999999999999999,4.666666666666667,0 +5.1,3.8,1.6,0.2,1.3421052631578947,8.0,0 +4.6,3.2,1.4,0.2,1.4374999999999998,6.999999999999999,0 +5.3,3.7,1.5,0.2,1.4324324324324322,7.5,0 +5.0,3.3,1.4,0.2,1.5151515151515151,6.999999999999999,0 +7.0,3.2,4.7,1.4,2.1875,3.3571428571428577,1 +6.4,3.2,4.5,1.5,2.0,3.0,1 +6.9,3.1,4.9,1.5,2.2258064516129035,3.266666666666667,1 +5.5,2.3,4.0,1.3,2.3913043478260874,3.0769230769230766,1 +6.5,2.8,4.6,1.5,2.3214285714285716,3.0666666666666664,1 +5.7,2.8,4.5,1.3,2.035714285714286,3.4615384615384612,1 +6.3,3.3,4.7,1.6,1.9090909090909092,2.9375,1 +4.9,2.4,3.3,1.0,2.041666666666667,3.3,1 +6.6,2.9,4.6,1.3,2.2758620689655173,3.538461538461538,1 +5.2,2.7,3.9,1.4,1.9259259259259258,2.785714285714286,1 +5.0,2.0,3.5,1.0,2.5,3.5,1 +5.9,3.0,4.2,1.5,1.9666666666666668,2.8000000000000003,1 +6.0,2.2,4.0,1.0,2.727272727272727,4.0,1 +6.1,2.9,4.7,1.4,2.103448275862069,3.3571428571428577,1 +5.6,2.9,3.6,1.3,1.9310344827586206,2.769230769230769,1 +6.7,3.1,4.4,1.4,2.161290322580645,3.1428571428571432,1 +5.6,3.0,4.5,1.5,1.8666666666666665,3.0,1 +5.8,2.7,4.1,1.0,2.148148148148148,4.1,1 +6.2,2.2,4.5,1.5,2.818181818181818,3.0,1 +5.6,2.5,3.9,1.1,2.2399999999999998,3.545454545454545,1 +5.9,3.2,4.8,1.8,1.84375,2.6666666666666665,1 
+6.1,2.8,4.0,1.3,2.1785714285714284,3.0769230769230766,1 +6.3,2.5,4.9,1.5,2.52,3.266666666666667,1 +6.1,2.8,4.7,1.2,2.1785714285714284,3.916666666666667,1 +6.4,2.9,4.3,1.3,2.206896551724138,3.3076923076923075,1 +6.6,3.0,4.4,1.4,2.1999999999999997,3.1428571428571432,1 +6.8,2.8,4.8,1.4,2.428571428571429,3.428571428571429,1 +6.7,3.0,5.0,1.7,2.2333333333333334,2.9411764705882355,1 +6.0,2.9,4.5,1.5,2.0689655172413794,3.0,1 +5.7,2.6,3.5,1.0,2.1923076923076925,3.5,1 +5.5,2.4,3.8,1.1,2.291666666666667,3.454545454545454,1 +5.5,2.4,3.7,1.0,2.291666666666667,3.7,1 +5.8,2.7,3.9,1.2,2.148148148148148,3.25,1 +6.0,2.7,5.1,1.6,2.222222222222222,3.1874999999999996,1 +5.4,3.0,4.5,1.5,1.8,3.0,1 +6.0,3.4,4.5,1.6,1.7647058823529411,2.8125,1 +6.7,3.1,4.7,1.5,2.161290322580645,3.1333333333333333,1 +6.3,2.3,4.4,1.3,2.739130434782609,3.3846153846153846,1 +5.6,3.0,4.1,1.3,1.8666666666666665,3.1538461538461533,1 +5.5,2.5,4.0,1.3,2.2,3.0769230769230766,1 +5.5,2.6,4.4,1.2,2.1153846153846154,3.666666666666667,1 +6.1,3.0,4.6,1.4,2.033333333333333,3.2857142857142856,1 +5.8,2.6,4.0,1.2,2.230769230769231,3.3333333333333335,1 +5.0,2.3,3.3,1.0,2.173913043478261,3.3,1 +5.6,2.7,4.2,1.3,2.074074074074074,3.230769230769231,1 +5.7,3.0,4.2,1.2,1.9000000000000001,3.5000000000000004,1 +5.7,2.9,4.2,1.3,1.9655172413793105,3.230769230769231,1 +6.2,2.9,4.3,1.3,2.137931034482759,3.3076923076923075,1 +5.1,2.5,3.0,1.1,2.04,2.727272727272727,1 +5.7,2.8,4.1,1.3,2.035714285714286,3.1538461538461533,1 +6.3,3.3,6.0,2.5,1.9090909090909092,2.4,2 +5.8,2.7,5.1,1.9,2.148148148148148,2.6842105263157894,2 +7.1,3.0,5.9,2.1,2.3666666666666667,2.8095238095238098,2 +6.3,2.9,5.6,1.8,2.1724137931034484,3.1111111111111107,2 +6.5,3.0,5.8,2.2,2.1666666666666665,2.6363636363636362,2 +7.6,3.0,6.6,2.1,2.533333333333333,3.1428571428571423,2 +4.9,2.5,4.5,1.7,1.9600000000000002,2.6470588235294117,2 +7.3,2.9,6.3,1.8,2.5172413793103448,3.5,2 +6.7,2.5,5.8,1.8,2.68,3.222222222222222,2 +7.2,3.6,6.1,2.5,2.0,2.44,2 +6.5,3.2,5.1,2.0,2.03125,2.55,2 
+6.4,2.7,5.3,1.9,2.3703703703703702,2.7894736842105265,2 +6.8,3.0,5.5,2.1,2.2666666666666666,2.619047619047619,2 +5.7,2.5,5.0,2.0,2.2800000000000002,2.5,2 +5.8,2.8,5.1,2.4,2.0714285714285716,2.125,2 +6.4,3.2,5.3,2.3,2.0,2.3043478260869565,2 +6.5,3.0,5.5,1.8,2.1666666666666665,3.0555555555555554,2 +7.7,3.8,6.7,2.2,2.0263157894736845,3.0454545454545454,2 +7.7,2.6,6.9,2.3,2.9615384615384617,3.0000000000000004,2 +6.0,2.2,5.0,1.5,2.727272727272727,3.3333333333333335,2 +6.9,3.2,5.7,2.3,2.15625,2.4782608695652177,2 +5.6,2.8,4.9,2.0,2.0,2.45,2 +7.7,2.8,6.7,2.0,2.7500000000000004,3.35,2 +6.3,2.7,4.9,1.8,2.333333333333333,2.7222222222222223,2 +6.7,3.3,5.7,2.1,2.0303030303030303,2.7142857142857144,2 +7.2,3.2,6.0,1.8,2.25,3.333333333333333,2 +6.2,2.8,4.8,1.8,2.2142857142857144,2.6666666666666665,2 +6.1,3.0,4.9,1.8,2.033333333333333,2.7222222222222223,2 +6.4,2.8,5.6,2.1,2.285714285714286,2.6666666666666665,2 +7.2,3.0,5.8,1.6,2.4,3.6249999999999996,2 +7.4,2.8,6.1,1.9,2.6428571428571432,3.2105263157894735,2 +7.9,3.8,6.4,2.0,2.0789473684210527,3.2,2 +6.4,2.8,5.6,2.2,2.285714285714286,2.545454545454545,2 +6.3,2.8,5.1,1.5,2.25,3.4,2 +6.1,2.6,5.6,1.4,2.346153846153846,4.0,2 +7.7,3.0,6.1,2.3,2.566666666666667,2.6521739130434785,2 +6.3,3.4,5.6,2.4,1.8529411764705883,2.3333333333333335,2 +6.4,3.1,5.5,1.8,2.064516129032258,3.0555555555555554,2 +6.0,3.0,4.8,1.8,2.0,2.6666666666666665,2 +6.9,3.1,5.4,2.1,2.2258064516129035,2.5714285714285716,2 +6.7,3.1,5.6,2.4,2.161290322580645,2.3333333333333335,2 +6.9,3.1,5.1,2.3,2.2258064516129035,2.217391304347826,2 +5.8,2.7,5.1,1.9,2.148148148148148,2.6842105263157894,2 +6.8,3.2,5.9,2.3,2.125,2.565217391304348,2 +6.7,3.3,5.7,2.5,2.0303030303030303,2.2800000000000002,2 +6.7,3.0,5.2,2.3,2.2333333333333334,2.2608695652173916,2 +6.3,2.5,5.0,1.9,2.52,2.6315789473684212,2 +6.5,3.0,5.2,2.0,2.1666666666666665,2.6,2 +6.2,3.4,5.4,2.3,1.823529411764706,2.347826086956522,2 +5.9,3.0,5.1,1.8,1.9666666666666668,2.833333333333333,2 diff --git 
a/arn:aws:s3:::bee-ml-dataset-jan-24/70/44beeffac0f67cc0401146b26e8f3e b/arn:aws:s3:::bee-ml-dataset-jan-24/70/44beeffac0f67cc0401146b26e8f3e new file mode 100644 index 00000000..1a3ad11a Binary files /dev/null and b/arn:aws:s3:::bee-ml-dataset-jan-24/70/44beeffac0f67cc0401146b26e8f3e differ diff --git a/arn:aws:s3:::bee-ml-dataset-jan-24/d4/1d8cd98f00b204e9800998ecf8427e b/arn:aws:s3:::bee-ml-dataset-jan-24/d4/1d8cd98f00b204e9800998ecf8427e new file mode 100644 index 00000000..e69de29b diff --git a/arn:aws:s3:::bee-ml-dataset-jan-24/d5/33847a0ca14ca93752b1b1f1df349e b/arn:aws:s3:::bee-ml-dataset-jan-24/d5/33847a0ca14ca93752b1b1f1df349e new file mode 100644 index 00000000..0b4a08e1 --- /dev/null +++ b/arn:aws:s3:::bee-ml-dataset-jan-24/d5/33847a0ca14ca93752b1b1f1df349e @@ -0,0 +1 @@ +{"f1_score": 0.9305555555555555} \ No newline at end of file diff --git a/arn:aws:s3:::bee-ml-dataset-jan-24/dd/ede7ba843927234678d5ec8d4f9f99.dir b/arn:aws:s3:::bee-ml-dataset-jan-24/dd/ede7ba843927234678d5ec8d4f9f99.dir new file mode 100644 index 00000000..6ed4e592 --- /dev/null +++ b/arn:aws:s3:::bee-ml-dataset-jan-24/dd/ede7ba843927234678d5ec8d4f9f99.dir @@ -0,0 +1 @@ +[{"md5": "d41d8cd98f00b204e9800998ecf8427e", "relpath": "data1.csv"}, {"md5": "d41d8cd98f00b204e9800998ecf8427e", "relpath": "data2.csv"}, {"md5": "d41d8cd98f00b204e9800998ecf8427e", "relpath": "data3.csv"}] \ No newline at end of file diff --git a/arn:aws:s3:::bee-ml-dataset-jan-24/de/44a28d8aa5da6d35dc3778e613449d b/arn:aws:s3:::bee-ml-dataset-jan-24/de/44a28d8aa5da6d35dc3778e613449d new file mode 100644 index 00000000..95234fa4 Binary files /dev/null and b/arn:aws:s3:::bee-ml-dataset-jan-24/de/44a28d8aa5da6d35dc3778e613449d differ diff --git a/arn:aws:s3:::bee-ml-dataset-jan-24/f9/53ee125de2bd311a3f846acfac349c b/arn:aws:s3:::bee-ml-dataset-jan-24/f9/53ee125de2bd311a3f846acfac349c new file mode 100644 index 00000000..548c0a05 --- /dev/null +++ b/arn:aws:s3:::bee-ml-dataset-jan-24/f9/53ee125de2bd311a3f846acfac349c 
@@ -0,0 +1,33 @@ +sepal_length,sepal_width,petal_length,petal_width,sepal_length_to_sepal_width,petal_length_to_petal_width,target +6.1,2.8,4.7,1.2,2.1785714285714284,3.916666666666667,1 +5.7,3.8,1.7,0.3,1.5000000000000002,5.666666666666667,0 +7.7,2.6,6.9,2.3,2.9615384615384617,3.0000000000000004,2 +6.0,2.9,4.5,1.5,2.0689655172413794,3.0,1 +6.8,2.8,4.8,1.4,2.428571428571429,3.428571428571429,1 +5.4,3.4,1.5,0.4,1.5882352941176472,3.75,0 +5.6,2.9,3.6,1.3,1.9310344827586208,2.769230769230769,1 +6.9,3.1,5.1,2.3,2.2258064516129035,2.217391304347826,2 +6.2,2.2,4.5,1.5,2.818181818181818,3.0,1 +5.8,2.7,3.9,1.2,2.148148148148148,3.25,1 +6.5,3.2,5.1,2.0,2.03125,2.55,2 +4.8,3.0,1.4,0.1,1.6,13.999999999999998,0 +5.5,3.5,1.3,0.2,1.5714285714285714,6.5,0 +4.9,3.1,1.5,0.1,1.5806451612903227,15.0,0 +5.1,3.8,1.5,0.3,1.3421052631578947,5.0,0 +6.3,3.3,4.7,1.6,1.9090909090909087,2.9375,1 +6.5,3.0,5.8,2.2,2.1666666666666665,2.636363636363636,2 +5.6,2.5,3.9,1.1,2.24,3.545454545454545,1 +5.7,2.8,4.5,1.3,2.035714285714286,3.461538461538461,1 +6.4,2.8,5.6,2.2,2.285714285714286,2.545454545454545,2 +4.7,3.2,1.6,0.2,1.46875,8.0,0 +6.1,3.0,4.9,1.8,2.033333333333333,2.7222222222222223,2 +5.0,3.4,1.6,0.4,1.4705882352941178,4.0,0 +6.4,2.8,5.6,2.1,2.285714285714286,2.6666666666666665,2 +7.9,3.8,6.4,2.0,2.0789473684210527,3.2,2 +6.7,3.0,5.2,2.3,2.2333333333333334,2.260869565217392,2 +6.7,2.5,5.8,1.8,2.68,3.222222222222222,2 +6.8,3.2,5.9,2.3,2.125,2.565217391304348,2 +4.8,3.0,1.4,0.3,1.6,4.666666666666667,0 +4.8,3.1,1.6,0.2,1.5483870967741935,8.0,0 +4.6,3.6,1.0,0.2,1.2777777777777777,5.0,0 +5.7,4.4,1.5,0.4,1.2954545454545454,3.75,0 diff --git a/arn:aws:s3:::dvc-projects-research-mrbestnaija1/01/05828cebc2a54ad9a65b6660f3209c b/arn:aws:s3:::dvc-projects-research-mrbestnaija1/01/05828cebc2a54ad9a65b6660f3209c new file mode 100644 index 00000000..b1a2771f --- /dev/null +++ b/arn:aws:s3:::dvc-projects-research-mrbestnaija1/01/05828cebc2a54ad9a65b6660f3209c @@ -0,0 +1,119 @@ 
+sepal_length,sepal_width,petal_length,petal_width,sepal_length_to_sepal_width,petal_length_to_petal_width,target +6.7,3.1,4.4,1.4,2.161290322580645,3.1428571428571432,1 +4.8,3.4,1.6,0.2,1.411764705882353,8.0,0 +4.4,3.2,1.3,0.2,1.375,6.5,0 +6.3,2.5,5.0,1.9,2.52,2.631578947368421,2 +6.4,3.2,4.5,1.5,2.0,3.0,1 +5.2,3.5,1.5,0.2,1.4857142857142858,7.5,0 +5.0,3.6,1.4,0.2,1.3888888888888888,6.999999999999999,0 +5.2,4.1,1.5,0.1,1.2682926829268295,15.0,0 +5.8,2.7,5.1,1.9,2.148148148148148,2.6842105263157894,2 +6.0,3.4,4.5,1.6,1.7647058823529411,2.8125,1 +6.7,3.1,4.7,1.5,2.161290322580645,3.1333333333333333,1 +5.4,3.9,1.3,0.4,1.3846153846153848,3.25,0 +5.4,3.7,1.5,0.2,1.4594594594594594,7.5,0 +5.5,2.4,3.7,1.0,2.291666666666667,3.7,1 +6.3,2.8,5.1,1.5,2.25,3.4,2 +6.4,3.1,5.5,1.8,2.064516129032258,3.055555555555556,2 +6.6,3.0,4.4,1.4,2.2,3.1428571428571432,1 +7.2,3.6,6.1,2.5,2.0,2.44,2 +5.7,2.9,4.2,1.3,1.9655172413793105,3.230769230769231,1 +7.6,3.0,6.6,2.1,2.533333333333333,3.1428571428571423,2 +5.6,3.0,4.5,1.5,1.8666666666666665,3.0,1 +5.1,3.5,1.4,0.2,1.457142857142857,6.999999999999999,0 +7.7,2.8,6.7,2.0,2.7500000000000004,3.35,2 +5.8,2.7,4.1,1.0,2.148148148148148,4.1,1 +5.2,3.4,1.4,0.2,1.5294117647058825,6.999999999999999,0 +5.0,3.5,1.3,0.3,1.4285714285714286,4.333333333333334,0 +5.1,3.8,1.9,0.4,1.3421052631578947,4.749999999999999,0 +5.0,2.0,3.5,1.0,2.5,3.5,1 +6.3,2.7,4.9,1.8,2.333333333333333,2.7222222222222223,2 +4.8,3.4,1.9,0.2,1.411764705882353,9.499999999999998,0 +5.0,3.0,1.6,0.2,1.6666666666666667,8.0,0 +5.1,3.3,1.7,0.5,1.5454545454545454,3.4,0 +5.6,2.7,4.2,1.3,2.074074074074074,3.230769230769231,1 +5.1,3.4,1.5,0.2,1.5,7.5,0 +5.7,3.0,4.2,1.2,1.9,3.5000000000000004,1 +7.7,3.8,6.7,2.2,2.0263157894736845,3.0454545454545454,2 +4.6,3.2,1.4,0.2,1.4374999999999998,6.999999999999999,0 +6.2,2.9,4.3,1.3,2.137931034482759,3.3076923076923075,1 +5.7,2.5,5.0,2.0,2.28,2.5,2 +5.5,4.2,1.4,0.2,1.3095238095238095,6.999999999999999,0 +6.0,3.0,4.8,1.8,2.0,2.6666666666666665,2 
+5.8,2.7,5.1,1.9,2.148148148148148,2.6842105263157894,2 +6.0,2.2,4.0,1.0,2.727272727272727,4.0,1 +5.4,3.0,4.5,1.5,1.8,3.0,1 +6.2,3.4,5.4,2.3,1.823529411764706,2.347826086956522,2 +5.5,2.3,4.0,1.3,2.391304347826088,3.0769230769230766,1 +5.4,3.9,1.7,0.4,1.3846153846153848,4.25,0 +5.0,2.3,3.3,1.0,2.173913043478261,3.3,1 +6.4,2.7,5.3,1.9,2.3703703703703702,2.7894736842105265,2 +5.0,3.3,1.4,0.2,1.5151515151515151,6.999999999999999,0 +5.0,3.2,1.2,0.2,1.5625,5.999999999999999,0 +5.5,2.4,3.8,1.1,2.291666666666667,3.454545454545454,1 +6.7,3.0,5.0,1.7,2.2333333333333334,2.9411764705882355,1 +4.9,3.1,1.5,0.2,1.5806451612903227,7.5,0 +5.8,2.8,5.1,2.4,2.071428571428572,2.125,2 +5.0,3.4,1.5,0.2,1.4705882352941178,7.5,0 +5.0,3.5,1.6,0.6,1.4285714285714286,2.666666666666667,0 +5.9,3.2,4.8,1.8,1.84375,2.6666666666666665,1 +5.1,2.5,3.0,1.1,2.04,2.727272727272727,1 +6.9,3.2,5.7,2.3,2.15625,2.4782608695652177,2 +6.0,2.7,5.1,1.6,2.222222222222222,3.1875,1 +6.1,2.6,5.6,1.4,2.346153846153846,4.0,2 +7.7,3.0,6.1,2.3,2.566666666666667,2.6521739130434785,2 +5.5,2.5,4.0,1.3,2.2,3.0769230769230766,1 +4.4,2.9,1.4,0.2,1.517241379310345,6.999999999999999,0 +4.3,3.0,1.1,0.1,1.4333333333333331,11.0,0 +6.0,2.2,5.0,1.5,2.727272727272727,3.333333333333333,2 +7.2,3.2,6.0,1.8,2.25,3.333333333333333,2 +4.6,3.1,1.5,0.2,1.4838709677419353,7.5,0 +5.1,3.5,1.4,0.3,1.457142857142857,4.666666666666667,0 +4.4,3.0,1.3,0.2,1.4666666666666668,6.5,0 +6.3,2.5,4.9,1.5,2.52,3.266666666666667,1 +6.3,3.4,5.6,2.4,1.8529411764705883,2.333333333333333,2 +4.6,3.4,1.4,0.3,1.352941176470588,4.666666666666667,0 +6.8,3.0,5.5,2.1,2.2666666666666666,2.619047619047619,2 +6.3,3.3,6.0,2.5,1.9090909090909087,2.4,2 +4.7,3.2,1.3,0.2,1.46875,6.5,0 +6.1,2.9,4.7,1.4,2.103448275862069,3.3571428571428577,1 +6.5,2.8,4.6,1.5,2.321428571428572,3.0666666666666664,1 +6.2,2.8,4.8,1.8,2.2142857142857144,2.6666666666666665,2 +7.0,3.2,4.7,1.4,2.1875,3.3571428571428577,1 +6.4,3.2,5.3,2.3,2.0,2.3043478260869565,2 
+5.1,3.8,1.6,0.2,1.3421052631578947,8.0,0 +6.9,3.1,5.4,2.1,2.2258064516129035,2.571428571428572,2 +5.9,3.0,4.2,1.5,1.9666666666666668,2.8000000000000003,1 +6.5,3.0,5.2,2.0,2.1666666666666665,2.6,2 +5.7,2.6,3.5,1.0,2.1923076923076925,3.5,1 +5.2,2.7,3.9,1.4,1.9259259259259256,2.785714285714286,1 +6.1,3.0,4.6,1.4,2.033333333333333,3.2857142857142856,1 +4.5,2.3,1.3,0.3,1.956521739130435,4.333333333333334,0 +6.6,2.9,4.6,1.3,2.2758620689655173,3.538461538461538,1 +5.5,2.6,4.4,1.2,2.1153846153846154,3.666666666666667,1 +5.3,3.7,1.5,0.2,1.4324324324324322,7.5,0 +5.6,3.0,4.1,1.3,1.8666666666666665,3.1538461538461533,1 +7.3,2.9,6.3,1.8,2.5172413793103448,3.5,2 +6.7,3.3,5.7,2.1,2.0303030303030303,2.7142857142857144,2 +5.1,3.7,1.5,0.4,1.3783783783783785,3.75,0 +4.9,2.4,3.3,1.0,2.041666666666667,3.3,1 +6.7,3.3,5.7,2.5,2.0303030303030303,2.28,2 +7.2,3.0,5.8,1.6,2.4,3.625,2 +4.9,3.6,1.4,0.1,1.3611111111111112,13.999999999999998,0 +6.7,3.1,5.6,2.4,2.161290322580645,2.333333333333333,2 +4.9,3.0,1.4,0.2,1.6333333333333335,6.999999999999999,0 +6.9,3.1,4.9,1.5,2.2258064516129035,3.266666666666667,1 +7.4,2.8,6.1,1.9,2.6428571428571432,3.210526315789473,2 +6.3,2.9,5.6,1.8,2.1724137931034484,3.1111111111111107,2 +5.7,2.8,4.1,1.3,2.035714285714286,3.1538461538461533,1 +6.5,3.0,5.5,1.8,2.1666666666666665,3.055555555555556,2 +6.3,2.3,4.4,1.3,2.739130434782609,3.3846153846153846,1 +6.4,2.9,4.3,1.3,2.206896551724138,3.3076923076923075,1 +5.6,2.8,4.9,2.0,2.0,2.45,2 +5.9,3.0,5.1,1.8,1.9666666666666668,2.833333333333333,2 +5.4,3.4,1.7,0.2,1.5882352941176472,8.5,0 +6.1,2.8,4.0,1.3,2.1785714285714284,3.0769230769230766,1 +4.9,2.5,4.5,1.7,1.96,2.647058823529412,2 +5.8,4.0,1.2,0.2,1.45,5.999999999999999,0 +5.8,2.6,4.0,1.2,2.230769230769231,3.333333333333333,1 +7.1,3.0,5.9,2.1,2.3666666666666667,2.8095238095238098,2 diff --git a/arn:aws:s3:::dvc-projects-research-mrbestnaija1/42/24576f0267bf88902f87f0f6200967 b/arn:aws:s3:::dvc-projects-research-mrbestnaija1/42/24576f0267bf88902f87f0f6200967 new file 
mode 100644 index 00000000..c6c10dc2 --- /dev/null +++ b/arn:aws:s3:::dvc-projects-research-mrbestnaija1/42/24576f0267bf88902f87f0f6200967 @@ -0,0 +1,151 @@ +sepal_length,sepal_width,petal_length,petal_width,target +5.1,3.5,1.4,0.2,0 +4.9,3.0,1.4,0.2,0 +4.7,3.2,1.3,0.2,0 +4.6,3.1,1.5,0.2,0 +5.0,3.6,1.4,0.2,0 +5.4,3.9,1.7,0.4,0 +4.6,3.4,1.4,0.3,0 +5.0,3.4,1.5,0.2,0 +4.4,2.9,1.4,0.2,0 +4.9,3.1,1.5,0.1,0 +5.4,3.7,1.5,0.2,0 +4.8,3.4,1.6,0.2,0 +4.8,3.0,1.4,0.1,0 +4.3,3.0,1.1,0.1,0 +5.8,4.0,1.2,0.2,0 +5.7,4.4,1.5,0.4,0 +5.4,3.9,1.3,0.4,0 +5.1,3.5,1.4,0.3,0 +5.7,3.8,1.7,0.3,0 +5.1,3.8,1.5,0.3,0 +5.4,3.4,1.7,0.2,0 +5.1,3.7,1.5,0.4,0 +4.6,3.6,1.0,0.2,0 +5.1,3.3,1.7,0.5,0 +4.8,3.4,1.9,0.2,0 +5.0,3.0,1.6,0.2,0 +5.0,3.4,1.6,0.4,0 +5.2,3.5,1.5,0.2,0 +5.2,3.4,1.4,0.2,0 +4.7,3.2,1.6,0.2,0 +4.8,3.1,1.6,0.2,0 +5.4,3.4,1.5,0.4,0 +5.2,4.1,1.5,0.1,0 +5.5,4.2,1.4,0.2,0 +4.9,3.1,1.5,0.2,0 +5.0,3.2,1.2,0.2,0 +5.5,3.5,1.3,0.2,0 +4.9,3.6,1.4,0.1,0 +4.4,3.0,1.3,0.2,0 +5.1,3.4,1.5,0.2,0 +5.0,3.5,1.3,0.3,0 +4.5,2.3,1.3,0.3,0 +4.4,3.2,1.3,0.2,0 +5.0,3.5,1.6,0.6,0 +5.1,3.8,1.9,0.4,0 +4.8,3.0,1.4,0.3,0 +5.1,3.8,1.6,0.2,0 +4.6,3.2,1.4,0.2,0 +5.3,3.7,1.5,0.2,0 +5.0,3.3,1.4,0.2,0 +7.0,3.2,4.7,1.4,1 +6.4,3.2,4.5,1.5,1 +6.9,3.1,4.9,1.5,1 +5.5,2.3,4.0,1.3,1 +6.5,2.8,4.6,1.5,1 +5.7,2.8,4.5,1.3,1 +6.3,3.3,4.7,1.6,1 +4.9,2.4,3.3,1.0,1 +6.6,2.9,4.6,1.3,1 +5.2,2.7,3.9,1.4,1 +5.0,2.0,3.5,1.0,1 +5.9,3.0,4.2,1.5,1 +6.0,2.2,4.0,1.0,1 +6.1,2.9,4.7,1.4,1 +5.6,2.9,3.6,1.3,1 +6.7,3.1,4.4,1.4,1 +5.6,3.0,4.5,1.5,1 +5.8,2.7,4.1,1.0,1 +6.2,2.2,4.5,1.5,1 +5.6,2.5,3.9,1.1,1 +5.9,3.2,4.8,1.8,1 +6.1,2.8,4.0,1.3,1 +6.3,2.5,4.9,1.5,1 +6.1,2.8,4.7,1.2,1 +6.4,2.9,4.3,1.3,1 +6.6,3.0,4.4,1.4,1 +6.8,2.8,4.8,1.4,1 +6.7,3.0,5.0,1.7,1 +6.0,2.9,4.5,1.5,1 +5.7,2.6,3.5,1.0,1 +5.5,2.4,3.8,1.1,1 +5.5,2.4,3.7,1.0,1 +5.8,2.7,3.9,1.2,1 +6.0,2.7,5.1,1.6,1 +5.4,3.0,4.5,1.5,1 +6.0,3.4,4.5,1.6,1 +6.7,3.1,4.7,1.5,1 +6.3,2.3,4.4,1.3,1 +5.6,3.0,4.1,1.3,1 +5.5,2.5,4.0,1.3,1 +5.5,2.6,4.4,1.2,1 +6.1,3.0,4.6,1.4,1 +5.8,2.6,4.0,1.2,1 
+5.0,2.3,3.3,1.0,1 +5.6,2.7,4.2,1.3,1 +5.7,3.0,4.2,1.2,1 +5.7,2.9,4.2,1.3,1 +6.2,2.9,4.3,1.3,1 +5.1,2.5,3.0,1.1,1 +5.7,2.8,4.1,1.3,1 +6.3,3.3,6.0,2.5,2 +5.8,2.7,5.1,1.9,2 +7.1,3.0,5.9,2.1,2 +6.3,2.9,5.6,1.8,2 +6.5,3.0,5.8,2.2,2 +7.6,3.0,6.6,2.1,2 +4.9,2.5,4.5,1.7,2 +7.3,2.9,6.3,1.8,2 +6.7,2.5,5.8,1.8,2 +7.2,3.6,6.1,2.5,2 +6.5,3.2,5.1,2.0,2 +6.4,2.7,5.3,1.9,2 +6.8,3.0,5.5,2.1,2 +5.7,2.5,5.0,2.0,2 +5.8,2.8,5.1,2.4,2 +6.4,3.2,5.3,2.3,2 +6.5,3.0,5.5,1.8,2 +7.7,3.8,6.7,2.2,2 +7.7,2.6,6.9,2.3,2 +6.0,2.2,5.0,1.5,2 +6.9,3.2,5.7,2.3,2 +5.6,2.8,4.9,2.0,2 +7.7,2.8,6.7,2.0,2 +6.3,2.7,4.9,1.8,2 +6.7,3.3,5.7,2.1,2 +7.2,3.2,6.0,1.8,2 +6.2,2.8,4.8,1.8,2 +6.1,3.0,4.9,1.8,2 +6.4,2.8,5.6,2.1,2 +7.2,3.0,5.8,1.6,2 +7.4,2.8,6.1,1.9,2 +7.9,3.8,6.4,2.0,2 +6.4,2.8,5.6,2.2,2 +6.3,2.8,5.1,1.5,2 +6.1,2.6,5.6,1.4,2 +7.7,3.0,6.1,2.3,2 +6.3,3.4,5.6,2.4,2 +6.4,3.1,5.5,1.8,2 +6.0,3.0,4.8,1.8,2 +6.9,3.1,5.4,2.1,2 +6.7,3.1,5.6,2.4,2 +6.9,3.1,5.1,2.3,2 +5.8,2.7,5.1,1.9,2 +6.8,3.2,5.9,2.3,2 +6.7,3.3,5.7,2.5,2 +6.7,3.0,5.2,2.3,2 +6.3,2.5,5.0,1.9,2 +6.5,3.0,5.2,2.0,2 +6.2,3.4,5.4,2.3,2 +5.9,3.0,5.1,1.8,2 diff --git a/arn:aws:s3:::dvc-projects-research-mrbestnaija1/5d/03a1564b3038fc35a842f8e4bde491 b/arn:aws:s3:::dvc-projects-research-mrbestnaija1/5d/03a1564b3038fc35a842f8e4bde491 new file mode 100644 index 00000000..1f34b3b5 --- /dev/null +++ b/arn:aws:s3:::dvc-projects-research-mrbestnaija1/5d/03a1564b3038fc35a842f8e4bde491 @@ -0,0 +1,151 @@ +sepal_length,sepal_width,petal_length,petal_width,sepal_length_to_sepal_width,petal_length_to_petal_width,target +5.1,3.5,1.4,0.2,1.457142857142857,6.999999999999999,0 +4.9,3.0,1.4,0.2,1.6333333333333335,6.999999999999999,0 +4.7,3.2,1.3,0.2,1.46875,6.5,0 +4.6,3.1,1.5,0.2,1.4838709677419353,7.5,0 +5.0,3.6,1.4,0.2,1.3888888888888888,6.999999999999999,0 +5.4,3.9,1.7,0.4,1.3846153846153848,4.25,0 +4.6,3.4,1.4,0.3,1.352941176470588,4.666666666666667,0 +5.0,3.4,1.5,0.2,1.4705882352941178,7.5,0 +4.4,2.9,1.4,0.2,1.517241379310345,6.999999999999999,0 
+4.9,3.1,1.5,0.1,1.5806451612903227,15.0,0 +5.4,3.7,1.5,0.2,1.4594594594594594,7.5,0 +4.8,3.4,1.6,0.2,1.411764705882353,8.0,0 +4.8,3.0,1.4,0.1,1.5999999999999999,13.999999999999998,0 +4.3,3.0,1.1,0.1,1.4333333333333333,11.0,0 +5.8,4.0,1.2,0.2,1.45,5.999999999999999,0 +5.7,4.4,1.5,0.4,1.2954545454545454,3.75,0 +5.4,3.9,1.3,0.4,1.3846153846153848,3.25,0 +5.1,3.5,1.4,0.3,1.457142857142857,4.666666666666667,0 +5.7,3.8,1.7,0.3,1.5000000000000002,5.666666666666667,0 +5.1,3.8,1.5,0.3,1.3421052631578947,5.0,0 +5.4,3.4,1.7,0.2,1.5882352941176472,8.5,0 +5.1,3.7,1.5,0.4,1.3783783783783783,3.75,0 +4.6,3.6,1.0,0.2,1.2777777777777777,5.0,0 +5.1,3.3,1.7,0.5,1.5454545454545454,3.4,0 +4.8,3.4,1.9,0.2,1.411764705882353,9.499999999999998,0 +5.0,3.0,1.6,0.2,1.6666666666666667,8.0,0 +5.0,3.4,1.6,0.4,1.4705882352941178,4.0,0 +5.2,3.5,1.5,0.2,1.4857142857142858,7.5,0 +5.2,3.4,1.4,0.2,1.5294117647058825,6.999999999999999,0 +4.7,3.2,1.6,0.2,1.46875,8.0,0 +4.8,3.1,1.6,0.2,1.5483870967741935,8.0,0 +5.4,3.4,1.5,0.4,1.5882352941176472,3.75,0 +5.2,4.1,1.5,0.1,1.2682926829268295,15.0,0 +5.5,4.2,1.4,0.2,1.3095238095238095,6.999999999999999,0 +4.9,3.1,1.5,0.2,1.5806451612903227,7.5,0 +5.0,3.2,1.2,0.2,1.5625,5.999999999999999,0 +5.5,3.5,1.3,0.2,1.5714285714285714,6.5,0 +4.9,3.6,1.4,0.1,1.3611111111111112,13.999999999999998,0 +4.4,3.0,1.3,0.2,1.4666666666666668,6.5,0 +5.1,3.4,1.5,0.2,1.5,7.5,0 +5.0,3.5,1.3,0.3,1.4285714285714286,4.333333333333334,0 +4.5,2.3,1.3,0.3,1.956521739130435,4.333333333333334,0 +4.4,3.2,1.3,0.2,1.375,6.5,0 +5.0,3.5,1.6,0.6,1.4285714285714286,2.666666666666667,0 +5.1,3.8,1.9,0.4,1.3421052631578947,4.749999999999999,0 +4.8,3.0,1.4,0.3,1.5999999999999999,4.666666666666667,0 +5.1,3.8,1.6,0.2,1.3421052631578947,8.0,0 +4.6,3.2,1.4,0.2,1.4374999999999998,6.999999999999999,0 +5.3,3.7,1.5,0.2,1.4324324324324322,7.5,0 +5.0,3.3,1.4,0.2,1.5151515151515151,6.999999999999999,0 +7.0,3.2,4.7,1.4,2.1875,3.3571428571428577,1 +6.4,3.2,4.5,1.5,2.0,3.0,1 
+6.9,3.1,4.9,1.5,2.2258064516129035,3.266666666666667,1 +5.5,2.3,4.0,1.3,2.3913043478260874,3.0769230769230766,1 +6.5,2.8,4.6,1.5,2.3214285714285716,3.0666666666666664,1 +5.7,2.8,4.5,1.3,2.035714285714286,3.4615384615384612,1 +6.3,3.3,4.7,1.6,1.9090909090909092,2.9375,1 +4.9,2.4,3.3,1.0,2.041666666666667,3.3,1 +6.6,2.9,4.6,1.3,2.2758620689655173,3.538461538461538,1 +5.2,2.7,3.9,1.4,1.9259259259259258,2.785714285714286,1 +5.0,2.0,3.5,1.0,2.5,3.5,1 +5.9,3.0,4.2,1.5,1.9666666666666668,2.8000000000000003,1 +6.0,2.2,4.0,1.0,2.727272727272727,4.0,1 +6.1,2.9,4.7,1.4,2.103448275862069,3.3571428571428577,1 +5.6,2.9,3.6,1.3,1.9310344827586206,2.769230769230769,1 +6.7,3.1,4.4,1.4,2.161290322580645,3.1428571428571432,1 +5.6,3.0,4.5,1.5,1.8666666666666665,3.0,1 +5.8,2.7,4.1,1.0,2.148148148148148,4.1,1 +6.2,2.2,4.5,1.5,2.818181818181818,3.0,1 +5.6,2.5,3.9,1.1,2.2399999999999998,3.545454545454545,1 +5.9,3.2,4.8,1.8,1.84375,2.6666666666666665,1 +6.1,2.8,4.0,1.3,2.1785714285714284,3.0769230769230766,1 +6.3,2.5,4.9,1.5,2.52,3.266666666666667,1 +6.1,2.8,4.7,1.2,2.1785714285714284,3.916666666666667,1 +6.4,2.9,4.3,1.3,2.206896551724138,3.3076923076923075,1 +6.6,3.0,4.4,1.4,2.1999999999999997,3.1428571428571432,1 +6.8,2.8,4.8,1.4,2.428571428571429,3.428571428571429,1 +6.7,3.0,5.0,1.7,2.2333333333333334,2.9411764705882355,1 +6.0,2.9,4.5,1.5,2.0689655172413794,3.0,1 +5.7,2.6,3.5,1.0,2.1923076923076925,3.5,1 +5.5,2.4,3.8,1.1,2.291666666666667,3.454545454545454,1 +5.5,2.4,3.7,1.0,2.291666666666667,3.7,1 +5.8,2.7,3.9,1.2,2.148148148148148,3.25,1 +6.0,2.7,5.1,1.6,2.222222222222222,3.1874999999999996,1 +5.4,3.0,4.5,1.5,1.8,3.0,1 +6.0,3.4,4.5,1.6,1.7647058823529411,2.8125,1 +6.7,3.1,4.7,1.5,2.161290322580645,3.1333333333333333,1 +6.3,2.3,4.4,1.3,2.739130434782609,3.3846153846153846,1 +5.6,3.0,4.1,1.3,1.8666666666666665,3.1538461538461533,1 +5.5,2.5,4.0,1.3,2.2,3.0769230769230766,1 +5.5,2.6,4.4,1.2,2.1153846153846154,3.666666666666667,1 +6.1,3.0,4.6,1.4,2.033333333333333,3.2857142857142856,1 
+5.8,2.6,4.0,1.2,2.230769230769231,3.3333333333333335,1 +5.0,2.3,3.3,1.0,2.173913043478261,3.3,1 +5.6,2.7,4.2,1.3,2.074074074074074,3.230769230769231,1 +5.7,3.0,4.2,1.2,1.9000000000000001,3.5000000000000004,1 +5.7,2.9,4.2,1.3,1.9655172413793105,3.230769230769231,1 +6.2,2.9,4.3,1.3,2.137931034482759,3.3076923076923075,1 +5.1,2.5,3.0,1.1,2.04,2.727272727272727,1 +5.7,2.8,4.1,1.3,2.035714285714286,3.1538461538461533,1 +6.3,3.3,6.0,2.5,1.9090909090909092,2.4,2 +5.8,2.7,5.1,1.9,2.148148148148148,2.6842105263157894,2 +7.1,3.0,5.9,2.1,2.3666666666666667,2.8095238095238098,2 +6.3,2.9,5.6,1.8,2.1724137931034484,3.1111111111111107,2 +6.5,3.0,5.8,2.2,2.1666666666666665,2.6363636363636362,2 +7.6,3.0,6.6,2.1,2.533333333333333,3.1428571428571423,2 +4.9,2.5,4.5,1.7,1.9600000000000002,2.6470588235294117,2 +7.3,2.9,6.3,1.8,2.5172413793103448,3.5,2 +6.7,2.5,5.8,1.8,2.68,3.222222222222222,2 +7.2,3.6,6.1,2.5,2.0,2.44,2 +6.5,3.2,5.1,2.0,2.03125,2.55,2 +6.4,2.7,5.3,1.9,2.3703703703703702,2.7894736842105265,2 +6.8,3.0,5.5,2.1,2.2666666666666666,2.619047619047619,2 +5.7,2.5,5.0,2.0,2.2800000000000002,2.5,2 +5.8,2.8,5.1,2.4,2.0714285714285716,2.125,2 +6.4,3.2,5.3,2.3,2.0,2.3043478260869565,2 +6.5,3.0,5.5,1.8,2.1666666666666665,3.0555555555555554,2 +7.7,3.8,6.7,2.2,2.0263157894736845,3.0454545454545454,2 +7.7,2.6,6.9,2.3,2.9615384615384617,3.0000000000000004,2 +6.0,2.2,5.0,1.5,2.727272727272727,3.3333333333333335,2 +6.9,3.2,5.7,2.3,2.15625,2.4782608695652177,2 +5.6,2.8,4.9,2.0,2.0,2.45,2 +7.7,2.8,6.7,2.0,2.7500000000000004,3.35,2 +6.3,2.7,4.9,1.8,2.333333333333333,2.7222222222222223,2 +6.7,3.3,5.7,2.1,2.0303030303030303,2.7142857142857144,2 +7.2,3.2,6.0,1.8,2.25,3.333333333333333,2 +6.2,2.8,4.8,1.8,2.2142857142857144,2.6666666666666665,2 +6.1,3.0,4.9,1.8,2.033333333333333,2.7222222222222223,2 +6.4,2.8,5.6,2.1,2.285714285714286,2.6666666666666665,2 +7.2,3.0,5.8,1.6,2.4,3.6249999999999996,2 +7.4,2.8,6.1,1.9,2.6428571428571432,3.2105263157894735,2 +7.9,3.8,6.4,2.0,2.0789473684210527,3.2,2 
+6.4,2.8,5.6,2.2,2.285714285714286,2.545454545454545,2 +6.3,2.8,5.1,1.5,2.25,3.4,2 +6.1,2.6,5.6,1.4,2.346153846153846,4.0,2 +7.7,3.0,6.1,2.3,2.566666666666667,2.6521739130434785,2 +6.3,3.4,5.6,2.4,1.8529411764705883,2.3333333333333335,2 +6.4,3.1,5.5,1.8,2.064516129032258,3.0555555555555554,2 +6.0,3.0,4.8,1.8,2.0,2.6666666666666665,2 +6.9,3.1,5.4,2.1,2.2258064516129035,2.5714285714285716,2 +6.7,3.1,5.6,2.4,2.161290322580645,2.3333333333333335,2 +6.9,3.1,5.1,2.3,2.2258064516129035,2.217391304347826,2 +5.8,2.7,5.1,1.9,2.148148148148148,2.6842105263157894,2 +6.8,3.2,5.9,2.3,2.125,2.565217391304348,2 +6.7,3.3,5.7,2.5,2.0303030303030303,2.2800000000000002,2 +6.7,3.0,5.2,2.3,2.2333333333333334,2.2608695652173916,2 +6.3,2.5,5.0,1.9,2.52,2.6315789473684212,2 +6.5,3.0,5.2,2.0,2.1666666666666665,2.6,2 +6.2,3.4,5.4,2.3,1.823529411764706,2.347826086956522,2 +5.9,3.0,5.1,1.8,1.9666666666666668,2.833333333333333,2 diff --git a/arn:aws:s3:::dvc-projects-research-mrbestnaija1/70/44beeffac0f67cc0401146b26e8f3e b/arn:aws:s3:::dvc-projects-research-mrbestnaija1/70/44beeffac0f67cc0401146b26e8f3e new file mode 100644 index 00000000..1a3ad11a Binary files /dev/null and b/arn:aws:s3:::dvc-projects-research-mrbestnaija1/70/44beeffac0f67cc0401146b26e8f3e differ diff --git a/arn:aws:s3:::dvc-projects-research-mrbestnaija1/d4/1d8cd98f00b204e9800998ecf8427e b/arn:aws:s3:::dvc-projects-research-mrbestnaija1/d4/1d8cd98f00b204e9800998ecf8427e new file mode 100644 index 00000000..e69de29b diff --git a/arn:aws:s3:::dvc-projects-research-mrbestnaija1/d5/33847a0ca14ca93752b1b1f1df349e b/arn:aws:s3:::dvc-projects-research-mrbestnaija1/d5/33847a0ca14ca93752b1b1f1df349e new file mode 100644 index 00000000..0b4a08e1 --- /dev/null +++ b/arn:aws:s3:::dvc-projects-research-mrbestnaija1/d5/33847a0ca14ca93752b1b1f1df349e @@ -0,0 +1 @@ +{"f1_score": 0.9305555555555555} \ No newline at end of file diff --git a/arn:aws:s3:::dvc-projects-research-mrbestnaija1/dd/ede7ba843927234678d5ec8d4f9f99.dir 
b/arn:aws:s3:::dvc-projects-research-mrbestnaija1/dd/ede7ba843927234678d5ec8d4f9f99.dir new file mode 100644 index 00000000..6ed4e592 --- /dev/null +++ b/arn:aws:s3:::dvc-projects-research-mrbestnaija1/dd/ede7ba843927234678d5ec8d4f9f99.dir @@ -0,0 +1 @@ +[{"md5": "d41d8cd98f00b204e9800998ecf8427e", "relpath": "data1.csv"}, {"md5": "d41d8cd98f00b204e9800998ecf8427e", "relpath": "data2.csv"}, {"md5": "d41d8cd98f00b204e9800998ecf8427e", "relpath": "data3.csv"}] \ No newline at end of file diff --git a/arn:aws:s3:::dvc-projects-research-mrbestnaija1/de/44a28d8aa5da6d35dc3778e613449d b/arn:aws:s3:::dvc-projects-research-mrbestnaija1/de/44a28d8aa5da6d35dc3778e613449d new file mode 100644 index 00000000..95234fa4 Binary files /dev/null and b/arn:aws:s3:::dvc-projects-research-mrbestnaija1/de/44a28d8aa5da6d35dc3778e613449d differ diff --git a/arn:aws:s3:::dvc-projects-research-mrbestnaija1/f9/53ee125de2bd311a3f846acfac349c b/arn:aws:s3:::dvc-projects-research-mrbestnaija1/f9/53ee125de2bd311a3f846acfac349c new file mode 100644 index 00000000..548c0a05 --- /dev/null +++ b/arn:aws:s3:::dvc-projects-research-mrbestnaija1/f9/53ee125de2bd311a3f846acfac349c @@ -0,0 +1,33 @@ +sepal_length,sepal_width,petal_length,petal_width,sepal_length_to_sepal_width,petal_length_to_petal_width,target +6.1,2.8,4.7,1.2,2.1785714285714284,3.916666666666667,1 +5.7,3.8,1.7,0.3,1.5000000000000002,5.666666666666667,0 +7.7,2.6,6.9,2.3,2.9615384615384617,3.0000000000000004,2 +6.0,2.9,4.5,1.5,2.0689655172413794,3.0,1 +6.8,2.8,4.8,1.4,2.428571428571429,3.428571428571429,1 +5.4,3.4,1.5,0.4,1.5882352941176472,3.75,0 +5.6,2.9,3.6,1.3,1.9310344827586208,2.769230769230769,1 +6.9,3.1,5.1,2.3,2.2258064516129035,2.217391304347826,2 +6.2,2.2,4.5,1.5,2.818181818181818,3.0,1 +5.8,2.7,3.9,1.2,2.148148148148148,3.25,1 +6.5,3.2,5.1,2.0,2.03125,2.55,2 +4.8,3.0,1.4,0.1,1.6,13.999999999999998,0 +5.5,3.5,1.3,0.2,1.5714285714285714,6.5,0 +4.9,3.1,1.5,0.1,1.5806451612903227,15.0,0 +5.1,3.8,1.5,0.3,1.3421052631578947,5.0,0 
+6.3,3.3,4.7,1.6,1.9090909090909087,2.9375,1 +6.5,3.0,5.8,2.2,2.1666666666666665,2.636363636363636,2 +5.6,2.5,3.9,1.1,2.24,3.545454545454545,1 +5.7,2.8,4.5,1.3,2.035714285714286,3.461538461538461,1 +6.4,2.8,5.6,2.2,2.285714285714286,2.545454545454545,2 +4.7,3.2,1.6,0.2,1.46875,8.0,0 +6.1,3.0,4.9,1.8,2.033333333333333,2.7222222222222223,2 +5.0,3.4,1.6,0.4,1.4705882352941178,4.0,0 +6.4,2.8,5.6,2.1,2.285714285714286,2.6666666666666665,2 +7.9,3.8,6.4,2.0,2.0789473684210527,3.2,2 +6.7,3.0,5.2,2.3,2.2333333333333334,2.260869565217392,2 +6.7,2.5,5.8,1.8,2.68,3.222222222222222,2 +6.8,3.2,5.9,2.3,2.125,2.565217391304348,2 +4.8,3.0,1.4,0.3,1.6,4.666666666666667,0 +4.8,3.1,1.6,0.2,1.5483870967741935,8.0,0 +4.6,3.6,1.0,0.2,1.2777777777777777,5.0,0 +5.7,4.4,1.5,0.4,1.2954545454545454,3.75,0 diff --git a/datadir.dvc b/datadir.dvc new file mode 100644 index 00000000..44f53dd4 --- /dev/null +++ b/datadir.dvc @@ -0,0 +1,5 @@ +outs: +- md5: ddede7ba843927234678d5ec8d4f9f99.dir + size: 0 + nfiles: 3 + path: datadir diff --git a/dvc.lock b/dvc.lock new file mode 100644 index 00000000..6f876095 --- /dev/null +++ b/dvc.lock @@ -0,0 +1,152 @@ +schema: '2.0' +stages: + data_load: + cmd: python src/stages/data_load.py --config=params.yaml + deps: + - path: src/stages/data_load.py + md5: 7e8c530e135da91b31ed742d95b7288c + size: 1084 + params: + params.yaml: + base: + random_state: 42 + log_level: INFO + data_load: + dataset_csv: data/raw/iris.csv + outs: + - path: data/raw/iris.csv + md5: 4224576f0267bf88902f87f0f6200967 + size: 2757 + isexec: true + featurize: + cmd: python src/stages/featurize.py --config=params.yaml + deps: + - path: data/raw/iris.csv + md5: 4224576f0267bf88902f87f0f6200967 + size: 2757 + - path: src/stages/featurize.py + md5: d1cc78e9ae6c9a43099cf2b43e377975 + size: 1395 + params: + params.yaml: + base: + random_state: 42 + log_level: INFO + featurize: + features_path: data/processed/featured_iris.csv + target_column: target + outs: + - path: 
data/processed/featured_iris.csv + md5: 5d03a1564b3038fc35a842f8e4bde491 + size: 7260 + isexec: true + data_split: + cmd: python src/stages/data_split.py --config=params.yaml + deps: + - path: data/processed/featured_iris.csv + md5: 5d03a1564b3038fc35a842f8e4bde491 + size: 7260 + - path: src/stages/data_split.py + md5: 146a803b3261f01f798da85b49cfe00e + size: 1401 + params: + params.yaml: + base: + random_state: 42 + log_level: INFO + data_split: + test_size: 0.21 + trainset_path: data/processed/train_iris.csv + testset_path: data/processed/test_iris.csv + featurize: + features_path: data/processed/featured_iris.csv + target_column: target + outs: + - path: data/processed/test_iris.csv + md5: f953ee125de2bd311a3f846acfac349c + size: 1575 + isexec: true + - path: data/processed/train_iris.csv + md5: 0105828cebc2a54ad9a65b6660f3209c + size: 5641 + isexec: true + train: + cmd: python src/stages/train.py --config=params.yaml + deps: + - path: data/processed/test_iris.csv + md5: f953ee125de2bd311a3f846acfac349c + size: 1575 + - path: data/processed/train_iris.csv + md5: 0105828cebc2a54ad9a65b6660f3209c + size: 5641 + - path: src/stages/train.py + md5: e755fbd9d95efacf4ded17ebc3c93dc2 + size: 1564 + params: + params.yaml: + base: + random_state: 42 + log_level: INFO + train: + cv: 3 + estimator_name: logreg + estimators: + logreg: + param_grid: + C: + - 0.001 + max_iter: + - 100 + solver: + - lbfgs + multi_class: + - multinomial + svm: + param_grid: + C: + - 0.1 + - 1.0 + kernel: + - rbf + - linear + gamma: + - scale + degree: + - 3 + - 5 + model_path: models/model.joblib + outs: + - path: models/model.joblib + md5: de44a28d8aa5da6d35dc3778e613449d + size: 2883 + isexec: true + evaluate: + cmd: python src/stages/evaluate.py --config=params.yaml + deps: + - path: data/processed/test_iris.csv + md5: f953ee125de2bd311a3f846acfac349c + size: 1575 + - path: models/model.joblib + md5: de44a28d8aa5da6d35dc3778e613449d + size: 2883 + - path: src/stages/evaluate.py + md5: 
eab9636bc1bf222815f1941a3abfc99e + size: 2492 + params: + params.yaml: + base: + random_state: 42 + log_level: INFO + evaluate: + reports_dir: reports + metrics_file: metrics.json + confusion_matrix_image: confusion_matrix.png + outs: + - path: reports/confusion_matrix.png + md5: 7044beeffac0f67cc0401146b26e8f3e + size: 25286 + isexec: true + - path: reports/metrics.json + md5: d533847a0ca14ca93752b1b1f1df349e + size: 32 + isexec: true diff --git a/dvc.yaml b/dvc.yaml new file mode 100644 index 00000000..68c96bff --- /dev/null +++ b/dvc.yaml @@ -0,0 +1,68 @@ +# DAG of all the stages in the pipeline +stages: +# The first stage of the pipeline + data_load: + cmd: python src/stages/data_load.py --config=params.yaml + deps: + - src/stages/data_load.py + params: + - base + - data_load + outs: + - data/raw/iris.csv +# The second stage of the pipeline + featurize: + cmd: python src/stages/featurize.py --config=params.yaml + deps: + - data/raw/iris.csv + - src/stages/featurize.py + params: + - base + - featurize + outs: + - data/processed/featured_iris.csv +# The third stage of the pipeline + data_split: + cmd: python src/stages/data_split.py --config=params.yaml + deps: + - data/processed/featured_iris.csv + - src/stages/data_split.py + params: + - base + - data_split + - featurize + outs: + - data/processed/test_iris.csv + - data/processed/train_iris.csv +# The fourth stage of the pipeline + train: + cmd: python src/stages/train.py --config=params.yaml + deps: + - data/processed/test_iris.csv + - data/processed/train_iris.csv + - src/stages/train.py + params: + - base + - train + outs: + - models/model.joblib +# The fifth stage of the pipeline + evaluate: + cmd: python src/stages/evaluate.py --config=params.yaml + deps: + - models/model.joblib + - data/processed/test_iris.csv + - src/stages/evaluate.py + + params: + - base + - evaluate + + outs: + - 'reports/confusion_matrix.png' + + + metrics: + - reports/metrics.json: + + cache: false diff --git a/env/.gitignore 
b/env/.gitignore new file mode 100644 index 00000000..3797191b --- /dev/null +++ b/env/.gitignore @@ -0,0 +1,36 @@ +# Compiled source # +################### +*.com +*.class +*.dll +*.exe +*.o +*.so + +# Packages # +############ +# it's better to unpack these files and commit the raw source +# git has its own built in compression methods +*.7z +*.dmg +*.gz +*.iso +*.jar +*.rar +*.tar +*.zip + +# Logs and databases # +###################### +*.log +*.sql +*.sqlite + +# OS generated files # +###################### +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db \ No newline at end of file diff --git a/env/my_key_github_ssh b/env/my_key_github_ssh new file mode 100644 index 00000000..005b8a39 --- /dev/null +++ b/env/my_key_github_ssh @@ -0,0 +1,49 @@ +-----BEGIN OPENSSH PRIVATE KEY----- +b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAACFwAAAAdzc2gtcn +NhAAAAAwEAAQAAAgEAgzkPerIrdLyUg1rcnn/1nvis1b3v2X+2sA9Z9G/AAvFl6biImgRN +dEWYnB7llZwhcv61GTVZZdRXqrtmEOM6+eI4DxIQIxtOVLn4/8DzxoB7ghSY6uihRu0wIg +TWqUq6nUCrs+v8trNPMt5qASxHBAd+eGlSnxqOCSTb6CqS7Tf3GsRY34iv1nczCDONCAa1 +sDGTLRXVF8ztzZs86HU3JmJobMT65MI78qq5PjEA9Xuq4Y0faSHhOrnZ1QvXBGTHtDub39 +cvrvxlRbS/VBPHkW8XMM5kkaJUhKY0aL7CdA1sEDtdzWPjcUxGk9VVyYvwVO/NkDtDFMBn +MK73VdANOhp0pCIqcDFOIvNP12Is3cRKbfbMgvmZhBIni2h/Ushr1fX3xbOgOn9sOe+CJS +whSu6kS2njzgtxuoYP8Kn4TnC1HEOYrHEG9AO+WhQxGhjLOL9ecgARMdIb/Dt3FionOsHO +Zcp/AujmBUyvDObLZFv2U3iNADzyPGf89pYXaz6oXGFZ7SiULKq9ouzvqVfPoHbzW0PRPV +cYQOUjTNNLl5ReAtptWwF+fp0l4JWI6AkioOGNb0QGb3o8z+XoOnotRi1mD/1gtUcDkTF4 +3or9vp2ejCCwxgaTulnrQcP22cWHo5onp5YqOWJCP9QVi4tX9KWbsOiy7QDj+DjFw1HzsD +MAAAdIuyE9L7shPS8AAAAHc3NoLXJzYQAAAgEAgzkPerIrdLyUg1rcnn/1nvis1b3v2X+2 +sA9Z9G/AAvFl6biImgRNdEWYnB7llZwhcv61GTVZZdRXqrtmEOM6+eI4DxIQIxtOVLn4/8 +DzxoB7ghSY6uihRu0wIgTWqUq6nUCrs+v8trNPMt5qASxHBAd+eGlSnxqOCSTb6CqS7Tf3 +GsRY34iv1nczCDONCAa1sDGTLRXVF8ztzZs86HU3JmJobMT65MI78qq5PjEA9Xuq4Y0faS +HhOrnZ1QvXBGTHtDub39cvrvxlRbS/VBPHkW8XMM5kkaJUhKY0aL7CdA1sEDtdzWPjcUxG 
+k9VVyYvwVO/NkDtDFMBnMK73VdANOhp0pCIqcDFOIvNP12Is3cRKbfbMgvmZhBIni2h/Us +hr1fX3xbOgOn9sOe+CJSwhSu6kS2njzgtxuoYP8Kn4TnC1HEOYrHEG9AO+WhQxGhjLOL9e +cgARMdIb/Dt3FionOsHOZcp/AujmBUyvDObLZFv2U3iNADzyPGf89pYXaz6oXGFZ7SiULK +q9ouzvqVfPoHbzW0PRPVcYQOUjTNNLl5ReAtptWwF+fp0l4JWI6AkioOGNb0QGb3o8z+Xo +OnotRi1mD/1gtUcDkTF43or9vp2ejCCwxgaTulnrQcP22cWHo5onp5YqOWJCP9QVi4tX9K +WbsOiy7QDj+DjFw1HzsDMAAAADAQABAAACAAQYkjN0lPIA4kCIuTLisEpfssK9OgZiUSkv +ttFwHOCP+Ra8NtoFNlxhwRYRH34gWymZame4sutVyc1qKhEZjAFkHqHXINUns2CQ/tpk2H +civY2IPQ8KvItX41UR15goh/4D7cLLo/Nup2iMFBJTTSgtX7TnJ2K7U8xUmon7AjkQsnrj +FBnDd3xMnjWBqRj4tKUk9wDE71uIfwfC8mTDJo82f9HyrW4vOJXQ3xViNt3IGBkQQaG+Iq +dQSSINoOQ3N8XtcmZ182a/5B1dO0z8N7zXaImSd7CADlP8Us3OM92m1EUGxV0gOBI/nO2y +ZAogKcaiRvAF7t3FSl30/YLFNWnGcQuBuQp5b4jbTkvEg1AEgUlvGzHdVZdXmzCc5VgvRu +5NwW64AEjxZTVNuJ4PBFlYJbghu86Euz5druBMtSb25vuC/YGk4DIGUnSFImJS75d2lHCv +mlXrhgPXxsDVx1nWHz5qgZ19+VAzXUTNwGAxjGjGSG06d8baQ7jyQgjWALmgUZvuDnuNlY +OY0fOIGKlqkyiK75noVNa/qURknG/ermrgCJlZOc0dsZErcfm4Uxsii9JxneGuIVheA0il +25pd11pRCqt2VHhLBkdPopX0oyzNzAdDJ7/Bn3uuZwyRYjPCc9SU5Ue2Y4K7vzyWU4/CVu +vDpZAvs0gzi8up2XYZAAABAEeMx+VeWLE4jVdIbXUp9jRgrMTChad4xPRvuon8vUq6nm2U +aId6LesvISKwL3+jLlnJLZbjh2wSVh+tkuruuNQstHXbrvWGkNkB0A209kj45fdwepyfCq +cQkxnztiHvM0VXVKbreu5plP4Zfn2HuptnxsQce7BQmPqPaKyogdGBQsCdSqe+MeNA4k4R +1wYsMm557lDXyqhQlswhzwMzcAEncaEBvCVHIacNszMEiDFwY/Jt99nIl9IqEOeE82w0NZ +MJc0WMk3wpGOncyfj34KeeR3SGm147Dw7bbZnL9zwLiU/WSp6MqWGvK2iShCZb8klSsSku +YD2nFvpKuuESdlkAAAEBALZCeIx9dU3Q9LbuM2+JdYi79m++tL9LCNPy1pTzm59fmoIlAa +NhNiukvTvSxX9vAZxFM65/A5x1OpZs4+sci7EyHszTZNen02IN5//E2gUU5oUlUjenVPce +CbyWXrFBYIVKt55VdMavOwvMunHMyo1MIzbPLYGAcRV/MpE5RhXHTYKsEnfet0SM6he4E+ +ecKTTzxt56HSU1osSe7DFwrqmqLkixQrq7/eckNJ1YCNrQY3k5tJ2YCy3dqA7FGY9o47lz +wm3isdWsEzYlXeHagy8NrtIv/n12vuyItdBeA75kEl4IRN+EzPCvKFaLM8q4WdtAWwOuqR +E41ggcywz4CA8AAAEBALhQdhLVSwDwJmnfW5j8Bm+JN/KZDRu2+WdaWMtKrEdRXQemUpab +a7wrw5VJCN8T75L6kXsUACLDNsdPikYfa6RcxRHcmhqJ8E2zQp5iOSd0HM/ShARFTB/0I+ 
+lMrg/L6ac1kNZE1iRdpVXEDutQs4NA5zZJYGgj/7XIUwSJVBVev2YYfH/vXXJneFe2xJxv +gwiHGCjA2V9iq5RwWQ41SThWnOhmMv7TZV4yM9TB5DDYizTQv28eg3/CqkcO8I12eeHmBO +2vQa4rZ9piROUiswpPcW/v5gwi1UuxDnKmhaHYC/WinTsl08T5nCanStWA7xtyjq4Ubf4r +CqSgOa4WUZ0AAAASZXpla3d1NzdAZ21haWwuY29tAQ== +-----END OPENSSH PRIVATE KEY----- diff --git a/file_data.txt.dvc b/file_data.txt.dvc new file mode 100644 index 00000000..b125fafc --- /dev/null +++ b/file_data.txt.dvc @@ -0,0 +1,5 @@ +outs: +- md5: d41d8cd98f00b204e9800998ecf8427e + size: 0 + path: file_data.txt + isexec: true diff --git a/gitworkflow-course-ds-base b/gitworkflow-course-ds-base new file mode 160000 index 00000000..53842777 --- /dev/null +++ b/gitworkflow-course-ds-base @@ -0,0 +1 @@ +Subproject commit 53842777e06be196a8392ae116c91a9f2b32315b diff --git a/lineapy-trial-prototype.ipynb b/lineapy-trial-prototype.ipynb new file mode 100644 index 00000000..a1b86c0b --- /dev/null +++ b/lineapy-trial-prototype.ipynb @@ -0,0 +1,1228 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: lineapy in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (0.2.3)\n", + "Requirement already satisfied: jinja2 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (2.11.2)\n", + "Requirement already satisfied: pandas in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (1.3.2)\n", + "Requirement already satisfied: pydantic in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (1.8.2)\n", + "Requirement already satisfied: networkx in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (2.5)\n", + "Requirement already satisfied: SQLAlchemy<2.0.0,>=1.4 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (1.4.47)\n", + "Requirement already satisfied: requests in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages 
(from lineapy) (2.25.1)\n", + "Requirement already satisfied: alembic==1.8.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (1.8.0)\n", + "Requirement already satisfied: IPython>=7.0.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (7.19.0)\n", + "Requirement already satisfied: isort in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (5.9.3)\n", + "Requirement already satisfied: rich in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (12.4.4)\n", + "Requirement already satisfied: click>=8.0.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (8.1.3)\n", + "Requirement already satisfied: pyyaml in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (5.3.1)\n", + "Requirement already satisfied: fsspec in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (2022.7.1)\n", + "Requirement already satisfied: nbconvert<7.0.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (6.0.7)\n", + "Requirement already satisfied: nbformat in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (5.0.8)\n", + "Requirement already satisfied: cloudpickle in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (1.6.0)\n", + "Requirement already satisfied: asttokens in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (2.2.1)\n", + "Requirement already satisfied: black in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (21.7b0)\n", + "Requirement already satisfied: typing-extensions>=4.0.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from lineapy) (4.3.0)\n", + "Requirement already satisfied: importlib-metadata in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from alembic==1.8.0->lineapy) (2.0.0)\n", + "Requirement already satisfied: Mako in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from 
alembic==1.8.0->lineapy) (1.2.4)\n", + "Requirement already satisfied: importlib-resources in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from alembic==1.8.0->lineapy) (5.7.1)\n", + "Requirement already satisfied: appnope in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (0.1.0)\n", + "Requirement already satisfied: jedi>=0.10 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (0.17.1)\n", + "Requirement already satisfied: pygments in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (2.7.2)\n", + "Requirement already satisfied: pickleshare in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (0.7.5)\n", + "Requirement already satisfied: traitlets>=4.2 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (5.0.5)\n", + "Requirement already satisfied: pexpect>4.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (4.8.0)\n", + "Requirement already satisfied: decorator in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (4.4.2)\n", + "Requirement already satisfied: backcall in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (0.2.0)\n", + "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (3.0.8)\n", + "Requirement already satisfied: setuptools>=18.5 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from IPython>=7.0.0->lineapy) (50.3.1.post20201107)\n", + "Requirement already satisfied: bleach in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (3.2.1)\n", + "Requirement already satisfied: testpath in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (0.4.4)\n", + 
"Requirement already satisfied: jupyter-core in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (4.6.3)\n", + "Requirement already satisfied: pandocfilters>=1.4.1 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (1.4.3)\n", + "Requirement already satisfied: defusedxml in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (0.6.0)\n", + "Requirement already satisfied: mistune<2,>=0.8.1 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (0.8.4)\n", + "Requirement already satisfied: jupyterlab-pygments in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (0.1.2)\n", + "Requirement already satisfied: nbclient<0.6.0,>=0.5.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (0.5.1)\n", + "Requirement already satisfied: entrypoints>=0.2.2 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbconvert<7.0.0->lineapy) (0.3)\n", + "Requirement already satisfied: MarkupSafe>=0.23 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from jinja2->lineapy) (1.1.1)\n", + "Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbformat->lineapy) (3.2.0)\n", + "Requirement already satisfied: ipython-genutils in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbformat->lineapy) (0.2.0)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from SQLAlchemy<2.0.0,>=1.4->lineapy) (2.0.2)\n", + "Requirement already satisfied: six in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from asttokens->lineapy) (1.15.0)\n", + "Requirement already satisfied: tomli<2.0.0,>=0.2.6 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from black->lineapy) (1.2.3)\n", + "Requirement already satisfied: 
regex>=2020.1.8 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from black->lineapy) (2020.10.15)\n", + "Requirement already satisfied: mypy-extensions>=0.4.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from black->lineapy) (0.4.3)\n", + "Requirement already satisfied: appdirs in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from black->lineapy) (1.4.4)\n", + "Requirement already satisfied: pathspec<1,>=0.8.1 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from black->lineapy) (0.9.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from pandas->lineapy) (2.8.1)\n", + "Requirement already satisfied: pytz>=2017.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from pandas->lineapy) (2022.1)\n", + "Requirement already satisfied: numpy>=1.17.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from pandas->lineapy) (1.18.5)\n", + "Requirement already satisfied: idna<3,>=2.5 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from requests->lineapy) (2.10)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from requests->lineapy) (2020.6.20)\n", + "Requirement already satisfied: chardet<5,>=3.0.2 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from requests->lineapy) (3.0.4)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from requests->lineapy) (1.25.11)\n", + "Requirement already satisfied: commonmark<0.10.0,>=0.9.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from rich->lineapy) (0.9.1)\n", + "Requirement already satisfied: parso<0.8.0,>=0.7.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from jedi>=0.10->IPython>=7.0.0->lineapy) (0.7.0)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: 
attrs>=17.4.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat->lineapy) (20.3.0)\n", + "Requirement already satisfied: pyrsistent>=0.14.0 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat->lineapy) (0.17.3)\n", + "Requirement already satisfied: jupyter-client>=6.1.5 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert<7.0.0->lineapy) (6.1.7)\n", + "Requirement already satisfied: async-generator in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert<7.0.0->lineapy) (1.10)\n", + "Requirement already satisfied: nest-asyncio in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert<7.0.0->lineapy) (1.5.1)\n", + "Requirement already satisfied: ptyprocess>=0.5 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from pexpect>4.3->IPython>=7.0.0->lineapy) (0.6.0)\n", + "Requirement already satisfied: wcwidth in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->IPython>=7.0.0->lineapy) (0.2.5)\n", + "Requirement already satisfied: webencodings in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from bleach->nbconvert<7.0.0->lineapy) (0.5.1)\n", + "Requirement already satisfied: packaging in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from bleach->nbconvert<7.0.0->lineapy) (20.4)\n", + "Requirement already satisfied: zipp>=0.5 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from importlib-metadata->alembic==1.8.0->lineapy) (3.4.0)\n", + "Requirement already satisfied: tornado>=4.1 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from jupyter-client>=6.1.5->nbclient<0.6.0,>=0.5.0->nbconvert<7.0.0->lineapy) (6.1)\n", + "Requirement already satisfied: pyzmq>=13 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from 
jupyter-client>=6.1.5->nbclient<0.6.0,>=0.5.0->nbconvert<7.0.0->lineapy) (19.0.2)\n", + "Requirement already satisfied: pyparsing>=2.0.2 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from packaging->bleach->nbconvert<7.0.0->lineapy) (2.4.7)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.0.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], + "source": [ + "! pip install lineapy" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pandas==1.3.2 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (1.3.2)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from pandas==1.3.2) (2.8.1)\n", + "Requirement already satisfied: numpy>=1.17.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from pandas==1.3.2) (1.18.5)\n", + "Requirement already satisfied: pytz>=2017.3 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from pandas==1.3.2) (2022.1)\n", + "Requirement already satisfied: six>=1.5 in /Users/jenif/opt/anaconda3/lib/python3.8/site-packages (from python-dateutil>=2.7.3->pandas==1.3.2) (1.15.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.0.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], + "source": [ + "! 
python -m pip install pandas==1.3.2" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext lineapy" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "lineapy_config(home_dir=PosixPath('/Users/jenif/.lineapy'), database_url='sqlite:////Users/jenif/.lineapy/db.sqlite', artifact_storage_dir=PosixPath('/Users/jenif/.lineapy/linea_pickles'), customized_annotation_folder=PosixPath('/Users/jenif/.lineapy/custom-annotations'), do_not_track=False, logging_level='INFO', logging_file=PosixPath('/Users/jenif/.lineapy/lineapy.log'), storage_options=None, mlflow_registry_uri=None, mlflow_tracking_uri=None, default_ml_models_storage_backend=None)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lineapy.options" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:17:31.460557Z", + "start_time": "2019-06-16T21:17:29.395297Z" + } + }, + "outputs": [], + "source": [ + "import lineapy\n", + "import joblib\n", + "import json\n", + "import itertools\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.metrics import confusion_matrix, f1_score\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.model_selection import train_test_split\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:17:31.485189Z", + "start_time": "2019-06-16T21:17:31.473720Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)target
05.13.51.40.20
14.93.01.40.20
24.73.21.30.20
34.63.11.50.20
45.03.61.40.20
\n", + "
" + ], + "text/plain": [ + " sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \\\n", + "0 5.1 3.5 1.4 0.2 \n", + "1 4.9 3.0 1.4 0.2 \n", + "2 4.7 3.2 1.3 0.2 \n", + "3 4.6 3.1 1.5 0.2 \n", + "4 5.0 3.6 1.4 0.2 \n", + "\n", + " target \n", + "0 0 \n", + "1 0 \n", + "2 0 \n", + "3 0 \n", + "4 0 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get data \n", + "\n", + "import pandas as pd\n", + "from sklearn.datasets import load_iris\n", + "\n", + "data = load_iris(as_frame=True)\n", + "dataset = data.frame\n", + "dataset.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0: setosa\n", + "1: versicolor\n", + "2: virginica\n" + ] + }, + { + "data": { + "text/plain": [ + "[None, None, None]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# print labels for target values \n", + "\n", + "[print(f'{target}: {label}') for target, label in zip(data.target.unique(), data.target_names)]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:17:32.328046Z", + "start_time": "2019-06-16T21:17:32.323611Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['sepal_length', 'sepal_width', 'petal_length', 'petal_width']" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# feature names\n", + "\n", + "dataset.columns = [colname.strip(' (cm)').replace(' ', '_') for colname in dataset.columns.tolist()]\n", + "\n", + "feature_names = dataset.columns.tolist()[:4]\n", + "feature_names" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "#save raw data as artifact\n", + "dataset_csv = './data/raw/iris.csv'\n", + 
"dataset.to_csv(dataset_csv, index=False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthtarget
05.13.51.40.20
14.93.01.40.20
24.73.21.30.20
34.63.11.50.20
45.03.61.40.20
..................
1456.73.05.22.32
1466.32.55.01.92
1476.53.05.22.02
1486.23.45.42.32
1495.93.05.11.82
\n", + "

150 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width target\n", + "0 5.1 3.5 1.4 0.2 0\n", + "1 4.9 3.0 1.4 0.2 0\n", + "2 4.7 3.2 1.3 0.2 0\n", + "3 4.6 3.1 1.5 0.2 0\n", + "4 5.0 3.6 1.4 0.2 0\n", + ".. ... ... ... ... ...\n", + "145 6.7 3.0 5.2 2.3 2\n", + "146 6.3 2.5 5.0 1.9 2\n", + "147 6.5 3.0 5.2 2.0 2\n", + "148 6.2 3.4 5.4 2.3 2\n", + "149 5.9 3.0 5.1 1.8 2\n", + "\n", + "[150 rows x 5 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.3.2\n" + ] + } + ], + "source": [ + "print(pd.__version__)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LineaArtifact(name='iris-raw', _version=4)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#save raw data as artifact to lineapy\n", + "lineapy.save(dataset, \"iris-raw\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Features engineering" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:21:02.150708Z", + "start_time": "2019-06-16T21:21:02.144518Z" + } + }, + "outputs": [], + "source": [ + "dataset['sepal_length_to_sepal_width'] = dataset['sepal_length'] / dataset['sepal_width']\n", + "dataset['petal_length_to_petal_width'] = dataset['petal_length'] / dataset['petal_width']\n", + "\n", + "dataset = dataset[[\n", + " 'sepal_length', 'sepal_width', 'petal_length', 'petal_width',\n", + "# 'sepal_length_in_square', 'sepal_width_in_square', 'petal_length_in_square', 'petal_width_in_square',\n", + " 'sepal_length_to_sepal_width', 'petal_length_to_petal_width',\n", + " 'target'\n", + "]]" + ] + }, + { + 
"cell_type": "code", + "execution_count": 14, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:21:02.987144Z", + "start_time": "2019-06-16T21:21:02.976092Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthsepal_length_to_sepal_widthpetal_length_to_petal_widthtarget
05.13.51.40.21.4571437.00
14.93.01.40.21.6333337.00
24.73.21.30.21.4687506.50
34.63.11.50.21.4838717.50
45.03.61.40.21.3888897.00
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width \\\n", + "0 5.1 3.5 1.4 0.2 \n", + "1 4.9 3.0 1.4 0.2 \n", + "2 4.7 3.2 1.3 0.2 \n", + "3 4.6 3.1 1.5 0.2 \n", + "4 5.0 3.6 1.4 0.2 \n", + "\n", + " sepal_length_to_sepal_width petal_length_to_petal_width target \n", + "0 1.457143 7.0 0 \n", + "1 1.633333 7.0 0 \n", + "2 1.468750 6.5 0 \n", + "3 1.483871 7.5 0 \n", + "4 1.388889 7.0 0 " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# Save features\n", + "features_path = './data/processed/featured_iris.csv'\n", + "dataset.to_csv(features_path, index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LineaArtifact(name='iris-preprocessed', _version=4)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#save features to lineapy\n", + "lineapy.save(dataset, \"iris-preprocessed\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Split dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:21:06.361378Z", + "start_time": "2019-06-16T21:21:06.358647Z" + } + }, + "outputs": [], + "source": [ + "test_size=0.2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Splittail train/test" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:21:07.438133Z", + "start_time": "2019-06-16T21:21:07.431649Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "((120, 7), (30, 7))" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_dataset, 
test_dataset = train_test_split(dataset, test_size=test_size, random_state=42)\n", + "train_dataset.shape, test_dataset.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "# Save train and test sets\n", + "trainset_path = './data/processed/train_iris.csv'\n", + "testset_path = './data/processed/test_iris.csv'\n", + "\n", + "train_dataset.to_csv(trainset_path)\n", + "test_dataset.to_csv(testset_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LineaArtifact(name='test-dataset', _version=4)" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#save train and test sets to lineapy\n", + "lineapy.save(train_dataset, \"train-dataset\")\n", + "lineapy.save(test_dataset, \"test-dataset\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Train" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:21:10.932148Z", + "start_time": "2019-06-16T21:21:10.927844Z" + } + }, + "outputs": [], + "source": [ + "# Get X and Y\n", + "\n", + "y_train = train_dataset.loc[:, 'target'].values.astype('int32')\n", + "X_train = train_dataset.drop('target', axis=1).values.astype('float32')" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:21:55.427365Z", + "start_time": "2019-06-16T21:21:55.416431Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "LogisticRegression(C=0.001, multi_class='multinomial')" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create an instance of Logistic Regression Classifier CV and fit the data\n", + "\n", + "logreg = LogisticRegression(C=0.001, solver='lbfgs', multi_class='multinomial', max_iter=100)\n", + 
"logreg.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['./models/model.joblib']" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_path= './models/model.joblib'\n", + "joblib.dump(logreg, model_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LineaArtifact(name='logreg-model', _version=3)" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#save model to lineapy\n", + "lineapy.save(model_path, \"logreg-model\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Evaluate" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:21:55.875303Z", + "start_time": "2019-06-16T21:21:55.864724Z" + } + }, + "outputs": [], + "source": [ + "def plot_confusion_matrix(cm,\n", + " target_names,\n", + " title='Confusion matrix',\n", + " cmap=None,\n", + " normalize=True):\n", + " \"\"\"\n", + " given a sklearn confusion matrix (cm), make a nice plot\n", + "\n", + " Arguments\n", + " ---------\n", + " cm: confusion matrix from sklearn.metrics.confusion_matrix\n", + "\n", + " target_names: given classification classes such as [0, 1, 2]\n", + " the class names, for example: ['high', 'medium', 'low']\n", + "\n", + " title: the text to display at the top of the matrix\n", + "\n", + " cmap: the gradient of the values displayed from matplotlib.pyplot.cm\n", + " see http://matplotlib.org/examples/color/colormaps_reference.html\n", + " plt.get_cmap('jet') or plt.cm.Blues\n", + "\n", + " normalize: If False, plot the raw numbers\n", + " If True, plot the proportions\n", + "\n", + " Usage\n", + " -----\n", + " plot_confusion_matrix(cm = cm, # confusion matrix created by\n", + " 
# sklearn.metrics.confusion_matrix\n", + " normalize = True, # show proportions\n", + " target_names = y_labels_vals, # list of names of the classes\n", + " title = best_estimator_name) # title of graph\n", + "\n", + " Citiation\n", + " ---------\n", + " http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html\n", + "\n", + " \"\"\"\n", + "\n", + " accuracy = np.trace(cm) / float(np.sum(cm))\n", + " misclass = 1 - accuracy\n", + "\n", + " if cmap is None:\n", + " cmap = plt.get_cmap('Blues')\n", + "\n", + " plt.figure(figsize=(8, 6))\n", + " plt.imshow(cm, interpolation='nearest', cmap=cmap)\n", + " plt.title(title)\n", + " plt.colorbar()\n", + "\n", + " if target_names is not None:\n", + " tick_marks = np.arange(len(target_names))\n", + " plt.xticks(tick_marks, target_names, rotation=45)\n", + " plt.yticks(tick_marks, target_names)\n", + "\n", + " if normalize:\n", + " cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", + "\n", + " thresh = cm.max() / 1.5 if normalize else cm.max() / 2\n", + " for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n", + " if normalize:\n", + " plt.text(j, i, \"{:0.4f}\".format(cm[i, j]),\n", + " horizontalalignment=\"center\",\n", + " color=\"white\" if cm[i, j] > thresh else \"black\")\n", + " else:\n", + " plt.text(j, i, \"{:,}\".format(cm[i, j]),\n", + " horizontalalignment=\"center\",\n", + " color=\"white\" if cm[i, j] > thresh else \"black\")\n", + "\n", + " plt.tight_layout()\n", + " plt.ylabel('True label')\n", + " plt.xlabel('Predicted label\\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))\n", + " \n", + " return plt.gcf()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:21:56.090756Z", + "start_time": "2019-06-16T21:21:56.086966Z" + } + }, + "outputs": [], + "source": [ + "# Get X and Y\n", + "\n", + "y_test = test_dataset.loc[:, 'target'].values.astype('int32')\n", + 
"X_test = test_dataset.drop('target', axis=1).values.astype('float32')" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:21:56.270245Z", + "start_time": "2019-06-16T21:21:56.265054Z" + } + }, + "outputs": [], + "source": [ + "prediction = logreg.predict(X_test)\n", + "cm = confusion_matrix(prediction, y_test)\n", + "f1 = f1_score(y_true = y_test, y_pred = prediction, average='macro')" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:21:56.493617Z", + "start_time": "2019-06-16T21:21:56.489929Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9305555555555555" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# f1 score value\n", + "f1" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "# Save metrics\n", + "metrics_file = './reports/metrics.json'\n", + "\n", + "metrics = {\n", + " 'f1': f1\n", + "}\n", + "\n", + "with open(metrics_file, 'w') as mf:\n", + " json.dump(\n", + " obj=metrics,\n", + " fp=mf,\n", + " indent=4\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAe0AAAHCCAYAAADCTpEYAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAA26klEQVR4nO3dd7xcVdXG8d+ThEDoJaEkAem9SkCaiBTpTYGggBSRLkrxVZSXqoiKCAKKCNJfQpcqoCgqCJIQQiDUKCBJaIFICaEkrPePvS9Obm7L3DJ35zxfPvNh5pwz56yZyZ01a+999lFEYGZmZr1fn0YHYGZmZh3jpG1mZlYIJ20zM7NCOGmbmZkVwknbzMysEE7aZmZmhXDSNmsASQMk3SbpLUnXd2I/+0i6pytjaxRJn5X0TKPjMOvN5PO0zVon6SvAscCqwDvAGOCHEXF/J/e7H/ANYJOImN7ZOHs7SQGsFBHjGx2LWclcaZu1QtKxwDnAGcASwDLAL4Fdu2D3nwKerULC7ghJ/Rodg1kJnLTNWiBpIeA04MiIuCkipkbERxFxW0R8O28zt6RzJE3Kt3MkzZ3XbSFpgqTjJL0m6WVJB+Z1pwInAcMlvSvpa5JOkXRVzfGXlRRNyUzSAZL+JekdSc9L2qdm+f01z9tE0sjc7D5S0iY16+6TdLqkB/J+7pE0sJXX3xT//9TEv5ukHSQ9K+lNSd+r2X5DSQ9K+k/e9nxJ/fO6v+bNHsuvd3jN/r8j6RXg0qZl+Tkr5GN8Oj8eLGmypC0687malc5J26xlGwPzADe3sc33gY2AdYF1gA2BE2vWLwksBAwBvgZcIGmRiDiZVL1fGxHzR8QlbQUiaT7gF8D2EbEAsAmpmb75dosCd+RtFwPOBu6QtFjNZl8BDgQWB/oDx7dx6CVJ78EQ0o+M3wD7AusDnwVOkrR83nYGcAwwkPTebQUcARARm+dt1smv99qa/S9KanU4pPbAEfFP4DvA1ZLmBS4FLouI+9qI12yO56Rt1rLFgMntNF/vA5wWEa9FxOvAqcB+Nes/yus/iog7gXeBVeqM52NgTUkDIuLliBjXwjY7As9FxJURMT0irgGeBnau2ebSiHg2IqYB15F+cLTmI1L//UfACFJCPjci3snHHwesDRARj0TEQ/m4LwC/Bj7Xgdd0ckR8kOOZSUT8BngO+AewFOlHklmlOWmbtewNYGA7fa2DgRdrHr+Yl32yj2ZJ/z1g/tkNJCKmAsOBw4CXJd0hadUOxNMU05Cax6/MRjxvRMSMfL8pqb5as35a0/MlrSzpdkmvSHqb1JLQYtN7jdcj4v12tvkNsCZwXkR80M62ZnM8J22zlj0IvA/s1sY2k0hNu02WycvqMRWYt+bxkrUrI+LuiNiGVHE+TUpm7cXTFNPEOmOaHb8ixbVSRCwIfA9QO89p89QVSfOTBgJeApySm//NKs1J26wFEfEWqR/3gjwAa15Jc0naXtJP8mbXACdKGpQHdJ0EXNXaPtsxBthc0jJ5ENwJTSskLSFpl9y3/QGpmX1GC/u4E1hZ0lck9ZM0HFgduL3OmGbHAsDbwLu5FeDwZutfBZaf5VltOxd4JCIOJvXVX9jpKM0K56Rt1oqIOJt0jvaJwOvAS8BRwO/yJj8ARgFjgceB0XlZPcf6A3Bt3tcjzJxo+wDHkSrpN0l9xUe0sI83gJ3ytm8A/wPsFBGT64lpNh1PGuT2DqkV4Npm608BLs+jy/dqb2eSdgW2I3UJQPocPt00at6sqjy5ipmZWSFcaZuZmRXCSdvMzKwQTtpmZmaFcNI2MzMrhJO2mZlZIXxlnTqo/3yhAZ7nYU6w3opLNDoEM2vB6NGPTI6IQd19nL4Lfipi+iyz6M62mPb63RGxXReE1CYn7TpowKLMvfGxjQ7DusADtx/T6BDMrAUD5lLzKXm7RUyfxtyrtDt1QLveH3NBe9P2dgknbTMzqzCByukpdtI2M7PqEqD2psnvPZy0zcys2gq
qtMuJ1MzMrOJcaZuZWbW5edzMzKwEHohmZmZWjoIq7XJ+XpiZmVWcK20zM6su4eZxMzOzMqio5nEnbTMzq7aCKu1yIjUzM6s4V9pmZlZtbh43MzMrQVnnaZcTqZmZWcW50jYzs+ryVb7MzMwKUlDzuJO2mZlVmPu0zczMrBu40jYzs2rr4z5tMzOz3s9zj5uZmRWkoNHj5fy8MDMzqzhX2mZmVmFljR530jYzs2orqHncSdvMzKqtoEq7nEjNzMwKJem3kl6T9ETNskUl/UHSc/n/i7S3HydtMzOrLqlrbu27DNiu2bLvAvdGxErAvflxm5y0zcys2tSn87d2RMRfgTebLd4VuDzfvxzYrb39uE/bzMyqrWsGog2UNKrm8UURcVE7z1kiIl4GiIiXJS3e3kGctM3MzDpvckQM6+6DOGmbmVmFNfQ87VclLZWr7KWA19p7gvu0zcys2npmIFpLbgX2z/f3B25p7wmutM3MrLp66IIhkq4BtiD1fU8ATgbOBK6T9DXg38Ce7e3HSdvMzKybRcSXW1m11ezsx0nbzMwqzHOPm5mZlcNzj5uZmRWioEq7nEjNzMwqzpW2mZlVm5vHzczMCqCyBqKVE6mZmVnFudI2M7Nqc/O4mZlZGeSkbWZm1vuJspK2+7TNzMwK4UrbzMyqS/lWCCdtMzOrMLl53Mpy4THb8OKIQxl14X6fLFtk/rm5/Ywv8vglB3D7GV9k4fnnbmCEVq977r6LtddYhTVWXZGf/uTMRodjneTPs3tI6vStpzhpG1f+4Ul2PfHmmZYdP3xD7hvzEmt97TLuG/MSx++1QYOis3rNmDGDbx19JLfc9nseHfsk14+4hqeefLLRYVmd/HkaOGkb8MATE3nznfdnWrbTxstz1R/TF8JVf3ySnTdZoRGhWSeMfPhhVlhhRZZbfnn69+/PnsP35vbbbml0WFYnf57dx5W2FW/xhefllTenAvDKm1MZtNC8DY7IZtekSRMZOnTpTx4PGTKUiRMnNjAi6wx/nt3HSbuHSTpA0uBGx2HWm0TELMtKGnBjM/Pn2U3URbceMkckbeAAwEm7C732n/dYctH5AFhy0fl4/a33GhyRza4hQ4YyYcJLnzyeOHECgwf7z6RU/jwNenHSljSfpDskPSbpCUnDJa0v6S+SHpF0t6SlJO0BDAOuljRG0gBJW0l6VNLjkn4rae68zzMlPSlprKSz8rKdJf0jb/9HSUs08nX3Fnc89C/23Xp1APbdenVuf/BfDY7IZtewDTZg/PjneOH55/nwww+5/toR7LjTLo0Oy+rkz7N7iM43jfdki0dvPk97O2BSROwIIGkh4PfArhHxuqThwA8j4iBJRwHHR8QoSfMAlwFbRcSzkq4ADs//3x1YNSJC0sL5OPcDG+VlBwP/AxzXPBhJhwCHADDPIt33qhvg8u9uz2fXXpqBC87D+CsP5vSrHuSsa0dy1fd2ZP9t1+Cl195hnx/e3ugwbTb169ePn597PjvvuC0zZsxg/wMOYvU11mh0WFYnf57dp6RuBrXUT9IbSFoZuBu4DrgdmAL8HWgq+foCL0fEFyTdx3+T9jrAeRGxed7PVsCRwF7AI8Ao4A7g9oj4UNJawM+ApYD+wPMRsV1bsfVZaOmYe+Nju/T1WmNMuf2YRodgZi0YMJceiYhh3X2cfostHwvu8INO72fKVfv0SLy9tnk8Ip4F1gceB34EfAkYFxHr5ttaEfGFFp7a4k+miJgObAjcCOwG3JVXnQecHxFrAYcC83TpCzEzM+sivbZ5PI8GfzMirpL0LqlpepCkjSPiQUlzAStHxDjgHWCB/NSngWUlrRgR44H9gL9Imh+YNyLulPQQMD5vvxDQdN7E/j308szMrJcoqXm81yZtYC3gp5I+Bj4CDgemA7/I/dv9gHOAcaQ+7AslTQM2Bg4ErpfUDxgJXAgsCtyS+7wFNLWLnpK3nQg8BCzXEy/OzMx6AV8wpGtExN2kPu3mNm9h2xt
Jzd5N7gXWa7bZy6Tm8ebPvQXwtEJmZhVVUqXda/u0zczMbGa9ttI2MzPrbirs0pxO2mZmVmlO2mZmZqUoJ2e7T9vMzKwUrrTNzKy65OZxMzOzYpSUtN08bmZmVghX2mZmVmklVdpO2mZmVlk+T9vMzKwk5eRs92mbmZmVwpW2mZlVl0/5MjMzK4eTtpmZWSFKStru0zYzMyuEK20zM6u2cgptJ20zM6u2kprHnbTNzKyypLImV3GftpmZWSFcaZuZWaWVVGk7aZuZWaU5aZuZmZWinJztPm0zM7NSuNI2M7NKc/O4mZlZCXzBEDMzszIIKChnu0/bzMysFE7aZmZWYfpkVrTO3No9inSMpHGSnpB0jaR56onWSdvMzCpN6vyt7f1rCHA0MCwi1gT6AnvXE6uTtpmZWffrBwyQ1A+YF5hU707MzMwqq4tGjw+UNKrm8UURcRFAREyUdBbwb2AacE9E3FPPQZy0zcysujrQvN1BkyNiWIuHkBYBdgWWA/4DXC9p34i4anYP4uZxMzOrLAF9+qjTt3ZsDTwfEa9HxEfATcAm9cTrpG1mZta9/g1sJGlepbb4rYCn6tmRm8fNzKzSuntylYj4h6QbgNHAdOBR4KJ69uWkbWZmldYT05hGxMnAyZ3dj5O2mZlVV9cNROsR7tM2MzMrhCttMzOrrHTBkHJKbSdtMzOrsI7NHd5bOGmbmVmlFZSz3adtZmZWClfaZmZWaW4eNzMzK0Fhp3w5aZuZWWWVNnrcfdpmZmaFcKVtZmaVVlCh7aRtZmbVVlLzuJO2mZlVWkE5233aZmZmpXClbWZm1SU3j8/x1ltxCR64/ZhGh2FdYLvzH2h0CNaF7jpq00aHYIVJp3w1OoqOc9I2M7MKK+uCIe7TNjMzK4QrbTMzq7SCCm0nbTMzqzY3j5uZmVmXc6VtZmbV5at8mZmZlaG0q3w5aZuZWaWVlLTdp21mZlYIV9pmZlZpBRXaTtpmZlZtJTWPO2mbmVl1FTZ63H3aZmZmhXClbWZmlaXCLhjipG1mZpVWUM520jYzs2rrU1DWdp+2mZlZIVxpm5lZpRVUaDtpm5lZdUk+T9vMzKwYfcrJ2e7TNjMzK4UrbTMzqzQ3j5uZmRWioJztpG1mZtUl0qxopXCftpmZWSFcaZuZWaWVNHrcSdvMzKpLZV0wxM3jZmZmhXClbWZmlVZQoe2kbWZm1SXKusqXk7aZmVVaQTnbfdpmZmalcKVtZmaVVtLocSdtMzOrrHRpzkZH0XGtJm1J5wHR2vqIOLpbIjIzM+tBc8pAtFE9FoWZmZm1q9WkHRGX1z6WNF9ETO3+kMzMzHpOOXV2B0aPS9pY0pPAU/nxOpJ+2e2RmZmZ9QDlqUw7c+vAMRaWdIOkpyU9JWnjemLtyEC0c4BtgVsBIuIxSZvXczAzM7PeJE2u0iOHOhe4KyL2kNQfmLeenXRo9HhEvNTsl8SMeg5mZmZWNZIWBDYHDgCIiA+BD+vZV0cmV3lJ0iZASOov6XhyU7mZmVnRuqBpvAPN48sDrwOXSnpU0sWS5qsn3I4k7cOAI4EhwERg3fzYzMyseE3nanfmBgyUNKrmdkjNIfoBnwZ+FRHrAVOB79YTa7vN4xExGdinnp2bmZn1dl00I9rkiBjWyroJwISI+Ed+fAN1Ju2OjB5fXtJtkl6X9JqkWyQtX8/BzMzMqiYiXiF1Na+SF20FPFnPvjoyEO3/gAuA3fPjvYFrgM/Uc0AzM7PeogdHj38DuDqPHP8XcGA9O+lI0lZEXFnz+CpJR9VzMDMzs96mJy4YEhFjgNaazzusrbnHF813/yzpu8AI0lzkw4E7OntgMzOz3qCkGdHaqrQfISXpptdzaM26AE7vrqDMzMxsVm3NPb5cTwZiZmbW06SyrvLVkfO0kbSmpL0kfbXp1t2BWePcc/ddrL3GKqyx6or89CdnNjoc64SlFxnAxfus88ntjsM/wx7rLdXosKxO/tvsHl10nnaPaHc
gmqSTgS2A1YE7ge2B+4ErujUya4gZM2bwraOP5I7f/4EhQ4ey2UYbsNNOu7Da6qs3OjSrw0tTpnHw1Y8BaYTsDQdvwN/Gv9ngqKwe/tvsPj0xEK2rdKTS3oN0TtkrEXEgsA4wd7dGZQ0z8uGHWWGFFVlu+eXp378/ew7fm9tvu6XRYVkX+PTSCzPxrfd59Z0PGh2K1cF/mwYdS9rTIuJjYHqe9Pw10jyqNgeaNGkiQ4cu/cnjIUOGMnHixAZGZF1ly1UG8qdnXm90GFYn/212n5KaxzuStEdJWhj4DWlE+Wjg4e4MqiWSTpO0dR3P20LS7d0R05woImZZVlLTkbWsXx+x6fKLct9zbzQ6FKuT/za7hxB91PlbT+nI3ONH5LsXSroLWDAixnZHMEr/ApUr++ZxnNQdx2whhn4RMb0njtUbDRkylAkTXvrk8cSJExg8eHADI7Ku8JllF+HZ195lynsfNToUq5P/Ng3aqLQlfbr5DVgU6Jfvt0rSjyUdUfP4FEnHSfq2pJGSxko6Na9bVtJTkn5JquKXlnSZpCckPS7pmLzdZZL2yPc3kPR3SY9JeljSApLmkXRpfs6jkj7fQlyLSvpdPv5Dktauie8iSfdQ8QF2wzbYgPHjn+OF55/nww8/5PprR7DjTrs0OizrpK1WGci9z0xudBjWCf7b7CZd0DTeW0aP/6yNdQFs2cb6EcA5wC/z472AM4HNgA1JE7bcKmlz4N/AKsCBEXGEpPWBIRGxJkBumv9Enrf1WmB4RIzM/ezTgG8CRMRaklYF7pG0crO4TgUejYjdJG1JStDr5nXrA5tFxLQ2Xtccr1+/fvz83PPZecdtmTFjBvsfcBCrr7FGo8OyTpi7Xx/WX2ZhfnbvPxsdinWC/za7T0ndDG1NrjJLpdpREfGopMUlDQYGAVOAtYEvAI/mzeYHViIl7Rcj4qG8/F/A8pLOI02Xek+z3a8CvBwRI/Ox3gaQtBlwXl72tKQXgeZJezPgS3mbP0laTNJCed2tbSXsfG3UQwCWXmaZDr8XJdpu+x3YbvsdGh2GdZEPpn/Mrr/u8WEo1g38t9k9OjRhSS/RnbHeQDpdbDip8hbwo4hYN99WjIhL8rZTm54UEVNIp5XdBxwJXNxsvyJV+s115KdSS9s07WtqC+v+u1HERRExLCKGDRo4qAOHMjMz61rdmbRHkC7juQcpgd8NHCRpfgBJQyQt3vxJkgYCfSLiRuB/geb9508DgyVtkLdfQFI/4K/APnnZysAywDPNnlu7zRaki5a/3elXamZmRRKpebyzt57SkUtz1iUixklaAJgYES8DL0taDXgwv8B3gX2BGc2eOgS4VFLTD4oTmu33Q0nDgfMkDSD1Z29N6j+/UNLjwHTggIj4oNmbeUre91jgPWD/LnvBZmZWpB66nnaX6Mg0piJVp8tHxGmSlgGWjIh2O8kiYq1mj88Fzm1h0zVrtnmMWatrIuKAmvsjgY1a2M8BzRdExH2kpnYi4k1g1xa2OaWl+M3MbM5XUtLuSPP4L4GNgS/nx+8AF3RbRGZmZtaijjSPfyYiPi3pUUgDxfJpV2ZmZkVL51mXU2p3JGl/JKkveZS1pEHALDOWmZmZlaik5vGOJO1fADcDi0v6IWk0+IndGpWZmVkPKajQ7tDc41dLeoR0eU4Bu0XEU90emZmZmc2kI6PHlyGdHnVb7bKI+Hd3BmZmZtbdBD16la7O6kjz+B2k/mwB8wDLkSYt8aS3ZmZWvJKmMe1I8/hM51rnK3wd2m0RmZmZ9aCCCu3Z/4EREaOBDbohFjMzM2tDR/q0j6152Ic0W9nr3RaRmZlZD5E0x/VpL1Bzfzqpj/vG7gnHzMysZxWUs9tO2nlSlfkj4ts9FI+ZmVmPKmlylVb7tCX1i4gZtHDxDjMzM+t5bVXaD5MS9hhJtwLXA1ObVkbETd0cm5mZWbeaE8/TXhR4A9iS/56vHYCTtpmZFa+gnN1m0l48jxx/gv8m6ybRrVG
ZmZn1BJXVp91W0u4LzM/MybqJk7aZmVkPaytpvxwRp/VYJGZmZg2gFmvT3qmtpF3OqzAzM6tDGojW6Cg6rq1pTLfqsSjMzMysXa1W2hHxZk8GYmZm1gglVdodOeXLzMxsjqWCzvly0jYzs8qak/q0zczMrBdxpW1mZtWlOWdGNDMzsznenDb3uJmZ2RzJfdpmZmbWLVxpm5lZpRXUOu6kbWZmVSb6FDRrt5O2mZlVliir0naftpmZWSFcaZuZWXWprNHjTtpmZlZpPk/bzMysAO7TNjMzs27hStvMzCrNzeNmZmaFKChnO2mbmVl1iZ7rJ5bUFxgFTIyInerZh/u0zczMesY3gac6swMnbTMzqy6BpE7f2j2MNBTYEbi4M+G6edzMzCqth7q0zwH+B1igMztxpW1mZtZ5AyWNqrkd0rRC0k7AaxHxSGcP4krbzMwqS3TZKV+TI2JYK+s2BXaRtAMwD7CgpKsiYt/ZPYgrbTMzqzR1wa0tEXFCRAyNiGWBvYE/1ZOwwZW2mZlVnM/TNjMzs1lExH3AffU+30nbzMwqrGOnbPUWTtpmZlZZPTkjWldw0jYzs0orqdIu6QeGmZlZpbnSNjOzSiunznbStoo7f491Gh2CdaFFNjiq0SFYaVRW87iTtpmZVVZpA9FKitXMzKzSXGmbmVmluXnczMysEOWkbCdtMzOruIIKbfdpm5mZlcKVtpmZVVYaPV5Oqe2kbWZmlVZS87iTtpmZVZhQQZW2+7TNzMwK4UrbzMwqzc3jZmZmBfBANDMzs1KorErbfdpmZmaFcKVtZmaVVlKl7aRtZmaV5lO+zMzMrMu50jYzs8oS0KecQttJ28zMqq2k5nEnbTMzq7SSBqK5T9vMzKwQrrTNzKzS3DxuZmZWAA9EMzMzK4YvzWlmZmbdwJW2mZlVV2EXDHHSNjOzSisoZztpm5lZdaWBaOWkbfdpm5mZFcKVtpmZVVo5dbaTtpmZVV1BWdtJ28zMKs3naZuZmVmXc6VtZmaVVtDgcSdtMzOrtoJytpO2mZlVXEFZ233aZmZmhXClbWZmlSXKGj3upG1mZtXlC4aYmZmVo6Cc7T5tMzOzUrjSNjOzaiuo1HbSNjOzClNRA9HcPG5mZlYIV9pmZlZpHj1uZmZWAFFUl7aTtpmZVVxBWdt92mZmZoVw0jYzs0pTF/zX5v6lpSX9WdJTksZJ+ma9sTpp2yzuufsu1l5jFdZYdUV++pMzGx2O1emVSRM4aK8d2OXz67PbVhtw1SW/bHRINpsuPHkfXrz3R4y6/nufLPvi1uvxyA3fZ+ojv+DTqy/TwOjmHFLnb+2YDhwXEasBGwFHSlq9nlidtG0mM2bM4FtHH8ktt/2eR8c+yfUjruGpJ59sdFhWh759+3H8/57BrX9+hKtv+RMjLr+Ifz77dKPDstlw5W0PseuRF8y0bNw/J7H3cb/h/tH/bFBUcx51wa0tEfFyRIzO998BngKG1BOrk7bNZOTDD7PCCiuy3PLL079/f/Ycvje333ZLo8OyOgxaYklWX2tdAOabfwGWW3EVXn1lUmODstnywOh/8uZb78207JnnX+W5F19rUETWWZKWBdYD/lHP8520bSaTJk1k6NClP3k8ZMhQJk6c2MCIrCtMfOlFnh43lrXXG9boUMx6l64os1OpPVDSqJrbIbMcSpofuBH4VkS8XU+4DU/akgZLuqGO590paeF2tjlN0tZ1B1dBETHLMpU084DN4r2p73LMofvynVPOZP4FFmx0OGa9ThcNRJscEcNqbhfNdAxpLlLCvjoibqo31oafpx0Rk4A9mi+X1C8iprfxvB06sO+TOhle5QwZMpQJE1765PHEiRMYPHhwAyOyzvjoo4845pB92XG3vdh6+10bHY5ZryO6f0Y0pcrnEuCpiDi7M/vq0Upb0o8lHVHz+BRJx0l6Ij8+QNL1km4D7pE0r6TrJI2VdK2kf0galrd9QdJAScvmYfS/yUPp75E0IG9zmaQ
98v0NJP1d0mOSHpa0QH7u3ySNzrdNevL96I2GbbAB48c/xwvPP8+HH37I9deOYMeddml0WFaHiODkbx/J8iutwv6HfKPR4ZhV2abAfsCWksbkW7uFZ0t6utIeAZwDNJ17shdwGHBgzTYbA2tHxJuSjgemRMTaktYExrSy35WAL0fE1yVdB3wJuKpppaT+wLXA8IgYKWlBYBrwGrBNRLwvaSXgGqDSnX79+vXj5+eez847bsuMGTPY/4CDWH2NNRodltXh0ZEPctuN17DSqmuwx7bp9+jR3zmZzbfctsGRWUdd/qMD+Oz6KzFw4fkZf9fpnH7hnUx5aypnf2dPBi4yPzf94jDGPjORXZqNMLfZ090dgBFxf1cdpkeTdkQ8KmlxSYOBQcAU4N/NNvtDRLyZ728GnJuf+4Sksa3s+vmIGJPvPwIs22z9KsDLETEy7+ttAEnzAedLWheYAazcWux5UMEhAEsvM2efG7nd9juw3fZ1/Qi0XuTTG27C4y+90+gwrBP2P+GyFpff+ufWvgqtLgUN22lEn/YNpD7sJUmVd3NTa+539K38oOb+DGBAs/UCZh1hBccArwLrkLoK3m/tAHlQwUUA668/rKV9mZlZgXw97baNAPYmJe72Ro3fT2pCJ88es1adx3waGCxpg7yvBST1AxYiVeAfk/ob+ta5fzMzs27X40k7IsYBCwATI+Lldjb/JTAoN4t/BxgLvFXHMT8EhgPnSXoM+AMwT97//pIeIjWNT219L2ZmNifqgWlMu0xDTvmKiLVq7r8ArJnvXwZcVrPp+8C+eaDYCsC9wIt522XzNpObnp+Xn1Vz/4Ca+yNJc77Weg5Yu+bxCXW9IDMzK1Y5jeO94DztdswL/DmflC7g8Fw1m5mZdY2CsnavTtp5YvVKn4JlZmbWpFcnbTMzs+6Upg4vp9R20jYzs+rq4YFkndXwC4aYmZlZx7jSNjOzSiuo0HbSNjOziisoaztpm5lZhamogWju0zYzMyuEK20zM6u0kkaPO2mbmVlliaK6tJ20zcys4grK2u7TNjMzK4QrbTMzq7SSRo87aZuZWaV5IJqZmVkhCsrZ7tM2MzMrhSttMzOrrsKu8uWkbWZmFVdO1nbSNjOzyhJlVdru0zYzMyuEK20zM6u0ggptJ20zM6u2kprHnbTNzKzSSpoRzX3aZmZmhXClbWZm1VZOoe2kbWZm1VZQznbSNjOz6lJhM6K5T9vMzKwQrrTNzKzSSho97qRtZmbVVk7OdvO4mZlZKVxpm5lZpRVUaDtpm5lZtZU0etxJ28zMKkxFDURzn7aZmVkhXGmbmVllibKax11pm5mZFcKVtpmZVZorbTMzM+tyrrTNzKzSSho97qRtZmbVVdhVvpy0zcysskRZM6K5T9vMzKwQrrTNzKzaCiq1nbTNzKzSPBDNzMysECUNRHOftpmZWSFcaZuZWaUVVGi70jYzs4pTF9zaO4S0naRnJI2X9N16Q3WlbWZmldbdA9Ek9QUuALYBJgAjJd0aEU/O7r5caZuZmXWvDYHxEfGviPgQGAHsWs+OXGmbmVll9dD1tIcAL9U8ngB8pp4dOWnXYfToRyYPmEsvNjqOHjAQmNzoIKxL+LOcc1Tls/xUTxxk9OhH7h4wlwZ2wa7mkTSq5vFFEXFRvt/Sz4Ko5yBO2nWIiEGNjqEnSBoVEcMaHYd1nj/LOYc/y64VEdv1wGEmAEvXPB4KTKpnR+7TNjMz614jgZUkLSepP7A3cGs9O3KlbWZm1o0iYrqko4C7gb7AbyNiXD37ctK2tlzU/iZWCH+Wcw5/lgWKiDuBOzu7H0XU1RduZmZmPcx92mZmZoVw0jYzMyuEk7aZzUQq6UKFZtXipG1mn5CkyANdJO0nabNGx2Szr6UfXv4xNmdw0ra6SPKZB3OgmoS9Helc0mcaG5HNrqYfXpI+L2m4pC9D+myduMvnpG2zTdIRwCWSTpX02UbHY11L0obAQcBjEfF6XuYv+0Lk5Lw
jcB7wAXCupO/XrPNnWTAnbZstko4E9gTOJ1255gxJOzc2KuuMFr7E3wD+DawtaVPwl31JJC0FHA98CZgBvAgcI+mn8N/WFCuTmzitwyQtCCwC7EKqxAAuB74t6eOIuKNhwVldmvVh70S6iMEbwMnACcDO+bN90F/2vVdNk/i8EfGypP2AQcBpEbFebj15SNK0iDipweFaJ7jStg6RtG5EvE1qchtMStxfJM2f2xc4UtJ8rsaKIwBJhwFnAMOAm4DdgXNJzatfyV/61gvVJOydgBGSFoiICcACwEN5s36k1rH7GxWndQ0nbWuXpG8Cp0kaGhFvkf7dTCNVZVsAo4ADImKqq7EySFo1f9l/LGkwadDZVyLiVGA74HRgU+BXwMvA842L1trSNOgM+CFwXkS8k1d9BCwi6RfAlcDVEXGPf1iXzdOYWpsk7Qp8D9g2Iv4jacmIeEXSr4GlgNWB3SLiiYYGah0maX7gHOBj4ND8pX8ZqRJ7NCJmSPoisENEHCypf0R82LiIrTlJSwBLRsRj+fGhwIyIuFjS3BHxQV7+WdLf6RsRcW/jIrau4krbWiSp6d/Gp4DRwIqSTgNul/T3iDgUOAzYyAm7OO+REvQMUvIGmAgcByyUHy8GzJ3/HXzU0wFau/YCpkmaN1fOiwBNp3Y1JeyNgX9HxHVO2HMOV9rWIkmLRMQUSYsA15G+4C8H7gAuBs6IiDENDNFmU7NBZ32A1YBvAxMj4vuSLgSWBN4BVgUO9A+y3ikn6iWAE0lN308AvwCmAscCnwEuA74WEX9tUJjWDZy0bRaSDgF2BV4AxkTEb2rW7Qr8CNgqIl5uTIQ2u5ol7OVIXaEvSFqd9CX/SkScKGkNYAjwbES80LiIrSXNPsd5Sf3YfUk/rCcDpwEL59tpEXF7YyK17uKkbTOR9CXgFNIpXSuTBpq9QfpF/0XgVGBPV2BlknQMMJzUNTYO+AEwL/AtYDpwmAcT9m55JP98pAGg00in5y0IXBERj0haCJg7Il6rTfI2Z3CfdsW1MJJ0QeDiiBhJOvXnV6RTvJYG/gJs54RdptzHORzYBtiElKSPjYjHgZ8D75OaXK2Xafo7lbQ5MII0OPRMYDPSj+wpwFGSNo2ItyLiNfBEKnMiJ+0KkzQXsHW+f5SkLUh//EdKWi0ipkXEaNIgl4ER8WpEvNiwgG22tPCD7F3SgLO5ImI6aSDhZyR9Lf8Q+3ZEvNLTcVrrmj7DPMJ/E+DrwLak7qtnSLOebU46RW8i8J/GRGo9xUm72voCu0t6ADgUeCEifgdcCJwnaQtJewKLA5MaF6bNrmZ9n/tLWpvUlPoBaXrShSJiBqk15X0An9bVu+TpSC+U1Dcv2hzYB1goIt4DbgSezcu2iIgTI2JcY6K1nuJpTCssIt6XNAL4Aqnp+yWlq3f9mtR0ejzpS/7rETGxcZHa7KpJ2EcChwDDI2K8pD8BRwPPSfqAdOrQro2L1FqTpyM9B/iUpCkRcaakgaREvkceSHgz6XvcLSQV4YFoFZa/AOYiJegfk5pPz8iTp8wbEe9JmisifJ5uISQtBrwVEdNzpTYC+Gptt4akL5BGiK8MXBoRzzYmWmuNpL65JQRJlwDrkc7YmCLpFNK4hP3zD7FPJlOxOZ+TdkXlCmxHYDzwFHAF6Tzs8aTJNHYnDVZ6x4NZyiBpRVLlfDbwIWmClNuAL0TE200zm0kaGBGTGxmrtS9/nm9FxOv5HPrVSLMPTpH0I1IL2WbABxHxcSNjtZ7jPu0KkrQ36fKahwCLAp+LiKnAwaSJNRYlzUP9thN2OSJiPGm0/2rANpGuhf0Y8HNJ/XLCPgi4UtI8noO696kZJb4h6dzr/5O0YEQcRuq/vkHSohFxArB3HizqhF0hrrQrJs87/QXSNXbXB/YgzTE9XdJyEfF8/oKf3tBArcNqRxjnx6cCywKXkC728Q3gs6Sqe2dgP5+213vl7osTSK1fRwNjgOMi4k1JV5OmFt7Cf6PV5KR
dIZKOAOYmjRb+MfBwRDSd8vV1YEXgJPePlaPZKPHdgVcj4u+STiSdX38j8GdSy8p7wNMR8VzDArZ25Yu3PBwRv5Q0N3AVaQKcvSPiHaXL5I5pZIzWOB49XhH5KkAHALtHxERJywKrS1oG2Il0ytdXnLDLUpOwjyVdMOKrefkPJB1PuuSmgJs8oLB3k7QD0J90gZ4FlK6L/U7+sf0ocBLpXPoxnumsutynXQGSBgDbA/8LfCDpcNJgs3VJ01huQUrYPsezELX90ZLWJHVzbEI6lWtrSftHxFmk+eN3Ip0lYL2UpHWBo0jdVs8AGwLrS1qANLnRn4AdcouYZzqrMFfaFRAR0yTdSbrQxwTSl8KLwDWkeYs/cv9YOZo1ie9IGv0/iXR61yukqUgXk7RYRPwwjxZ/r3ERW3P5oi3rRsTN+dS8bwEfR8Sjef3KpIGh3yRdcW0XYCvSvAlWYe7TrghJ8wBrAf/MA1r2IX0p7Ogv9DJJ2gb4Pmk+8cWBrwG/joinJB0IDI2I0xsZo7VM0vqkls6ncxP4QaQuqosi4pK8zUqkFpJ3gDWAs0gX63mqQWFbL+CkXTFK11E+kPTL/sseRVwmSRsBNwPfiohrm637GnAkaZS4uzx6qVxt3wX8OCJ+K2k/0lSlf4uIK2q2WwK4Fjg6IsY2JlrrLdw8Xj3zAB8De/kXezlaGHg0mjT17MmSfhcRH+SxC8sA25Fmy3LC7qUkLQ58hXQWx8GSPo6IyyR9TOq7VkRcDhARr0raPiKmNTJm6x1caVeQR56WpVkf9rakaymPASYDZwArkc4KeE9Sf6Cvv+B7t3wRkGuAl0gzEV4KnB0RV0v6KvBopEumms3ESdusEPkUrp2BUcDGpAk4/kGq1jYiTbjhZN2LSRoMDIiIf+a5/y8FfgIEcBlwWm3TuFlzPuXLrACSVgfWjIjPka6b/DZwP+nUvROAvwGDGhehtUfSfMD3gJ/lgYJNn+HSEXE/aSDa8w0M0QrgStusl5O0AbApaWrSxUnn7e4SER9J2gv4Y0S82cAQrYMkLQisTbqoy83A50jdGztExDN5G3dfWatcaZv1YnkSlc+RkvZE0jnYx+aEfQBpwpx5GhehzY58EZ77gd2AkaSLgCxBGqfQtI0TtrXKlbZZL1VzTfN+wO+BKaTJcZYHXiMl8r08Srxsklb2Nc2to5y0zXohSVuSKuyREXF7nkhlTdJ5vYuTLp86OiJebGCY1gmS+tReVtPN4tYRPk/brHd6gVRR/yTPjDUd2BV4ICL+0sjArGs0vw62E7Z1hCtts14sz0G9N+mSqicA1wP7AtP9JW9WPU7aZr1cvqaygOOB69z/aVZdTtpmvZz7Os2siZO2mZlZIXyetpmZWSGctM3MzArhpG1mZlYIJ20zM7NCOGmbmZkVwknbrBtImiFpjKQnJF0vad5O7OsySXvk+xfny3S2tu0Wkjap4xgv5Os7d2h5s23enc1jnZKvDW5ms8lJ26x7TIuIdSNiTeBD4LDalZL61rPTiDg4Ip5sY5MtgNlO2mZWBidts+73N2DFXAX/WdL/AY9L6ivpp5JGShor6VBIk6lIOl/Sk5LuIF0ghLzuPknD8v3tJI2W9JikeyUtS/pxcEyu8j8raZCkG/MxRkraND93MUn3SHpU0q9JM661SdLvJD0iaZykQ5qt+1mO5V5Jg/KyFSTdlZ/zN0mrdsm7aVZhvmCIWTfKl9XcnnR1LoANgTUj4vmc+N6KiA3yVKUPSLoHWA9YBViLdK3lJ4HfNtvvIOA3wOZ5X4tGxJuSLgTejYiz8nb/B/w8Iu6XtAxwN7AacDJwf0ScJmlHYKYk3IqD8jEGACMl3RgRb5CuBT06Io6TdFLe91HARcBhEfGcpM8AvwS2rONtNLPMSdusewyQNCbf/xtwCanZ+uGIeD4v/wKwdlN/NbAQsBKwOXBNRMwAJkn6Uwv73wj4a9O+IuLNVuLYGlhd+qSQXlDSAvkYX8zPvUPSlA6
8pqMl7Z7vL51jfQP4GLg2L78KuEnS/Pn1Xl9z7Lk7cAwza4OTtln3mBYR69YuyMlrau0i4BsRcXez7XYA2ptfWB3YBlIX2MYRMa2FWDo8h7GkLUg/ADaOiPck3QfM08rmkY/7n+bvgZl1jvu0zRrnbuBwSXNBugynpPmAvwJ75z7vpYDPt/DcB4HPSVouP3fRvPwdYIGa7e4hNVWTt1s33/0rsE9etj2wSDuxLgRMyQl7VVKl36QP0NRa8BVSs/vbwPOS9szHkKR12jmGmbXDSduscS4m9VePlvQE8GtS69fNwHPA48CvgL80f2JEvE7qh75J0mP8t3n6NmD3poFowNHAsDzQ7Un+O4r9VGBzSaNJzfT/bifWu4B+ksYCpwMP1aybCqwh6RFSn/Vpefk+wNdyfOOAXTvwnphZG3yVLzMzs0K40jYzMyuEk7aZmVkhnLTNupikuSVdK2m8pH/kSU9a2m547mseJ+knNcsPk/R47pe+v2naUkmfyhOVjMnPOazmOZfkSVbGSrohn3LVFa9lF0nfreN5n0wC0xMkrZ/fs/GSfqGa88yabXdC3uYZSdvWLO8v6SJJz0p6WtKXatbtpTTRzbh83juSPp8/h6bb+5J26/YXapXnPm2rBEn9ImJ6Dx3rCGDtiDhM0t7A7hExvNk2iwGPAutHxOuSLgeuiIh7JS2YR18jaRfgiIjYTlJ/0t/sBzkpPwFsEhGTmj3nbOC1iDizJ15vS/IpYcdHxKgeOt7DwDdJA+TuBH4REb9vts3qwDWkCW4GA38EVo6IGZJOBfpGxImS+gCLRsRkSSsB1wFbRsQUSYtHxGvN9rsoMB4YGhHvdfNLtYpzpW0NpVamxlSzKTrzsvklXZorqrFN1ZBqLlghaQ9Jl+X7l0k6W9KfgR9L2lDS35Wm7vy7pFXydn0lnVWz329I2krSzTX73UbSTR18WbsCl+f7NwBbtVD5LQ88m0eBQ0ogXwJoSr7ZfOTzqSPiw4j4IC+fm5q/35qELWBA03MkDZN0cfMAJS2bK8qLlS5qcrWkrSU9IOk5SRvm7Q6QdH6+v2fe9jFJf23tvWvhWL+SNCp/xqfWLD8zV7BjJZ3V2jHao3Ra3IIR8WCkKuQKYLcWNt0VGBERH+RJacaTEjjAQcCP8nv5cURMzsu/DlwQEVPyuteY1R7A752wrSd4chVrtFmmxiQlo5mm6Mzb/i9p2s+1ACS1d24xwMrA1rmaWjDvc7qkrYEzSInyEGA5YL28blFgCnCBpEE5sR4IXJqPey1pmtHmzo6IK4AhwEsAeX9vAYsBk2u2HQ+sqtR0PoGUZPo3rZR0JHBsXrZlzfKlgTuAFYFvR8SkmnWXAjuQTiM7Lh9/FHBwK+/NisCe+fWPJJ1jvRmwC/A9Zk18JwHbRsRESQvnZS29d819P3/GfYF7Ja2dX/PuwKoRETX7m+UY+cfVtS3sF9IFUobk/TWZkJc1N4SZT1WbAAypOfbpSpPI/BM4KiJeJf37QdIDQF/glIi4i5ntDZzdSnxmXcpJ2xqtpakxB9HyFJ1bk74gycs7MvXm9Xk6UEgThFyemzwDmKtmvxc2NZ83HU/SlcC+ORluDHw1r5+pqbsFLfWnztQPlZtaDyclo4+Bv5Oq76b1F5B+NHwFOBHYPy9/iTT16WDgd5JuyMmFiDgwJ8bzgOHkHxlteD4iHs+vdRxwb06gjwPLtrD9A8Blkq4DmlodWnzvmtkrt6L0A5YCVif9sHgfuFjpoii3t3aMiHgGWLe1F9FCKwa0PNtba9v1A4YCD0TEsZKOBc4C9svrViL9OBgK/E3SmhHxn3zspUhzxN89667Nup6bx61hNPPUmOuQ+njnofUpOltbXrus+dSatdOGng78OV8uc+eabVvb76XAvsCXScl/eo77Ws08CKnp9tX8vAmkHyBNFwxZCJglmUXEbRHxmYjYGHiGNKFKcyNooak3V9jjgM82Wz6D9EPgS82f04IPau5/XPP4Y1r
4QR8Rh5F+QCwNjFHql29zOlWlGduOB7aKiLVJrQTz5PdyQ+DG/Pruau0YklZp5f0ek6vkCaSE2mQoMIlZffK5NNvuDeA90qQ2ANcDn655zi0R8VH+EfkMKYk32Qu4OSI+au09MOtKTtrWSK1NjdnaFJ3Np+Rsah5/VdJqSgOImqr21o43Md8/oGb5PcBhOcF+crycGCeRkshlTRtHxPB8rezmtyvyJreSK2NSf+efooURn5IWr3kdR5BmSCO3BDTZkZzMJQ3N3QhNz9kUeEbJinm5SD9Ins6PN5R0BV1A0goR8Y+IOInU1L80rbx3NRYk/XB6S9ISpCueoTSQbqGIuBP4FrmSbukYEfFMK+/3uhHxn4h4GXhH0kb59X8VuKWFl3AraXrYufO/rZVIF3AJ0kxyW+TttiK1BAD8jjyNrKSBpObyf9Xs88ukwW1mPcLN49ZId5G+8MeSKpiHIE3RmZtTb8qJ+DVgG+AHpCbjJ4AZpKk4bwK+S2pefYk0orq1051+QmoePxaovXLWxaQv47GSPiL1p5+f110NDIqIJ+m4S4ArJY0nVdifNOlLGhP/vYjGufrvfNynRcSz+f5Ruc/9I1LfetMPgNWAn0kKUoV7VkQ8nt+jy3OfvYDHgMPzc5YBZrpYSCf8NP+gEHBvPs4TtP7eERGPSXqU1CrwL1LzN6T50W+R1NSyckwbx+iIw0k/rAYAv8+3ptH3wyLipIgYl5vdnwSmA0fWdJ18h/SZnQM0jWGA1Oz9BaUpYGeQxhG8kfe9LOmHyyzTzJp1F5/yZdYGpZHTj0bEJY2OpR6SfgpcGRFjGx2LmXWek7ZZK5QugDEV2KbmVCszs4Zx0jYzMyuEB6KZmZkVwknbzMysEE7aZmZmhXDSNjMzK4STtpmZWSGctM3MzArx/8JpoYjLihYHAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "cm_plot = plot_confusion_matrix(cm, data.target_names, normalize=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "# Save confusion matrix image\n", + "confusion_matrix_image = './reports/confusion_matrix.png'\n", + "cm_plot.savefig(confusion_matrix_image)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LineaArtifact(name='plot-confusion-matrix', _version=1)" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#save confusion matrix to lineapy\n", + "lineapy.save(plot_confusion_matrix, \"plot-confusion-matrix\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#commenting for change\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": 
"rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/step-0-prototype.ipynb b/notebooks/step-0-prototype.ipynb index 3f7fee6c..608a10f2 100644 --- a/notebooks/step-0-prototype.ipynb +++ b/notebooks/step-0-prototype.ipynb @@ -383,7 +383,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.9.2" }, "toc": { "base_numbering": 1, diff --git a/notebooks/step-1-organize-ml-project.ipynb b/notebooks/step-1-organize-ml-project.ipynb index 3a115fea..dcbbae38 100644 --- a/notebooks/step-1-organize-ml-project.ipynb +++ b/notebooks/step-1-organize-ml-project.ipynb @@ -1,5 +1,25 @@ { "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# test code\n", + "\n", + "def add(a,b):\n", + " added = a + b\n", + " retur" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, @@ -459,7 +479,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.10.12" }, "toc": { "base_numbering": 1, diff --git a/notebooks/step-2-create-config-file.ipynb b/notebooks/step-2-create-config-file.ipynb index a95d4434..364049ef 100644 --- a/notebooks/step-2-create-config-file.ipynb +++ b/notebooks/step-2-create-config-file.ipynb @@ -480,7 +480,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.10.12" }, "toc": { "base_numbering": 1, diff --git a/notebooks/step-3-reusable-code.ipynb b/notebooks/step-3-reusable-code.ipynb new file mode 100644 index 00000000..abe9652c --- /dev/null +++ 
b/notebooks/step-3-reusable-code.ipynb @@ -0,0 +1,786 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:17:31.460557Z", + "start_time": "2019-06-16T21:17:29.395297Z" + } + }, + "outputs": [], + "source": [ + "import itertools\n", + "import joblib\n", + "import json\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.metrics import confusion_matrix, f1_score\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.model_selection import train_test_split\n", + "import yaml" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/Users/jenif/course-ds-base\n" + ] + } + ], + "source": [ + "# Go to project root folder\n", + "%cd .." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Config" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'base': {'random_state': 42},\n", + " 'data': {'dataset_csv': 'data/raw/iris.csv',\n", + " 'features_path': 'data/processed/featured_iris.csv',\n", + " 'test_size': 0.2,\n", + " 'testset_path': 'data/processed/test_iris.csv',\n", + " 'trainset_path': 'data/processed/train_iris.csv'},\n", + " 'reports': {'confusion_matrix_image': 'reports/confusion_matrix.png',\n", + " 'metrics_file': 'reports/metrics.json'},\n", + " 'train': {'clf_params': {'C': 0.001,\n", + " 'max_iter': 100,\n", + " 'multi_class': 'multinomial',\n", + " 'solver': 'lbfgs'},\n", + " 'model_path': 'models/model.joblib'}}\n" + ] + } + ], + "source": [ + "# Read config\n", + "import pprint\n", + "\n", + "with open('params.yaml') as conf_file:\n", + " config = yaml.safe_load(conf_file)\n", + "\n", + "pprint.pprint(config)" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "# Load dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:17:31.485189Z", + "start_time": "2019-06-16T21:17:31.473720Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)target
05.13.51.40.20
14.93.01.40.20
24.73.21.30.20
34.63.11.50.20
45.03.61.40.20
\n", + "
" + ], + "text/plain": [ + " sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \\\n", + "0 5.1 3.5 1.4 0.2 \n", + "1 4.9 3.0 1.4 0.2 \n", + "2 4.7 3.2 1.3 0.2 \n", + "3 4.6 3.1 1.5 0.2 \n", + "4 5.0 3.6 1.4 0.2 \n", + "\n", + " target \n", + "0 0 \n", + "1 0 \n", + "2 0 \n", + "3 0 \n", + "4 0 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get data \n", + "\n", + "import pandas as pd\n", + "from sklearn.datasets import load_iris\n", + "\n", + "data = load_iris(as_frame=True)\n", + "dataset = data.frame\n", + "dataset.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0: setosa\n", + "1: versicolor\n", + "2: virginica\n" + ] + }, + { + "data": { + "text/plain": [ + "[None, None, None]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# print labels for target values \n", + "\n", + "[print(f'{target}: {label}') for target, label in zip(data.target.unique(), data.target_names)]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:17:32.328046Z", + "start_time": "2019-06-16T21:17:32.323611Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['sepal_length', 'sepal_width', 'petal_length', 'petal_width']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# feature names\n", + "\n", + "dataset.columns = [colname.strip(' (cm)').replace(' ', '_') for colname in dataset.columns.tolist()]\n", + "\n", + "feature_names = dataset.columns.tolist()[:4]\n", + "feature_names" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Save raw data\n", + "dataset.to_csv(config['data']['dataset_csv'], index=False)" + ] + }, + { 
+ "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Features engineering" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:21:02.150708Z", + "start_time": "2019-06-16T21:21:02.144518Z" + } + }, + "outputs": [], + "source": [ + "dataset['sepal_length_to_sepal_width'] = dataset['sepal_length'] / dataset['sepal_width']\n", + "dataset['petal_length_to_petal_width'] = dataset['petal_length'] / dataset['petal_width']\n", + "\n", + "dataset = dataset[[\n", + " 'sepal_length', 'sepal_width', 'petal_length', 'petal_width',\n", + "# 'sepal_length_in_square', 'sepal_width_in_square', 'petal_length_in_square', 'petal_width_in_square',\n", + " 'sepal_length_to_sepal_width', 'petal_length_to_petal_width',\n", + " 'target'\n", + "]]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:21:02.987144Z", + "start_time": "2019-06-16T21:21:02.976092Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthsepal_length_to_sepal_widthpetal_length_to_petal_widthtarget
05.13.51.40.21.4571437.00
14.93.01.40.21.6333337.00
24.73.21.30.21.4687506.50
34.63.11.50.21.4838717.50
45.03.61.40.21.3888897.00
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width \\\n", + "0 5.1 3.5 1.4 0.2 \n", + "1 4.9 3.0 1.4 0.2 \n", + "2 4.7 3.2 1.3 0.2 \n", + "3 4.6 3.1 1.5 0.2 \n", + "4 5.0 3.6 1.4 0.2 \n", + "\n", + " sepal_length_to_sepal_width petal_length_to_petal_width target \n", + "0 1.457143 7.0 0 \n", + "1 1.633333 7.0 0 \n", + "2 1.468750 6.5 0 \n", + "3 1.483871 7.5 0 \n", + "4 1.388889 7.0 0 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# Save features\n", + "dataset.to_csv(config['data']['features_path'], index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Split dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:21:07.438133Z", + "start_time": "2019-06-16T21:21:07.431649Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "((120, 7), (30, 7))" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_dataset, test_dataset = train_test_split(\n", + " dataset, test_size=config['data']['test_size'],\n", + " random_state=config['base']['random_state']\n", + ")\n", + "train_dataset.shape, test_dataset.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# Save train and test sets\n", + "train_dataset.to_csv(config['data']['trainset_path'])\n", + "test_dataset.to_csv(config['data']['testset_path'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Train" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:21:10.932148Z", + "start_time": "2019-06-16T21:21:10.927844Z" + } + }, + "outputs": [], + "source": 
[ + "# Get X and Y\n", + "\n", + "y_train = train_dataset.loc[:, 'target'].values.astype('int32')\n", + "X_train = train_dataset.drop('target', axis=1).values.astype('float32')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:21:55.427365Z", + "start_time": "2019-06-16T21:21:55.416431Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "LogisticRegression(C=0.001, multi_class='multinomial', random_state=42)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create an instance of Logistic Regression Classifier CV and fit the data\n", + "\n", + "logreg = LogisticRegression(\n", + " **config['train']['clf_params'],\n", + " random_state=config['base']['random_state']\n", + ")\n", + "logreg.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['models/model.joblib']" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "joblib.dump(logreg, config['train']['model_path'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Evaluate" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:21:55.875303Z", + "start_time": "2019-06-16T21:21:55.864724Z" + } + }, + "outputs": [], + "source": [ + "from src.report.visualization import plot_confusion_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:21:56.090756Z", + "start_time": "2019-06-16T21:21:56.086966Z" + } + }, + "outputs": [], + "source": [ + "# Get X and Y\n", + "\n", + "y_test = test_dataset.loc[:, 'target'].values.astype('int32')\n", + "X_test = test_dataset.drop('target', axis=1).values.astype('float32')" + ] + }, + { + "cell_type": 
"code", + "execution_count": 21, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:21:56.270245Z", + "start_time": "2019-06-16T21:21:56.265054Z" + } + }, + "outputs": [], + "source": [ + "prediction = logreg.predict(X_test)\n", + "cm = confusion_matrix(prediction, y_test)\n", + "f1 = f1_score(y_true = y_test, y_pred = prediction, average='macro')" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:21:56.493617Z", + "start_time": "2019-06-16T21:21:56.489929Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9305555555555555" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# f1 score value\n", + "f1" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "# Save metrics\n", + "metrics = {\n", + " 'f1': f1\n", + "}\n", + "\n", + "with open(config['reports']['metrics_file'], 'w') as mf:\n", + " json.dump(\n", + " obj=metrics,\n", + " fp=mf,\n", + " indent=4\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:21:56.966279Z", + "start_time": "2019-06-16T21:21:56.726149Z" + } + }, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'np' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m--------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcm_plot\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mplot_confusion_matrix\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtarget_names\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mnormalize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/course-ds-base/src/report/visualization.py\u001b[0m in \u001b[0;36mplot_confusion_matrix\u001b[0;34m(cm, target_names, title, cmap, normalize)\u001b[0m\n\u001b[1;32m 39\u001b[0m \"\"\"\n\u001b[1;32m 40\u001b[0m \u001b[0maccuracy\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcm\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcm\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 41\u001b[0;31m \u001b[0mmisclass\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0maccuracy\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 42\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcmap\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'np' is not defined" + ] + } + ], + "source": [ + "cm_plot = plot_confusion_matrix(cm, data.target_names, normalize=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'cm_plot' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m--------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Save confusion matrix 
image\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mcm_plot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msavefig\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'reports'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'confusion_matrix_image'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'cm_plot' is not defined" + ] + } + ], + "source": [ + "# Save confusion matrix image\n", + "cm_plot.savefig(config['reports']['confusion_matrix_image'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, 
+ "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/step-4-build-ml-pipeline.ipynb b/notebooks/step-4-build-ml-pipeline.ipynb new file mode 100644 index 00000000..230d2b08 --- /dev/null +++ b/notebooks/step-4-build-ml-pipeline.ipynb @@ -0,0 +1,437 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2019-06-16T21:17:31.460557Z", + "start_time": "2019-06-16T21:17:29.395297Z" + } + }, + "outputs": [], + "source": [ + "# Assist with the automatic loading of the Python module in this jupyter notebook\n", + "%load_ext autoreload \n", + "%autoreload 2\n", + "\n", + "import itertools\n", + "import joblib\n", + "import json\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.metrics import confusion_matrix, f1_score\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.model_selection import train_test_split\n", + "import yaml\n", + "\n", + "from src.report.visualization import plot_confusion_matrix " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/mnt/c/Users/MR-BEST/course-ds-base-root/course-ds-base\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/mr-best/.local/lib/python3.10/site-packages/IPython/core/magics/osm.py:417: UserWarning: using dhist requires you to install the `pickleshare` library.\n", + " self.shell.db['dhist'] = compress_dhist(dhist)[-100:]\n" + ] + } + ], + "source": [ + "# Go to project root folder\n", + "\n", + "%cd ..\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Config" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, 
+ "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "data load completed successfully\n" + ] + } + ], + "source": [ + "# Load the function to load raw data\n", + "\n", + "from src.stages.data_load import data_load\n", + "\n", + "# Call function\n", + "data_load(config_file = 'params.yaml')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "data load completed successfully\n" + ] + } + ], + "source": [ + "# Shell prompt for running \"load_data\" function\n", + "\n", + "!python3 src/stages/data_load.py --config=params.yaml" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "iris.csv\n" + ] + } + ], + "source": [ + "%%bash \n", + "\n", + "# View the Raw Iris dataset saved \n", + "\n", + "ls data/raw" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extract feautures using python module at src/stages/featurize.py" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-01-15 09:30:00,157 — FEATURIZE — INFO — Load the raw data\n", + "2024-01-15 09:30:00,166 — FEATURIZE — INFO — Curate by extraction of features from the dataset\n", + "2024-01-15 09:30:00,169 — FEATURIZE — INFO — Save features\n" + ] + } + ], + "source": [ + "# Load,curate and save features (x1,...xn) the function to load raw data\n", + "\n", + "from src.stages.featurize import featurize\n", + "\n", + "# Call function\n", + "featurize(config_path = 'params.yaml')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-01-15 09:30:21,699 — FEATURIZE — INFO — Load the raw data\n", + "2024-01-15 09:30:21,706 — FEATURIZE — INFO 
— Curate by extraction of features from the dataset\n", + "2024-01-15 09:30:21,708 — FEATURIZE — INFO — Save features\n" + ] + } + ], + "source": [ + "# Shell command for running the featurize stage: load, curate and save features (x1,...xn)\n", + "\n", + "!python src/stages/featurize.py --config=params.yaml" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Split dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-01-15 09:31:43,126 — DATA_SPLIT — INFO — Load features\n", + "2024-01-15 09:31:43,137 — DATA_SPLIT — INFO — Split features into train and test sets\n", + "2024-01-15 09:31:43,140 — DATA_SPLIT — INFO — Save features for training and testing models\n" + ] + } + ], + "source": [ + "# Call the Split module by loading saved features from local memory, splitting into train and test sets; and saving completion\n", + "\n", + "from src.stages.data_split import data_split\n", + "\n", + "# Call function\n", + "data_split(config_path = 'params.yaml')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-01-15 09:31:51,795 — DATA_SPLIT — INFO — Load features\n", + "2024-01-15 09:31:51,804 — DATA_SPLIT — INFO — Split features into train and test sets\n", + "2024-01-15 09:31:51,805 — DATA_SPLIT — INFO — Save features for training and testing models\n" + ] + } + ], + "source": [ + "# Shell prompt for running \"data split\" function\n", + "\n", + "!python3 src/stages/data_split.py --config=params.yaml" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Train" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-01-15 10:22:31,528 — TRAIN — INFO — Get model/estimator name\n", +
"2024-01-15 10:22:31,529 — TRAIN — INFO — The name of Model/Estimator: logreg\n", + "2024-01-15 10:22:31,530 — TRAIN — INFO — Load train dataset\n", + "2024-01-15 10:22:31,544 — TRAIN — INFO — Train model/estimator\n", + "Fitting 3 folds for each of 1 candidates, totalling 3 fits\n", + "2024-01-15 10:22:31,577 — TRAIN — INFO — Best score: 0.857564307288572\n", + "2024-01-15 10:22:31,578 — TRAIN — INFO — Trained Model Saved\n" + ] + } + ], + "source": [ + "# Name model,Load, train and save model/estimator\n", + "\n", + "from src.stages.train import train_model\n", + "\n", + "# Call function\n", + "train_model(config_path = 'params.yaml')" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-01-15 10:22:21,429 — TRAIN — INFO — Get model/estimator name\n", + "2024-01-15 10:22:21,429 — TRAIN — INFO — The name of Model/Estimator: logreg\n", + "2024-01-15 10:22:21,429 — TRAIN — INFO — Load train dataset\n", + "2024-01-15 10:22:21,440 — TRAIN — INFO — Train model/estimator\n", + "Fitting 3 folds for each of 1 candidates, totalling 3 fits\n", + "2024-01-15 10:22:21,463 — TRAIN — INFO — Best score: 0.857564307288572\n", + "2024-01-15 10:22:21,463 — TRAIN — INFO — Trained Model Saved\n" + ] + } + ], + "source": [ + "# Shell prompt for running \"train model\" function\n", + "\n", + "!python3 src/stages/train.py --config=params.yaml" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 4. 
EVALUATE MODEL : load dataset,test, Evaluate with F1 and CM and save model/estimator" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-01-15 10:51:16,267 — EVALUATE — INFO — Load model\n", + "2024-01-15 10:51:16,276 — EVALUATE — INFO — Load test dataset\n", + "2024-01-15 10:51:16,288 — EVALUATE — INFO — Evaluate (build report)\n", + "2024-01-15 10:51:16,295 — EVALUATE — INFO — Save metrics\n", + "2024-01-15 10:51:16,302 — EVALUATE — INFO — F1 metrics file saved to : reports/metrics.json\n", + "2024-01-15 10:51:16,303 — EVALUATE — INFO — Save confusion matrix\n", + "2024-01-15 10:51:16,474 — EVALUATE — INFO — Confusion matrix saved to : reports/confusion_matrix.png\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAqwAAAJxCAYAAACHYuDBAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAA9hAAAPYQGoP6dpAAByIUlEQVR4nO3dd3gU5dfG8XsTSG/UJEAMPYTehBcBkS7SURAFpKPSpBeRKkVQpEuTXgQBQaQpAtKLlEgPLSDSixBqEpJ5/8DszzWACWTd3eT74ZrrYp+ZfebsssaTs2eeMRmGYQgAAACwU062DgAAAAB4FhJWAAAA2DUSVgAAANg1ElYAAADYNRJWAAAA2DUSVgAAANg1ElYAAADYNRJWAAAA2DUSVgAAANg1ElYAKc7JkydVrVo1+fr6ymQyacWKFck6/9mzZ2UymTR79uxknTclyJ49u1q0aGHrMACkMCSsAKzi9OnTev/995UzZ065ubnJx8dHZcuW1bhx4/TgwQOrnrt58+Y6dOiQhg0bpnnz5qlkyZJWPV9KdPToUQ0aNEhnz561dSgAIJNhGIatgwCQsqxevVoNGzaUq6ur3nvvPRUsWFDR0dHatm2bli1bphYtWmjatGlWOfeDBw/k4eGhfv36aejQoVY5h2EYioqKUtq0aeXs7GyVc9ja0qVL1bBhQ23atEmvvfZaop8XFRUlJycnpU2b1nrBAUh10tg6AAApS0REhBo3bqzg4GBt3LhRgYGB5n0dOnTQqVOntHr1aqud/9q1a5IkPz8/q53DZDLJzc3NavM7GsMw9PDhQ7m7u8vV1dXW4QBIgWgJAJCsRo0apbt372rGjBkWyWq83Llz66OPPjI/fvTokT799FPlypVLrq6uyp49uz7++GNFRUVZPC979uyqVauWtm3bplKlSsnNzU05c+bU3LlzzccMGjRIwcHBkqSePXvKZDIpe/bskqQWLVqY//53gwYNkslkshhbv369ypUrJz8/P3l5eSkkJEQff/yxef/Telg3btyo8uXLy9PTU35+fqpbt66OHTv2xPOdOnVKLVq0kJ+fn3x9fdWyZUvdv3//6W/sX1577TUVLFhQBw8eVIUKFeTh4aHcuXNr6dKlkqTNmzerdOnScnd3V0hIiH7++WeL5587d07t27dXSEi
I3N3dlSFDBjVs2NDiq//Zs2erYcOGkqSKFSvKZDLJZDLpl19+kfS/f4sff/xRJUuWlLu7u6ZOnWreF9/DahiGKlasqEyZMunq1avm+aOjo1WoUCHlypVL9+7d+9fXDAAkrACS1Q8//KCcOXPqlVdeSdTxbdq00YABA1S8eHGNGTNGFSpU0IgRI9S4ceMEx546dUpvvfWWqlatqtGjRytdunRq0aKFjhw5Iklq0KCBxowZI0l65513NG/ePI0dOzZJ8R85ckS1atVSVFSUhgwZotGjR6tOnTravn37M5/3888/q3r16rp69aoGDRqkbt26aceOHSpbtuwT+0AbNWqkO3fuaMSIEWrUqJFmz56twYMHJyrGP//8U7Vq1VLp0qU1atQoubq6qnHjxlq8eLEaN26sN954Q5999pnu3bunt956S3fu3DE/99dff9WOHTvUuHFjjR8/Xh988IE2bNig1157zZwwv/rqq+rcubMk6eOPP9a8efM0b948hYaGmucJDw/XO++8o6pVq2rcuHEqWrRogjhNJpNmzpyphw8f6oMPPjCPDxw4UEeOHNGsWbPk6emZqNcMIJUzACCZ3L5925Bk1K1bN1HHh4WFGZKMNm3aWIz36NHDkGRs3LjRPBYcHGxIMrZs2WIeu3r1quHq6mp0797dPBYREWFIMj7//HOLOZs3b24EBwcniGHgwIHG338UjhkzxpBkXLt27alxx59j1qxZ5rGiRYsamTNnNm7cuGEe++233wwnJyfjvffeS3C+Vq1aWcxZv359I0OGDE89Z7wKFSoYkoyFCxeax44fP25IMpycnIxdu3aZx3/88ccEcd6/fz/BnDt37jQkGXPnzjWPLVmyxJBkbNq0KcHx8f8W69ate+K+5s2bW4xNnTrVkGTMnz/f2LVrl+Hs7Gx06dLlX18rAMSjwgog2URGRkqSvL29E3X8mjVrJEndunWzGO/evbskJeh1zZ8/v8qXL29+nClTJoWEhOjMmTPPHfM/xfe+fv/994qLi0vUcy5duqSwsDC1aNFC6dOnN48XLlxYVatWNb/Ov/t7xVGSypcvrxs3bpjfw2fx8vKyqECHhITIz89PoaGhKl26tHk8/u9/f3/c3d3Nf4+JidGNGzeUO3du+fn5af/+/Yl4tY/lyJFD1atXT9Sx7dq1U/Xq1dWpUyc1a9ZMuXLl0vDhwxN9LgAgYQWQbHx8fCTJ4ivoZzl37pycnJyUO3dui/GAgAD5+fnp3LlzFuMvvfRSgjnSpUunP//88zkjTujtt99W2bJl1aZNG/n7+6tx48b69ttvn5m8xscZEhKSYF9oaKiuX7+eoFfzn68lXbp0kpSo15ItW7YEfbe+vr4KCgpKMPbPOR88eKABAwYoKChIrq6uypgxozJlyqRbt27p9u3b/3rueDly5Ej0sZI0Y8YM3b9/XydPntTs2bMtEmcA+DckrACSjY+Pj7JkyaLDhw8n6Xn/TL6e5mlLSBmJWJ3vaeeIjY21eOzu7q4tW7bo559/VrNmzXTw4EG9/fbbqlq1aoJjX8SLvJanPTcxc3bq1EnDhg1To0aN9O233+qnn37S+vXrlSFDhkRXlCUlOeH85ZdfzBfSHTp0KEnPBQASVgDJqlatWjp9+rR27tz5r8cGBwcrLi5OJ0+etBi/cuWKbt26Zb7iPzmkS5dOt27dSjD+zyquJDk5Oaly5cr68ssvdfToUQ0bNkwbN27Upk2bnjh3fJzh4eEJ9h0/flwZM2a0m4uLli5dqubNm2v06NHmC9jKlSuX4L1J7C8RiXHp0iV16tRJ1apVU61atdSjR48nvu8A8DQkrACSVa9eveTp6ak2bdroypUrCfafPn1a48aNkyS98cYbkpTgSv4vv/xSklSzZs1kiytXrly6ffu2Dh48aB67dOmSli9fbnHczZs3Ezw3/gr4fy61FS8wMFBFixbVnDlzLBK/w4cP66effjK/Tnvg7OycoIo7YcKEBNXj+AT7SUl
+UrVt21ZxcXGaMWOGpk2bpjRp0qh169aJqiYDgMSNAwAks1y5cmnhwoV6++23FRoaanGnqx07dmjJkiXmdTqLFCmi5s2ba9q0abp165YqVKigPXv2aM6cOapXr54qVqyYbHE1btxYvXv3Vv369dW5c2fdv39fkydPVt68eS0uNhoyZIi2bNmimjVrKjg4WFevXtVXX32lbNmyqVy5ck+d//PPP1eNGjVUpkwZtW7dWg8ePNCECRPk6+urQYMGJdvreFG1atXSvHnz5Ovrq/z582vnzp36+eeflSFDBovjihYtKmdnZ40cOVK3b9+Wq6urKlWqpMyZMyfpfLNmzdLq1as1e/ZsZcuWTdLjBLlp06aaPHmy2rdvn2yvDUDKRcIKINnVqVNHBw8e1Oeff67vv/9ekydPlqurqwoXLqzRo0erbdu25mO//vpr5cyZU7Nnz9by5csVEBCgvn37auDAgckaU4YMGbR8+XJ169ZNvXr1Uo4cOTRixAidPHnSImGtU6eOzp49q5kzZ+r69evKmDGjKlSooMGDB5svYnqSKlWqaN26dRo4cKAGDBigtGnTqkKFCho5cmSSL1CypnHjxsnZ2VkLFizQw4cPVbZsWfMasn8XEBCgKVOmaMSIEWrdurViY2O1adOmJCWsf/zxh7p27aratWurefPm5vEmTZpo2bJl6tWrl2rUqGFX7w8A+2Qy+E4GAAAAdoweVgAAANg1ElYAAADYNRJWAAAA2DUSVgAAANg1ElYAAADYNRJWAAAA2DXWYXUQcXFxunjxory9vZP1lokAAODpDMPQnTt3lCVLFjk52Ued7+HDh4qOjrba/C4uLnJzc7Pa/M+DhNVBXLx4UUFBQbYOAwCAVOn8+fPmu7XZ0sOHD+XunUF6dN9q5wgICFBERIRdJa0krA7C29tbkuRSYYBMaeznA4TU4/dvO9g6BAD4z92JjFTuHEHm/w/bWnR0tPTovlwLtJScXZL/BLHRunxklqKjo0lYkXTxbQCmNG4krLAJHx8fW4cAADZjd+14zi4yWSFhtdfbn5KwAgAAOBqTJGsk0XaWl8ezj+5hAAAA4CmosAIAADgak9PjzRrz2iH7jAoAAAD4CxVWAAAAR2MyWamH1T6bWKmwAgAAwK5RYQUAAHA09LACAAAA9oMKKwAAgKOhhxUAAACwH1RYAQAAHI6VeljttJZpn1EBAAAAf6HCCgAA4GhSWQ8rCSsAAICjYVkrAAAAwH5QYQUAAHA0qawlgAorAAAA7BoVVgAAAEdDDysAAABgP6iwAgAAOBp6WAEAAAD7QYUVAADA0dDDCgAAANgPKqwAAACOxmSyUoWVHlYAAAAgyaiwAgAAOBon0+PNGvPaISqsAAAAsGtUWAEAABxNKlslgIQVAADA0XDjAAAAAMB+UGEFAABwNKmsJcA+owIAAAD+QoUVAADA0dDDCgAAANgPKqwAAACOhh5WAAAAwH6QsAIAADia+B5Wa2xJtGXLFtWuXVtZsmSRyWTSihUrLPYbhqEBAwYoMDBQ7u7uqlKlik6ePJmkc5CwAgAA4Lndu3dPRYoU0aRJk564f9SoURo/frymTJmi3bt3y9PTU9WrV9fDhw8TfQ56WAEAAByNHfWw1qhRQzVq1HjiPsMwNHbsWH3yySeqW7euJGnu3Lny9/fXihUr1Lhx40SdgworAAAALERGRlpsUVFRzzVPRESELl++rCpVqpjHfH19Vbp0ae3cuTPR85CwAgAAOBor97AGBQXJ19fXvI0YMeK5wrx8+bIkyd/f32Lc39/fvC8xaAkAAACAhfPnz8vHx8f82NXV1YbRkLACAAA4ICv1sP715buPj49Fwvq8AgICJElXrlxRYGCgefzKlSsqWrRoEqMCAACA47CjZa2eJUeOHAoICNCGDRvMY5GRkdq9e7fKlCmT6HmosAIAAOC53b17V6dOnTI/joiIUFhYmNKnT6+XXnpJXbp00dChQ5UnTx7lyJFD/fv3V5Y
sWVSvXr1En4OEFQAAwNGYTFZa1irpFda9e/eqYsWK5sfdunWTJDVv3lyzZ89Wr169dO/ePbVr1063bt1SuXLltG7dOrm5uSX6HCSsAAAAeG6vvfaaDMN46n6TyaQhQ4ZoyJAhz30OElYAAABHY0c3Dvgv2GdUAAAAwF+osAIAADgaK1zRb57XDlFhBQAAgF2jwgoAAOBo6GEFAAAA7AcVVgAAAEdDDysAAABgP6iwAgAAOBp6WAEAAAD7QYUVAADA0aSyHlYSVgAAAAdjMplkSkUJKy0BAAAAsGtUWAEAABwMFVYAAADAjlBhBQAAcDSmvzZrzGuHqLACAADArlFhBQAAcDD0sAIAAAB2hAorAACAg6HCCjiwsgWzaumgujqzoK0erOuq2mVyJTimf7MyOrOwnW5+30mrR7ypXFn8/vtAkapM+WqSQnJnl5+Xm8q/Ulq/7tlj65CQivD5Q0pAwooUxdMtrQ5FXFOXSRufuL97w5JqX7eoOo//Wa92+Ub3Hsboh2EN5JrW+T+OFKnFkm8Xq3fPbur3yUDt3LNfhQsXUZ2a1XX16lVbh4ZUgM9fyhVfYbXGZo9IWJGi/LT3rAbP2aGVO04/cX+H+sU18ps9WrXrjA5HXFebz9cpMIOn6rySsBILJIfxY79Uy9Zt9V6LlgrNn18Tvpoidw8PzZk909ahIRXg84eUgoQVqUb2AF8FpvfUxgO/m8ci70fr1+OXVTo0iw0jQ0oVHR2tA/v3qVLlKuYxJycnVapURXt27bRhZEgN+PylbFRYgRQqIJ2HJOnqrfsW41dv3Zf/X/uA5HT9+nXFxsYqc2Z/i/HM/v66fPmyjaJCasHnDykJqwQAAAA4Gu50lbqdPXtWJpNJYWFhtg4Fyezyn48rq5n9LKupmf08dOXP+096CvBCMmbMKGdnZ129esVi/OqVKwoICLBRVEgt+PylbLQEACnU2cu3denmPVUsGmQe8/Zw0cv5ArT72EUbRoaUysXFRcWKl9CmjRvMY3Fxcdq0aYNK/V8ZG0aG1IDPH1KSFJuwLl26VIUKFZK7u7syZMigKlWq6N69e5Kkr7/+WqGhoXJzc1O+fPn01VdfmZ+XI0cOSVKxYsVkMpn02muvSXr8H/mQIUOULVs2ubq6qmjRolq3bp35edHR0erYsaMCAwPl5uam4OBgjRgxwrz/yy+/VKFCheTp6amgoCC1b99ed+/e/Q/eidTF0y2tCufMpMI5M0mSsgf4qHDOTArK5C1JmrR8v3q/U1o1/y+nCmTPoBk9quvSjXtPXVUAeFGdu3TTrBnTNX/uHB0/dkydO3yo+/fu6b3mLW0dGlIBPn8pl8lkrSqrrV/Zk6XIHtZLly7pnXfe0ahRo1S/fn3duXNHW7dulWEYWrBggQYMGKCJEyeqWLFiOnDggNq2bStPT081b95ce/bsUalSpfTzzz+rQIECcnFxkSSNGzdOo0eP1tSpU1WsWDHNnDlTderU0ZEjR5QnTx6NHz9eK1eu1LfffquXXnpJ58+f1/nz580xOTk5afz48cqRI4fOnDmj9u3bq1evXhbJ8t9FRUUpKirK/DgyMtK6b1oKUTyvv34a1dD8eNT7r0mS5q0/onajf9LoJXvl4ZZWEztXkZ+Xq3Ycuag6n3ynqJhYG0WMlK5ho7d1/do1DRk8QFcuX1bhIkX1/ap18vf3//cnAy+Izx9SCpNhGIatg0hu+/fvV4kSJXT27FkFBwdb7MudO7c+/fRTvfPOO+axoUOHas2aNdqxY4fOnj2rHDly6MCBAypatKj5mKxZs6pDhw76+OOPzWOlSpXSyy+/rEmTJqlz5846cuSIfv7550T1fyxdulQffPCBrl+//sT9gwYN0uDBgxOMu1YeLlMat3+dH0huf67qausQAOA/FxkZKf8Mvrp9+7Z8fHxsHY4iIyPl6+srv0bTZXJJ/hVujOj7uvVtW7t5vfFSZEtAkSJFVLlyZRUqVEgNGzbU9OnT9eeff+r
evXs6ffq0WrduLS8vL/M2dOhQnT799K+EIyMjdfHiRZUtW9ZivGzZsjp27JgkqUWLFgoLC1NISIg6d+6sn376yeLYn3/+WZUrV1bWrFnl7e2tZs2a6caNG7p//8kX+/Tt21e3b982b3+v1gIAAKQmKTJhdXZ21vr167V27Vrlz59fEyZMUEhIiA4fPixJmj59usLCwszb4cOHtWvXrhc6Z/HixRUREaFPP/1UDx48UKNGjfTWW29JerzyQK1atVS4cGEtW7ZM+/bt06RJkyQ97n19EldXV/n4+FhsAAAAUupbJSBF9rBKj/8hy5Ytq7Jly2rAgAEKDg7W9u3blSVLFp05c0ZNmjR54vPie1ZjY//X0+jj46MsWbJo+/btqlChgnl8+/btKlWqlMVxb7/9tt5++2299dZbev3113Xz5k3t27dPcXFxGj16tJycHv+O8O2331rjZQMAAKQ4KTJh3b17tzZs2KBq1aopc+bM2r17t65du6bQ0FANHjxYnTt3lq+vr15//XVFRUVp7969+vPPP9WtWzdlzpxZ7u7uWrdunbJlyyY3Nzf5+vqqZ8+eGjhwoHLlyqWiRYtq1qxZCgsL04IFCyQ9XgUgMDBQxYoVk5OTk5YsWaKAgAD5+fkpd+7ciomJ0YQJE1S7dm1t375dU6ZMsfG7BAAAHFYqu3FAikxYfXx8tGXLFo0dO1aRkZEKDg7W6NGjVaNGDUmSh4eHPv/8c/Xs2VOenp4qVKiQunTpIklKkyaNxo8fryFDhmjAgAEqX768fvnlF3Xu3Fm3b99W9+7ddfXqVeXPn18rV65Unjx5JEne3t4aNWqUTp48KWdnZ7388stas2aNnJycVKRIEX355ZcaOXKk+vbtq1dffVUjRozQe++9Z6u3CAAAwGGkyFUCUqL4qwJZJQC2wioBAFIje10lIN07M+RkhVUC4qLv689vWtvN642XIi+6AgAAQMqRIlsCAAAAUjJrXdFvr6sEUGEFAACAXaPCCgAA4GBSW4WVhBUAAMDRpLJlrWgJAAAAgF2jwgoAAOBgUltLABVWAAAA2DUqrAAAAA6GCisAAABgR6iwAgAAOBgqrAAAAIAdocIKAADgYKiwAgAAAHaECisAAICj4U5XAAAAgP2gwgoAAOBg6GEFAAAA7AgVVgAAAAeT2iqsJKwAAAAOJrUlrLQEAAAAwK5RYQUAAHA0LGsFAAAA2A8qrAAAAA6GHlYAAADAjlBhBQAAcDBUWAEAAAA7QoUVAADAwZhkpQqrnS4TQIUVAAAAdo0KKwAAgIOhhxUAAACwI1RYAQAAHA13ugIAAADsBxVWAAAAB5PaelhJWAEAABxMaktYaQkAAACAXaPCCgAA4GBMpsebNea1R1RYAQAAYNeosAIAADiYxxVWa/SwJvuUyYIKKwAAAOwaCSsAAICjMf2vjzU5t6TeOCA2Nlb9+/dXjhw55O7urly5cunTTz+VYRjJ+nJpCQAAAMBzGTlypCZPnqw5c+aoQIEC2rt3r1q2bClfX1917tw52c5DwgoAAOBg7GUd1h07dqhu3bqqWbOmJCl79uz65ptvtGfPnmSNi5YAAAAAWIiMjLTYoqKinnjcK6+8og0bNujEiROSpN9++03btm1TjRo1kjUeKqwAAAAOxtrrsAYFBVmMDxw4UIMGDUpwfJ8+fRQZGal8+fLJ2dlZsbGxGjZsmJo0aZKscZGwAgAAwML58+fl4+Njfuzq6vrE47799lstWLBACxcuVIECBRQWFqYuXbooS5Ysat68ebLFQ8IKAADgYJycTHJySv4Sq/HXnD4+PhYJ69P07NlTffr0UePGjSVJhQoV0rlz5zRixIhkTVjpYQUAAMBzuX//vpycLNNJZ2dnxcXFJet5qLACAAA4GGv3sCZW7dq1NWzYML300ksqUKCADhw4oC+//FKtWrVK1rhIWAEAAByMvSxrNWHCBPXv31/t27fX1atXlSVLFr3//vsaMGBAssZFwgoAAID
n4u3trbFjx2rs2LFWPQ8JKwAAgIOxl5aA/woXXQEAAMCuUWEFAABwMPbSw/pfocIKAAAAu0aFFQAAwMFQYQUAAADsCBVWAAAAB8MqAQAAAIAdocIKAADgYEyyUg+r7LPESoUVAAAAdo0KKwAAgIOhhxUAAACwI1RYAQAAHExqW4eVhBUAAMDB0BIAAAAA2BEqrAAAAA4mtbUEUGEFAACAXaPCCgAA4GDoYQUAAADsCBVWAAAAB0MPKwAAAGBHqLA6mN+/7SAfHx9bh4FU6PWJ220dAlKxdR3L2joEwL5YqYdV9llgpcIKAAAA+0aFFQAAwMHQwwoAAADYESqsAAAADoZ1WAEAAAA7QoUVAADAwdDDCgAAANgRKqwAAAAOJrX1sJKwAgAAOBhaAgAAAAA7QoUVAADAwVBhBQAAAOwIFVYAAAAHk9ouuqLCCgAAALtGhRUAAMDB0MMKAAAA2BEqrAAAAA6GHlYAAADAjlBhBQAAcDD0sAIAAAB2hAorAACAgzHJSj2syT9lsqDCCgAAALtGhRUAAMDBOJlMcrJCidUacyYHElYAAAAHw7JWAAAAgB2hwgoAAOBgWNYKAAAAsCNUWAEAAByMk+nxZo157REVVgAAANg1KqwAAACOxmSlflMqrAAAAEDSUWEFAABwMKzDCgAAANgRKqwAAAAOxvTXH2vMa4+osAIAAMCuUWEFAABwMKzDCgAAANgRKqwAAAAOxmQyWWUdVqus7ZoMSFgBAAAcDMtaAQAAAHaECisAAICDcTKZ5GSFcqg15kwOVFgBAABg16iwAgAAOBh6WAEAAAA7QoUVAADAwaS2Za2osAIAAMCuUWEFAABwMPSwAgAAAHYkURXWlStXJnrCOnXqPHcwAAAA+HepbR3WRCWs9erVS9RkJpNJsbGxLxIPAAAAYCFRCWtcXJy14wAAAEAimf7arDGvPXqhHtaHDx8mVxwAAADAEyU5YY2NjdWnn36qrFmzysvLS2fOnJEk9e/fXzNmzEj2AAEAAGApfh1Wa2z2KMkJ67BhwzR79myNGjVKLi4u5vGCBQvq66+/TtbgAAAAkJCTyXqbPUpywjp37lxNmzZNTZo0kbOzs3m8SJEiOn78eLIGBwAAAPt24cIFNW3aVBkyZJC7u7sKFSqkvXv3Jus5knzjgAsXLih37twJxuPi4hQTE5MsQQEAAODp7OXWrH/++afKli2rihUrau3atcqUKZNOnjypdOnSJWtcSU5Y8+fPr61btyo4ONhifOnSpSpWrFiyBQYAAAD7NnLkSAUFBWnWrFnmsRw5ciT7eZKcsA4YMEDNmzfXhQsXFBcXp++++07h4eGaO3euVq1alewBAgAAICFrXh8VGRlp8djV1VWurq4Jjlu5cqWqV6+uhg0bavPmzcqaNavat2+vtm3bJms8Se5hrVu3rn744Qf9/PPP8vT01IABA3Ts2DH98MMPqlq1arIGBwAAgP9eUFCQfH19zduIESOeeNyZM2c0efJk5cmTRz/++KM+/PBDde7cWXPmzEnWeJJcYZWk8uXLa/369ckaCAAAABLH2j2s58+fl4+Pj3n8SdVV6fE1TCVLltTw4cMlScWKFdPhw4c1ZcoUNW/ePNnieq6EVZL27t2rY8eOSXrc11qiRIlkCwoAAAC24+PjY5GwPk1gYKDy589vMRYaGqply5YlazxJTlj/+OMPvfPOO9q+fbv8/PwkSbdu3dIrr7yiRYsWKVu2bMkaIAAAACxZa83UpM5ZtmxZhYeHW4ydOHEiwcX5LyrJPaxt2rRRTEyMjh07pps3b+rmzZs6duyY4uLi1KZNm2QNDgAAAPara9eu2rVrl4YPH65Tp05p4cKFmjZtmjp06JCs50lyhXXz5s3asWOHQkJCzGMhISGaMGGCypcvn6zBAQAAICF7WYf15Zdf1vLly9W3b18NGTJEOXLk0NixY9WkSZNkjSvJCWtQUNATbxAQGxurLFmyJEtQAAAAcAy1atVSrVq
1rHqOJLcEfP755+rUqZPFLbf27t2rjz76SF988UWyBgcAAICETFbc7FGiKqzp0qWzKBHfu3dPpUuXVpo0j5/+6NEjpUmTRq1atVK9evWsEigAAABSp0QlrGPHjrVyGAAAAEgsJ5NJTlboYbXGnMkhUQlrci78CgAAgBdjMlnn1qx2mq8+/40DJOnhw4eKjo62GEvMIrMAAABAYiX5oqt79+6pY8eOypw5szw9PZUuXTqLDbBHU76apJDc2eXn5abyr5TWr3v22DokpAKLWpXQL13KJtg+qpjT1qEhFeHnX8oUv6yVNTZ7lOSEtVevXtq4caMmT54sV1dXff311xo8eLCyZMmiuXPnWiNG4IUs+Xaxevfspn6fDNTOPftVuHAR1alZXVevXrV1aEjh3v/mNzWYtse8dV92WJK0+eR1G0eG1IKff0gpkpyw/vDDD/rqq6/05ptvKk2aNCpfvrw++eQTDR8+XAsWLLBGjMALGT/2S7Vs3VbvtWip0Pz5NeGrKXL38NCc2TNtHRpSuNsPHunm/RjzViZnel249UBhf0TaOjSkEvz8S7nie1itsdmjJCesN2/eVM6cj7/O8vHx0c2bNyVJ5cqV05YtW5I3OuAFRUdH68D+fapUuYp5zMnJSZUqVdGeXTttGBlSmzROJlXNl0lrjlDZwn+Dn39ISZKcsObMmVMRERGSpHz58unbb7+V9Ljy6ufnl6zBAS/q+vXrio2NVebM/hbjmf39dfnyZRtFhdSoXK708nJNo3VHSVjx3+DnX8oWv6yVNTZ7lOSEtWXLlvrtt98kSX369NGkSZPk5uamrl27qmfPnskeYHI6e/asTCaTwsLC7HI+ACnXGwX9tfvsn7pxL/rfDwYAWEjyslZdu3Y1/71KlSo6fvy49u3bp9y5c6tw4cLJGlxyCwoK0qVLl5QxY0Zbh4L/SMaMGeXs7KyrV69YjF+9ckUBAQE2igqpjb+3q0oE+WnAquO2DgWpCD//UrbUtg5rkius/xQcHKwGDRrYRbIaExPzzP3Ozs4KCAgw31LWHvxzHVskLxcXFxUrXkKbNm4wj8XFxWnTpg0q9X9lbBgZUpMaBTLr1oMY7Yq4aetQkIrw8w8pSaIS1vHjxyd6S6xp06YpS5YsiouLsxivW7euWrVqJUn6/vvvVbx4cbm5uSlnzpwaPHiwHj16ZD7WZDJp8uTJqlOnjjw9PTVs2DD9+eefatKkiTJlyiR3d3flyZNHs2bNkvTkr/CPHDmiWrVqycfHR97e3ipfvrxOnz4t6fF/2EOGDFG2bNnk6uqqokWLat26dc98XZs3b1apUqXk6uqqwMBA9enTxyLm1157TR07dlSXLl2UMWNGVa9ePdHvGZ5P5y7dNGvGdM2fO0fHjx1T5w4f6v69e3qveUtbh4ZUwCTp9fyZ9ePRq4o1bB0NUht+/qVcqW0d1kSVGseMGZOoyUwmkzp37pyoYxs2bKhOnTpp06ZNqly5sqTHKxCsW7dOa9as0datW/Xee+9p/Pjx5iSyXbt2kqSBAwea5xk0aJA+++wzjR07VmnSpFH//v119OhRrV27VhkzZtSpU6f04MGDJ8Zw4cIFvfrqq3rttde0ceNG+fj4aPv27eYEc9y4cRo9erSmTp2qYsWKaebMmapTp46OHDmiPHnyPHG+N954Qy1atNDcuXN1/PhxtW3bVm5ubho0aJD5uDlz5ujDDz/U9u3bn/r+REVFKSoqyvw4MpJlcJ5Xw0Zv6/q1axoyeICuXL6swkWK6vtV6+Tv7//vTwZeUImX/BTg46Y1R678+8FAMuPnH1IKk2EYNvudv169esqQIYNmzJgh6XHVdfDgwTp//ryqVaumypUrq2/fvubj58+fr169eunixYuSHifIXbp0sUio69Spo4wZM2rmzIRrzJ09e1Y5cuTQgQMHVLRoUX388cdatGiRwsPDlTZt2gTHZ82aVR06dNDHH39sHitVqpRefvllTZo0KcF8/fr107J
ly3Ts2DHzbyhfffWVevfurdu3b8vJyUmvvfaaIiMjtX///me+N4MGDdLgwYMTjF+5cZvb38ImXp/49F+wAGtb17GsrUNAKhUZGSn/DL66fds+/v8bGRkpX19ftZu/Ry4eXsk+f/T9u5rWtJTdvN54L9zD+iKaNGmiZcuWmSuJCxYsUOPGjeXk5KTffvtNQ4YMkZeXl3lr27atLl26pPv375vnKFmypMWcH374oRYtWqSiRYuqV69e2rFjx1PPHxYWpvLlyz8xWY2MjNTFixdVtqzlD8myZcvq2LFjT5zv2LFjKlOmjEU5vWzZsrp7967++OMP81iJEiWe8a481rdvX92+fdu8nT9//l+fAwAAkBLZ9Oqj2rVryzAMrV69Wi+//LK2bt1qrpbevXtXgwcPVoMGDRI8z83Nzfx3T09Pi301atTQuXPntGbNGq1fv16VK1dWhw4d9MUXXySYx93dPZlfUeL8M+YncXV1laur638QDQAAcDTW6je11x5Wm1ZY3dzc1KBBAy1YsEDffPONQkJCVLx4cUlS8eLFFR4erty5cyfYnJyeHXamTJnUvHlzzZ8/X2PHjtW0adOeeFzhwoW1devWJ64u4OPjoyxZsiToM92+fbvy58//xPlCQ0O1c+dO/b3LYvv27fL29la2bNmeGTMAAEBimUySkxU2O81XbVthlR63BdSqVUtHjhxR06ZNzeMDBgxQrVq19NJLL+mtt94ytwkcPnxYQ4cOfep8AwYMUIkSJVSgQAFFRUVp1apVCg0NfeKxHTt21IQJE9S4cWP17dtXvr6+2rVrl0qVKqWQkBD17NlTAwcOVK5cuVS0aFHNmjVLYWFhWrBgwRPna9++vcaOHatOnTqpY8eOCg8P18CBA9WtW7d/TbIBAADwZDZPWCtVqqT06dMrPDxc7777rnm8evXqWrVqlYYMGaKRI0cqbdq0ypcvn9q0afPM+VxcXNS3b1+dPXtW7u7uKl++vBYtWvTEYzNkyKCNGzeqZ8+eqlChgpydnVW0aFFz32rnzp11+/Ztde/eXVevXlX+/Pm1cuXKJ64QID2+SGvNmjXq2bOnihQpovTp06t169b65JNPnvPdAQAASCi+ImqNee3Rc60SsHXrVk2dOlWnT5/W0qVLlTVrVs2bN085cuRQuXLlrBFnqhd/VSCrBMBWWCUAtsQqAbAVe10loP03v8rVCqsERN2/q6/eedluXm+8JH9PvWzZMlWvXl3u7u46cOCA+Qr/27dva/jw4ckeIAAAACylthsHJDlhHTp0qKZMmaLp06dbLAdVtmzZf11bFAAAAEiqJPewhoeH69VXX00w7uvrq1u3biVHTAAAAHiG1NbDmuQKa0BAgE6dOpVgfNu2bcqZM2eyBAUAAADES3LC2rZtW3300UfavXu3TCaTLl68qAULFqhHjx768MMPrREjAAAA/sZkst5mj5LcEtCnTx/FxcWpcuXKun//vl599VW5urqqR48e6tSpkzViBAAAQCqW5ITVZDKpX79+6tmzp06dOqW7d+8qf/788vJK/qUVAAAAkJCTySQnK5RDrTFncnjuGwe4uLg89RalAAAAQHJJcsJasWLFZ67RtXHjxhcKCAAAAM/mpOe4ECmR89qjJCesRYsWtXgcExOjsLAwHT58WM2bN0+uuAAAAABJz5Gwjhkz5onjgwYN0t27d184IAAAADybta7ot9MW1uSr/DZt2lQzZ85MrukAAADwFE4ymS+8StZN9pmxJlvCunPnTrm5uSXXdAAAAICk52gJaNCggcVjwzB06dIl7d27V/3790+2wAAAAPBkqa0lIMkJq6+vr8VjJycnhYSEaMiQIapWrVqyBQYAAABISUxYY2Nj1bJlSxUqVEjp0qWzVkwAAAB4BifT480a89qjJPWwOjs7q1q1arp165aVwgEAAAAsJfmiq4IFC+rMmTPWiAUAAACJYDLJKqsE2GsPa5IT1qFDh6pHjx5atWqVLl26pMjISIs
NAAAASE6J7mEdMmSIunfvrjfeeEOSVKdOHYtbtBqGIZPJpNjY2OSPEgAAAGasEvAUgwcP1gcffKBNmzZZMx4AAADAQqITVsMwJEkVKlSwWjAAAAD4d6wS8Awme60TAwAAIMVK0jqsefPm/dek9ebNmy8UEAAAAJ7N9Ncfa8xrj5KUsA4ePDjBna4AAAAAa0pSwtq4cWNlzpzZWrEAAAAgEVJbD2uiE1b6VwEAAOxDaktYE33RVfwqAQAAAMB/KdEV1ri4OGvGAQAAgEQymUxW+fbbXr9RT/KtWQEAAID/UpIuugIAAIDt0cMKAAAA2BEqrAAAAA7GZHq8WWNee0SFFQAAAHaNCisAAICDcTKZ5GSFcqg15kwOVFgBAABg16iwAgAAOBhWCQAAAADsCBVWAAAAR2OlVQJEhRUAAABIOiqsAAAADsZJJjlZoRxqjTmTAwkrAACAg+HGAQAAAIAdocIKAADgYFjWCgAAALAjVFgBAAAcDLdmBQAAAOwIFVYAAAAHwyoBAAAAgB2hwgoAAOBgnGSlHlY7vXEAFVYAAADYNSqsAAAADoYeVgAAAMCOUGEFAABwME6yTtXRXiuZ9hoXAAAAHMxnn30mk8mkLl26JOu8VFgBAAAcjMlkkskKDacvMuevv/6qqVOnqnDhwskY0WNUWAEAAByMyYrb87h7966aNGmi6dOnK126dM85y9ORsAIAAMBCZGSkxRYVFfXM4zt06KCaNWuqSpUqVomHlgAAAAAH42Sy0o0D/pozKCjIYnzgwIEaNGjQE5+zaNEi7d+/X7/++muyxxOPhBUAAAAWzp8/Lx8fH/NjV1fXpx730Ucfaf369XJzc7NaPCSsAAAADsiaa/z7+PhYJKxPs2/fPl29elXFixc3j8XGxmrLli2aOHGioqKi5Ozs/MLxkLACAADguVSuXFmHDh2yGGvZsqXy5cun3r17J0uyKpGwAgAAOBx7uTWrt7e3ChYsaDHm6empDBkyJBh/EawSAAAAALtGhRUAAMDB2OONA+L98ssvLx7IP1BhBQAAgF2jwgoAAOBgnGSdqqO9VjLtNS4AAABAEhVWAAAAh2PPPazWQIUVAAAAdo0KKwAAgIMxyTp3urLP+ioJKwAAgMNJbS0BJKwAEmXiW0VsHQJSsXQvd7R1CEiljNhoW4cAkbACAAA4HJa1AgAAAOwIFVYAAAAHk9p6WKmwAgAAwK5RYQUAAHAwqW1ZKyqsAAAAsGtUWAEAAByMyfR4s8a89ogKKwAAAOwaFVYAAAAH4ySTnKzQcWqNOZMDFVYAAADYNSqsAAAADoYeVgAAAMCOUGEFAABwMKa//lhjXntEwgoAAOBgaAkAAAAA7AgVVgAAAAdjstKyVvbaEkCFFQAAAHaNCisAAICDoYcVAAAAsCNUWAEAABwMFVYAAADAjlBhBQAAcDCp7cYBVFgBAABg16iwAgAAOBgn0+PNGvPaIyqsAAAAsGtUWAEAABwMPawAAACAHaHCCgAA4GBS2zqsJKwAAAAOxiTrfH1vp/kqLQEAAACwb1RYAQAAHAzLWgEAAAB2hAorAACAg2FZKwAAAMCOUGEFAABwMKltWSsqrAAAALBrVFgBAAAcjEnWWTPVTgusVFgBAABg36iwAgAAOBgnmeRkhYZTJzutsVJhBQAAgF2jwgoAAOBg6GEFAAAA7AgVVgAAAEeTykqsJKwAAAAOhluzAgAAAHaECisAAICjsdKtWe20wEqFFQAAAPaNCisAAICDSWXXXFFhBQAAgH2jwgoAAOBoUlmJlQorAAAA7BoVVgAAAAfDOqwAAACAHaHCCgAA4GBMVlqH1SpruyYDKqwAAACwa1RYAQAAHEwqWySACisAAADsGxVWAAAAR5PKSqwkrAAAAA6GZa0AAAAAO0KFFQAAwMGwrBUAAABgR6iwAgAAOJhUds0VFVYAAADYNyqsAAAAjiaVlVipsAIAAMCuUWEFAABwMKzDCgAAANgRKqwAAAAOhnV
YAQAAgEQYMWKEXn75ZXl7eytz5syqV6+ewsPDk/08JKxIFaZ8NUkhubPLz8tN5V8prV/37LF1SEgFvp74hRrXrKDS+QJVoWgOdW7dWBGnT9g6LKRQZYvn0tKx7+vMT8P04MBE1X6tsMX+upWK6IevOuiPTSP14MBEFc6b1UaRIjmYrLglxebNm9WhQwft2rVL69evV0xMjKpVq6Z79+694Cu0RMKKFG/Jt4vVu2c39ftkoHbu2a/ChYuoTs3qunr1qq1DQwq3d9d2NW7eVgu+36hpC1fq0aMYvd+knu7fT94f5IAkebq76tCJC+oyYvET93u4u2hH2Gl9Mn7FfxsYHFJkZKTFFhUV9cTj1q1bpxYtWqhAgQIqUqSIZs+erd9//1379u1L1njoYUWKN37sl2rZuq3ea9FSkjThqylau3a15syeqZ69+tg4OqRkU+Yvt3g89MspqlA0p44ePKCS/1fORlEhpfpp+1H9tP3oU/d/s/pXSdJLgen/q5BgTVZehzUoKMhieODAgRo0aNC/Pv327duSpPTpk/dzRsKKFC06OloH9u9Tz959zWNOTk6qVKmK9uzaacPIkBrdjYyUJPn6kTAAeDHWXtbq/Pnz8vHxMY+7urr+63Pj4uLUpUsXlS1bVgULFkzWuEhYkaJdv35dsbGxypzZ32I8s7+/wsOP2ygqpEZxcXEaObi3ir38f8qTL7+twwGAZ/Lx8bFIWBOjQ4cOOnz4sLZt25bs8ThsD+ugQYNUtGjRF57nl19+kclk0q1btxL9nBYtWqhevXovfG4Aqcewft10KvyYRk2abetQAKQA8ctaWWN7Hh07dtSqVau0adMmZcuWLXlfrBy4wtqjRw916tTphed55ZVXdOnSJfn6+ib6OePGjZNhGC98blhfxowZ5ezsrKtXr1iMX71yRQEBATaKCqnNsE+6a/OGdZq9dJ0CArkyG0DKYRiGOnXqpOXLl+uXX35Rjhw5rHIeh62wenl5KUOGDE/dHx0dnah5XFxcFBAQIFMSfqXw9fWVn59foo+H7bi4uKhY8RLatHGDeSwuLk6bNm1Qqf8rY8PIkBoYhqFhn3TXxnU/aMbiVcr2UnZbhwQghbCXZa06dOig+fPna+HChfL29tbly5d1+fJlPXjw4AVfoSW7TVinTZumLFmyKC4uzmK8bt26atWqVYKWgPiv6YcNG6YsWbIoJCREkrRjxw4VLVpUbm5uKlmypFasWCGTyaSwsDBJCVsCZs+eLT8/P/34448KDQ2Vl5eXXn/9dV26dCnBueLFxcVp1KhRyp07t1xdXfXSSy9p2LBh5v29e/dW3rx55eHhoZw5c6p///6KiYlJ3jcMT9W5SzfNmjFd8+fO0fFjx9S5w4e6f++e3mve0tahIYUb1q+bVi9frM8mzJSnp7euX72i61ev6GEy/yAHJMnT3UWF82Y1r6+aPWsGFc6bVUEB6SRJ6Xw8VDhvVoXmevztUt7s/iqcN6v8M3jbLGY4vsmTJ+v27dt67bXXFBgYaN4WL37y8mrPy25bAho2bKhOnTpp06ZNqly5siTp5s2bWrdundasWaOtW7cmeM6GDRvk4+Oj9evXS3q8hljt2rX1xhtvaOHChTp37py6dOnyr+e+f/++vvjiC82bN09OTk5q2rSpevTooQULFjzx+L59+2r69OkaM2aMypUrp0uXLun48f9d0OPt7a3Zs2crS5YsOnTokNq2bStvb2/16tXrqTFERUVZrHkW+dfVxUi6ho3e1vVr1zRk8ABduXxZhYsU1fer1snf3//fnwy8gMXzvpYktWpUw2L809GTVa9RU1uEhBSseP5g/fT1R+bHo3q8KUmat3KX2g2cr5oVCmn6kGbm/fNGtpIkDZ2yRsOmrvlvg8WLs/KyVon1X7VI2m3Cmi5dOtWoUUMLFy40J6xLly5VxowZVbFixScmrJ6envr666/l4uIiSZoyZYpMJpOmT58uNzc35c+fXxcuXFDbtm2fee6
YmBhNmTJFuXLlkvS4kXjIkCFPPPbOnTsaN26cJk6cqObNm0uScuXKpXLl/rfG4ieffGL+e/bs2dWjRw8tWrTomQnriBEjNHjw4GfGicT7sENHfdiho63DQCpz6PwdW4eAVGTrvpNyL/b0n3Pzf9it+T/s/g8jApKP3bYESFKTJk20bNkyc6VxwYIFaty4sZycnhx2oUKFzMmqJIWHh6tw4cJyc3Mzj5UqVepfz+vh4WFOViUpMDDwqXdFOnbsmKKiosxJ9ZMsXrxYZcuWVUBAgLy8vPTJJ5/o999/f2YMffv21e3bt83b+fPn/zVuAACQOpis+Mce2XXCWrt2bRmGodWrV+v8+fPaunWrmjRp8tTjPT09k+W8adOmtXhsMpmeWvJ2d3d/5lw7d+5UkyZN9MYbb2jVqlU6cOCA+vXr968Xhbm6uprXQHuetdAAAABSCrttCZAkNzc3NWjQQAsWLNCpU6cUEhKi4sWLJ/r5ISEhmj9/vqKiosx3aPj111+TNcY8efLI3d1dGzZsUJs2bRLs37Fjh4KDg9WvXz/z2Llz55I1BgAAkLq8yJqp/zavPbLrCqv0uC1g9erVmjlz5jOrq0/y7rvvKi4uTu3atdOxY8f0448/6osvvpCkJC1j9Sxubm7q3bu3evXqpblz5+r06dPatWuXZsyYIelxQvv7779r0aJFOn36tMaPH6/ly5f/y6wAAACIZ/cJa6VKlZQ+fXqFh4fr3XffTdJzfXx89MMPPygsLExFixZVv379NGDAAEmy6Gt9Uf3791f37t01YMAAhYaG6u233zb3vNapU0ddu3ZVx44dVbRoUe3YsUP9+/dPtnMDAIDUx17WYf2vmIxUdsumBQsWqGXLlrp9+/a/9p/ak8jISPn6+urKjdv0s8ImTl2+a+sQkIq9XLuPrUNAKmXERivq0HTdvm0f//+Nzwf2nbgkL+/kj+funUiVyBtoN683nl33sCaHuXPnKmfOnMqaNat+++039e7dW40aNXKoZBUAAMCCnazD+l9J8Qnr5cuXNWDAAF2+fFmBgYFq2LChxV2oAAAAHI21lqCy12WtUnzC2qtXr2cu0A8AAAD7luITVgAAgBTHSsta2WmB1f5XCQAAAEDqRoUVAADAwaSya66osAIAAMC+UWEFAABwNKmsxEqFFQAAAHaNCisAAICDSW3rsFJhBQAAgF2jwgoAAOBgTFZah9Uqa7smAyqsAAAAsGtUWAEAABxMKlskgAorAAAA7BsVVgAAAEeTykqsJKwAAAAOhmWtAAAAADtChRUAAMDBmGSlZa2Sf8pkQYUVAAAAdo0KKwAAgINJZddcUWEFAACAfaPCCgAA4GC4NSsAAABgR6iwAgAAOJzU1cVKhRUAAAB2jQorAACAg6GHFQAAALAjVFgBAAAcTOrqYKXCCgAAADtHhRUAAMDBpLYeVhJWAAAAB2P664815rVHtAQAAADArlFhBQAAcDSp7KorKqwAAACwa1RYAQAAHEwqK7BSYQUAAIB9o8IKAADgYFLbslZUWAEAAGDXqLACAAA4GNZhBQAAAOwIFVYAAABHk8qWCaDCCgAAALtGhRUAAMDBpLICKxVWAAAA2DcqrAAAAA4mta3DSsIKAADgcKyzrJW9NgXQEgAAAAC7RoUVAADAwaS2lgAqrAAAALBrJKwAAACwaySsAAAAsGv0sAIAADgYelgBAAAAO0KFFQAAwMGYrLQOq3XWdn1xVFgBAABg16iwAgAAOBh6WAEAAAA7QoUVAADAwZj+2qwxrz2iwgoAAAC7RoUVAADA0aSyEisJKwAAgINhWSsAAADAjlBhBQAAcDAsawUAAADYESqsAAAADiaVXXNFhRUAAAD2jQorAACAo0llJVYqrAAAALBrJKwAAAAOxmTFP89j0qRJyp49u9zc3FS6dGnt2bMnWV8vCSsAAACe2+LFi9WtWzcNHDhQ+/fvV5EiRVS9enVdvXo12c5BwgoAAOBg4tdhtcaWVF9
++aXatm2rli1bKn/+/JoyZYo8PDw0c+bMZHu9XHTlIAzDkCTdiYy0cSRIre7euWvrEJCKGbHRtg4BqVT8Zy/+/8P2ItJK+UD8vP+c39XVVa6urgmOj46O1r59+9S3b1/zmJOTk6pUqaKdO3cmW1wkrA7izp07kqTcOYJsHAkAAKnPnTt35Ovra+sw5OLiooCAAOWxYj7g5eWloCDL+QcOHKhBgwYlOPb69euKjY2Vv7+/xbi/v7+OHz+ebDGRsDqILFmy6Pz58/L29pbJXu+bZsciIyMVFBSk8+fPy8fHx9bhIJXh8wdb4vP3YgzD0J07d5QlSxZbhyJJcnNzU0REhKKjrfetg2EYCXKNJ1VX/0skrA7CyclJ2bJls3UYDs/Hx4cf2LAZPn+wJT5/z88eKqt/5+bmJjc3N1uHIUnKmDGjnJ2ddeXKFYvxK1euKCAgINnOw0VXAAAAeC4uLi4qUaKENmzYYB6Li4vThg0bVKZMmWQ7DxVWAAAAPLdu3bqpefPmKlmypEqVKqWxY8fq3r17atmyZbKdg4QVqYKrq6sGDhxo8x4cpE58/mBLfP5gbW+//bauXbumAQMG6PLlyypatKjWrVuX4EKsF2Ey7G2dBgAAAOBv6GEFAACAXSNhBQAAgF0jYQUAAIBdI2EFAACAXSNhBQAAgF0jYQUAAIBdI2EFAAcRvwrh+vXrtX//fhtHAwD/HRJW4G9Ylhj2zGQyaevWrapfv76OHz/O5xVWFRcX98RxPnewBW4cAPzFMAyZTCbt2rVLv/zyi0wmk0qWLKnKlSvbOjRAknTu3DlNnjxZfn5+6tOnj63DQQoWFxcnJ6fHNa1Vq1YpIiJCfn5+Kl++vLJnz26xH/gv8GkD/mIymfTdd9+pTp06+umnn7R9+3bVq1dP8+bNs3VogI4ePap3331XixcvVsaMGSU9vQIGvAjDMMzJaO/evdWpUyfNnj1bc+bMUb169XT06FE5OTkpNjbWxpEiNSFhBf6yc+dOdejQQUOHDtXGjRs1YsQIxcTEqHnz5ho/frytw0Mqly9fPhUpUkQ3b97UTz/9pHv37snJyYmvZ5HsTCaTJGncuHFasGCBFi1apH379qlWrVo6ePCgqlWrprCwMDk7O/NLE/4zJKyApJiYGO3evVstWrRQu3bt9Mcff6hmzZpq1qyZ+vfvry5dumj27Nm2DhOpyD8TUScnJ02cOFGtWrXS0aNHNX78eN25c0cmk4mkFcnu2rVr2rNnj0aMGKHSpUtr9erV6t+/vz755BOFhoaqdu3a5korSSv+C/SwItWL78WKiIjQpUuXVKxYMVWvXl0hISGaPn26jh07plKlSunevXuaNGmSPvzwQ1uHjBQuvp96z5492r17t1xdXZUzZ05VqVJFcXFx+uijj7Rr1y69+eab6tixo7y8vMzPAZ7Hk3pSt2/frsDAQN29e1d169ZVz5491b59e02aNEmdOnWSyWTS0aNHFRISYqOokZqksXUAgC3E/899x44dOnr0qBo3bqwcOXIoR44cOn78uO7evav27dtLkry8vFSvXj2VLFlSr732mm0DR6pgMpm0bNkytWrVSgUKFFBkZKTCw8PVs2dPDR8+XOPHj1fHjh31/fff6969e+rdu7e8vLxsHTYc1N+T1UWLFikuLk7vvvuuypYtK0maMmWK8uXLpxYtWkiSAgMD9c4776hw4cLKnTu3rcJGKkNLAFKd+GR12bJlql27tv744w+dPXvWvP/u3bsKCwtTRESEDMPQ1KlTde7cObVs2VKhoaG2CxypxokTJ9SxY0eNHDlS27dv1+bNmzVjxgyNGTNGn3zyiUwmkyZOnKh8+fJp+/btioqKsnXIcGDxyWrPnj3Vu3dvXbt2TRcvXjTvv3Pnjnbu3Knr168rKipKc+fOVbZs2dS7d285Oztz8RX+GwaQCm3evNnw8fExpk6dajEeHR1tGIZhdOzY0TCZTEbBggUNX19f48CBAzaIEqnB7Nm
zjd9//91ibNu2bUZISIhx8eJFi/GZM2ca7u7uxpYtWwzDMIy4uDjj8uXL/1msSFni4uLMf58+fbrh7+9v7Nq1K8FxBw8eNCpVqmR4eXkZBQoUMEJDQ42YmJgEcwDWREsAUqU1a9aoWrVqateunSIjI7Vv3z4tWLBAly9f1ogRIzRhwgRVr15dN27c0KuvvqocOXLYOmSkQJGRkerRo4dy5MihFStWKEuWLJIkV1dXnTx5UqdPn1ZgYKD5W4EqVaooICBAly5dkvS4dcDf39+WLwEOaPPmzapQoYL5gr349affeustlS5d2jwW3ypQqFAhTZ48WZs2bVJMTIw++OADpUmTRrGxsXJ2drb1y0EqQcKKVCl9+vT6/vvvtXDhQn333Xd68OCBHj58KDc3N1WsWFGnT59WrVq1bB0mUjgfHx/t3btXb7zxht566y0tWbJEWbNmVd68efX6669r/Pjx8vX1VaFChSRJmTJlkp+fn6Kjo20cORxVv379dOnSJb366qsymUzmpPXq1avm9X3jL95zcnLSw4cP9euvv6p8+fLKmzeveR6SVfzX6GFFimc8YSGM8uXLq0iRIvroo4/k6empLl26aP369erTp49y5syphw8f2iBSpEbBwcFau3atbty4oYYNG+rixYvy8fFRs2bNdOnSJQ0YMEDr16/X8ePHNWjQIF24cEHly5e3ddhwUO+8846mTZsmk8mk8PBwSY8T1JdeekkbNmzQ1atXLY6/efOmpkyZom3btlmMk6ziv8ayVkjR4r/a2rJli3bs2KHz58+rQYMGKlWqlLy9vRUREWHxdX+vXr20ZcsW/fjjj/L19bVh5Ehtzp07p6pVq5qr//7+/vr222/1zTff6Pvvv1doaKiioqK0ZMkSFStWzNbhwsEtW7ZMQ4YMUb9+/dSoUSPdv39fJUqUkI+Pj+bPny8/Pz/FxcWpRYsWunv3rjZv3sytWGFTJKxI8b777js1bdpUr7/+unmh65CQEH3xxRfKlSuXJOnXX3/V3LlzNX/+fP3yyy8qUqSIjaNGSvb3HkHDMMzVqnPnzqlKlSpKnz69Vq5cKX9/f0VHR+vkyZOSHrcEZM6c2ZahI4XYtm2bRo8erdu3b6t9+/Z66623dPLkSTVu3FgXL16Ui4uLMmTIYO5vTZs27RPXagX+KySsSNHOnTunatWqqUuXLuYF/+fPn6+FCxfK3d1dU6ZM0a1btzRx4kQdOnRI48aNM/cLAtYQn6z+9NNP+v7773X8+HHVr19fxYsX1yuvvGJOWjNkyKDvvvvOfCEW8Lyelmju2LFDX375pa5evapu3bqpXr16kh6vxfrgwQN5eXmpQYMGcnZ21qNHj5QmDZe9wHZIWJHiGH+748/+/ftVs2ZNLV++XP/3f/9n3j937lyNHDlSs2fPVqlSpXTq1CmlS5dOGTJksGXoSCVWrFihxo0bq1mzZrp586YiIiLk5uamXr16qV69ejp37pzeeOMNGYahDRs2KDAw0NYhw0H9/efhvHnzdPXqVbm7u+uDDz6Qk5OTtm3bprFjx+ratWvq2LGjGjZsmGAOLrCCPaC2jxTj0aNHkh5fQHDo0CFJkp+fn3x9fc2LYMfFxclkMql58+a6c+eOfvjhB0lS7ty5SVbxn7h69ao+++wzDR8+XNOnT9eyZcs0adIkhYaG6osvvtDevXsVHBysVatWydPTk5sC4Ln9PVnt0aOHunbtqgULFuiLL75QuXLlFBsbq3LlyqlLly7KnDmzJk+erAULFiSYh2QV9oCEFSnC6dOn9e6770qSlixZokqVKum3335Tzpw5lS1bNn366ac6c+aM+WuxmJgY5c2bV0FBQbYMG6nUxYsXLX5BKlOmjFq3bq1bt27pyJEjkqQcOXJo586dyp49u42ihKOK/+I0Plm9efOmzp8/r02bNmnz5s2aPXu2bt++rWLFilkkrfG3qwbsEQkrUoSoqCitXbtWpUuX1ttvv60vvvjCfOHUihU
rFB0drfr162vOnDlav369+vfvr/3796tixYo2jhwpXXzyEBMTI0lKkyaNsmTJosuXL8swDMXFxUmSXnnlFQUGBmrt2rXm59IziKTat2+fOVGVpClTpqh06dK6c+eOsmbNKm9vb5UvX14zZ85UbGysihcvrtjYWJUtW1ZjxozRhAkTbBg98HQkrEgR8ufPrwEDBujXX39VkSJFzNXWuLg4eXl5ac+ePQoKCtIXX3yhNm3aaP369dq4caPy5Mlj48iRkv39AqvBgwcrIiJC6dOnV6VKlTRs2DCtX7/e4nhPT0/lzp3bRtHC0X322Wf64IMPJD3+7D169Ejp0qWTp6enDh06pPTp00t6XHktVaqUZs2aJcMwlDVrVsXFxalw4cJycnIy/xIF2BMuukKKsXTpUh09elTTpk1TwYIFNX/+fGXMmNHi6tZr167pzz//VMaMGc0/vAFr+u6779SiRQu9//77atGihQoUKCBJatmypZYuXaquXbsqU6ZMOnPmjGbOnKldu3YpNDTUxlHDEd2+fVuenp5KkyaNzp49q+zZs+vevXvasGGD2rdvr4IFC2rdunXm4w3D0Pbt2zVlyhTNmTOHXlXYNRJWOKz46tX9+/eVNm1apU2bVpJ06NAhVatWTUWKFNHChQvNiemmTZtoAcB/6vjx46pWrZr69++vtm3bJtg/aNAgbd26VRcvXlRQUJA+//xz1gDGC1u1apXq1KmjH3/8UVWrVtWDBw/0008/qXv37sqXL59WrVplPvbvF2axGgDsGQkrHFL8D9k1a9Zo/vz5OnnypEqVKqU33nhDNWvW1OHDh/X666+rQIECGjp0qL7//nvNnDlTe/fuZV1LWMXy5ctVs2ZNubi4mMe2bNmijh07au3atQoICJCzs3OCNTHv3btnXr3Cy8vLFqHDwf3zM3Xjxg11795dS5cu1ffff6/KlSvrwYMH+vHHH9WrVy/ly5dPK1eutGHEQNLRwwqHZDKZtHLlSr355psqUKCAWrdurRs3bqhOnTo6fvy4ChYsqE2bNunYsWNq2rSpZs+erR9++IFkFVZx+PBhtWnTRteuXbMYv3Dhgo4fPy5PT085OzsrNjbWnFjs27dPERER8vT0lLe3N8kqnlv8Z2rFihW6c+eOMmTIoDFjxqhx48aqWbOmNmzYIHd3d1WvXl1ffPGFNm3apJ49e9o4aiBpqLDCId26dUsNGzZUzZo11aVLF127dk1FixZV/fr1NXHiRPNxDx8+1IEDB5QjRw4FBATYMGKkdJGRkfLx8dGxY8eUM2dOubq66uzZs6pTp46qVKmiTz75ROnTpzd/7dqqVSvlypVLffv25XaXeC5/r6yeP39ewcHB+uCDDzRy5Eh5e3vr5s2b6tWrl+bPn6/Vq1ercuXKun//vvbt26dXXnmFr//hUPgpCYcUExOjs2fP6tVXX9XFixdVrFgx1axZ05ysLlu2TMePH5ebm5vKlClDsgqr8/b21pUrV1SoUCF17txZjx49UnBwsGrVqqWdO3dqwIABunjxok6cOKF+/fpp1apVatCgAckqnothGObPzuDBgzVmzBgFBARoypQp6tSpk+7du6f06dNr1KhRatq0qerWravVq1fLw8ND5cuXN1f8AUfBIn9wCPE9q2FhYcqQIYP8/f0VGhqq/fv3a9iwYXrjjTc0efJkSdIff/yhNWvWKG3atAoJCbFYkxCwFsMw5O/vr2+++UYtW7aUi4uLJkyYoKFDh+qzzz7TihUrlC1bNoWGhio6Olo//vgjqwHgucX/XPvss880YcIELVmyRHXr1tWZM2f00UcfKSYmRtOmTVP69On1+eef69atW/riiy9Us2ZN8xxUWOFIaAmA3YtPVlesWKEOHTqoVatWGjx4sDp37qyvvvpK9erV09KlS83Vhr59+2rlypVat24dd7KCVcV/Nnfv3q1z586pevXq8vX11ffff69GjRqpbdu25qp/ZGSkdu3apUyZMikwMJCqP57
L39sADMNQ7dq1VaBAAY0cOdJ8zM8//6y6devqnXfe0ZdffikfHx9FRkbKy8uLij4cFhVW2D2TyaTVq1fr3Xff1fjx4/X666/LyclJEydO1N27d7VmzRqNHDlSTk5OOnPmjL755htt3bqVZBVWFZ+sfvfdd2rbtq26deumwoULy9fXV3Xr1tXixYv19ttvy2QymZOGatWq2TpsOLC/twFs3LhRlSpV0qVLl+Tv728+JjY2VlWqVFG7du00btw4OTs7a+rUqfLx8ZGUcEUBwFHwqYXde/jwoebMmaOuXbuqTZs2Sp8+vU6cOKEvvvhCDRo00CuvvKItW7Zo8eLFevDggXbs2MFalrA6k8mkTZs2qWXLlvrss8/Up08f5cuXT9LjWwXXq1dPCxcu1KxZs/Thhx8qOjraxhHDkf19vdT+/furQYMG+vPPP8137luzZo2k/33N/9JLL6lRo0ZasGCB+vfvb56HZBWOigor7J5hGIqIiFBAQIBu3rypgQMH6uDBgzp16pTSpk2rzp07q127dnJyclKaNGks1sEErGn16tWqXr262rZtq7t37+q3337TggULdPfuXXXv3l1vvvmmYmJi9NFHH2nYsGEWlTAgsf6+oP/+/ft1/vx5rVq1SunSpVOFChW0ZcsWjRo1SnFxcapVq5Zu3bqlTZs2qX79+ipdurTGjx+vNm3a6KWXXqKnHw6LX7Vg99zd3dWpUyd9/fXXypEjhy5cuKDWrVvrwoULqlu3rlatWiU3Nzd5eHiQrMKq4lv+t27dql27dsnb21uXL1/WkiVL1LZtWw0bNkz79+/X9evXVa9ePV2/fl2NGzfW6dOnSVaRZNOmTZP0v6rpkiVL9P777+u3335Tnjx5JEn58+dXhw4dFBwcrHfeeUeFChVSyZIlFRERoZYtWypz5sxydXWVn58fySocGgkrHMJ7772nvXv3aunSpfruu+/UtGlTSY8rD0FBQSzPgv+EyWTShg0bVKNGDd29e1clS5aUl5eXOnTooDRp0qhz587atWuX3n//fWXNmtV8u2BuCoCkmj17ttauXWvxsy0mJkbu7u4KDw/X8ePHzePlypXTqFGjtGbNGr399tvq3bu39u/fL0navXu3goODSVbh8FglAA7p+PHjmjdvniZNmqRt27apYMGCtg4JqcCVK1c0ffp0OTk56eOPP5YkXb9+XZGRkcqZM6f5uN69e2v79u1avXq1fH19bRUuHNj169eVLl06OTs7a8OGDapcubIkae3atRo2bJjSpEmjESNGqEyZMpISXkx15swZjR8/XrNnz9bWrVtVqFAhm7wOILlQYYXD2bdvn4YMGaLly5dr8+bNJKuwOsMwdOLECWXJkkWTJ0+Wn5+feV/GjBnNyer+/fvVrVs3TZ06VZMmTSJZxXOJi4tTxowZ5ezsrK1bt6pNmzbmW6nWqFFD3bt3l6urqz799FPt3r1bkuXFVA8ePNCqVat06tQpbd68mWQVKQIVVjicBw8eaO/evcqePTtLV8Hq/n519oABAzR06FC1a9dOn332mUXiGh4erhEjRigiIkITJkxQ4cKFbRQxUoqFCxfq4MGDcnd315IlS1S7dm2NGDFCkvTdd99p2rRpSps2rXr27KlXX33V4rn37t1TTEyMxWcUcGSsEgCH4+7urvLly9s6DKRw8Ynq33v/hgwZori4OA0fPlxFihRR8+bN5eHhIUkKCQnRxx9/rHTp0ilTpky2ChspxMOHDzVv3jylT59eM2bMkLOzs+bPny9JGjFihBo0aCCTyaRhw4Zp5cqVFgmrYRjy9PS0VeiAVZCwAsA/xCerW7Zs0erVq3X//n1lzZpVffr00dChQxUbG6vOnTvLZDLpvffeMyetefPmtXHkSAkMw5Cbm5tGjBihsmXLqkmTJurWrZsMw9A333wjk8mk4cOHq379+kqfPn2CX+C5wAopET2sAPAP8XewqlWrlm7evClJmjRpkipVqiTpcYWrV69e6ta
tm6ZNm6b79+/bMlw4uH925plMJhmGoXz58qlx48ZasmSJPDw81Lp1a7377rv64Ycf1KFDB0lShQoV5OTkpLi4OFuEDvxnSFgB4B9+//139evXT8OHD9f06dPVrVs3RUVFKXfu3ObkYtiwYWrdurWGDRvGXazwQuIrohMmTNBXX32lyMhImUwmubm5qWLFilq6dKn27dunLFmyqFWrVqpRo4Zu3bplkehyByukdFx0BQCyvLjq1KlTqlWrlo4fP67z58/rlVdeUc2aNTVlyhRJ0k8//aRq1apJkq5du0bPKl7Y/fv31a9fP02ePFlVq1ZV0aJF9emnn0qSWrRooStXrmjJkiXy8vLSzZs3lS5dOnMllhYApAb8SgYAelzl2rVrlyZMmKA0adIoY8aMWrVqlcqVK6eaNWtq4sSJkqRjx45p3rx52rNnj6THy1oBL8rDw0NjxozR4cOHVahQIS1dulS5c+fWmDFjlC1bNrm4uCgiIkKSlD59epJVpDokrAAg6dGjR5o6daqWLVumdOnSyTAM1atXTxUqVNCUKVOUJs3ja1RnzJihs2fPKnv27JK4wAXJK3fu3BoyZIgOHDig2rVra/PmzZo4caJ++OEHrV271uJYPntITWgJAIC/HD9+XCVKlNDSpUuVI0cOvfzyy6pZs6YaNWqkzJkza8mSJZozZ462bNnCOquwmr9XTiMiIrR582YtW7ZMy5cvN//iBKQ2JKwAUqV/fp0af2vLLl266Ny5c1q+fLk2bNigAQMGKCIiQunTp5efn58mTZqkIkWK2DBypAZP+7r/0aNHJK1IlfjUA0iVTCaTNm/erPPnz+vdd981X2X96quvqk2bNtq8ebMqV66sIkWK6P79+3J2dpa3t7d8fHxsHDlSg38mq/EJLMkqUisqrABSpejoaPXu3Vvjxo1T/fr1VaZMGfXo0UOS1K5dOx0+fFg//vijvL29bRwpAICLrgCkSi4uLhozZoyOHDkif39/zZgxQ6GhoZo1a5YKFiyoTJkyKSwszNZhAgBEhRUA9PDhQ929e1d9+vTR+fPndeTIEV28eFGdOnXSuHHjbB0eAKR6JKwA8DcHDx7U1q1bNXbsWC1dupQLrADADpCwAoASXpUdFRUlV1dXG0YEAIhHwgoAT8BdhADAfnDRFQA8AckqANgPElYAAADYNRJWAAAA2DUSVgAAANg1ElYAAADYNRJWAAAA2DUSVgAAANg1ElYAAADYNRJWAJDUokUL1atXz/z4tddeU5cuXf7zOH755ReZTCbdunXrqceYTCatWLEi0XMOGjRIRYsWfaG4zp49K5PJpLCwsBeaBwCeBwkrALvVokULmUwmmUwmubi4KHfu3BoyZIgePXpk9XN/9913+vTTTxN1bGKSTADA80tj6wAA4Flef/11zZo1S1FRUVqzZo06dOigtGnTqm/fvgmOjY6OlouLS7KcN3369MkyDwDgxVFhBWDXXF1dFRAQoODgYH344YeqUqWKVq5cKel/X+MPGzZMWbJkUUhIiCTp/PnzatSokfz8/JQ+fXrVrVtXZ8+eNc8ZGxurbt26yc/PTxkyZFCvXr1kGIbFef/ZEhAVFaXevXsrKChIrq6uyp07t2bMmKGzZ8+qYsWKkqR06dLJZDKpRYsWkqS4uDiNGDFCOXLkkLu7u4oUKaKlS5danGfNmjXKmzev3N3dVbFiRYs4E6t3797KmzevPDw8lDNnTvXv318xMTEJjps6daqCgoLk4eGhRo0a6fbt2xb7v/76a4WGhsrNzU358uXTV199leRYAMAaSFgBOBR3d3dFR0ebH2/YsEHh4eFav369Vq1apZiYGFWvXl3e3t7aunWrtm/fLi8vL73++uvm540ePVqzZ8/WzJkztW3bNt28eVPLly9/5nnfe+89ffPNNxo/fryOHTumqVOnysvLS0FBQVq2bJkkKTw8XJcuXdK4ceMkSSNGjNDcuXM1ZcoUHTlyRF27dlXTpk21efNmSY8
T6wYNGqh27doKCwtTmzZt1KdPnyS/J97e3po9e7aOHj2qcePGafr06RozZozFMadOndK3336rH374QevWrdOBAwfUvn178/4FCxZowIABGjZsmI4dO6bhw4erf//+mjNnTpLjAYBkZwCAnWrevLlRt25dwzAMIy4uzli/fr3h6upq9OjRw7zf39/fiIqKMj9n3rx5RkhIiBEXF2cei4qKMtzd3Y0ff/zRMAzDCAwMNEaNGmXeHxMTY2TLls18LsMwjAoVKhgfffSRYRiGER4ebkgy1q9f/8Q4N23aZEgy/vzzT/PYw4cPDQ8PD2PHjh0Wx7Zu3dp45513DMMwjL59+xr58+e32N+7d+8Ec/2TJGP58uVP3f/5558bJUqUMD8eOHCg4ezsbPzxxx/msbVr1xpOTk7GpUuXDMMwjFy5chkLFy60mOfTTz81ypQpYxiGYURERBiSjAMHDjz1vABgLfSwArBrq1atkpeXl2JiYhQXF6d3331XgwYNMu8vVKiQRd/qb7/9plOnTsnb29tinocPH+r06dO6ffu2Ll26pNKlS5v3pUmTRiVLlkzQFhAvLCxMzs7OqlChQqLjPnXqlO7fv6+qVatajEdHR6tYsWKSpGPHjlnEIUllypRJ9DniLV68WOPHj9fp06d19+5dPXr0SD4+PhbHvPTSS8qaNavFeeLi4hQeHi5vb2+dPn1arVu3Vtu2bc3HPHr0SL6+vkmOBwCSGwkrALtWsWJFTZ48WS4uLsqSJYvSpLH8seXp6Wnx+O7duypRooQWLFiQYK5MmTI9Vwzu7u5Jfs7du3clSatXr7ZIFKXHfbnJZefOnWrSpIkGDx6s6tWry9fXV4sWLdLo0aOTHOv06dMTJNDOzs7JFisAPC8SVgB2zdPTU7lz50708cWLF9fixYuVOXPmBFXGeIGBgdq9e7deffVVSY8rifv27VPx4sWfeHyhQoUUFxenzZs3q0qVKgn2x1d4Y2NjzWP58+eXq6urfv/996dWZkNDQ80XkMXbtWvXv7/Iv9mxY4eCg4PVr18/89i5c+cSHPf777/r4sWLypIli/k8Tk5OCgkJkb+/v7JkyaIzZ86oSZMmSTo/APwXuOgKQIrSpEkTZcyYUXXr1tXWrVsVERGhX375RZ07d9Yff/whSfroo4/02WefacWKFTp+/Ljat2//zDVUs2fPrubNm6tVq1ZasWKFec5vv/1WkhQcHCyTyaRVq1bp2rVrunv3rry9vdWjRw917dpVc+bM0enTp7V//35NmDDBfCHTBx98oJMnT6pnz54KDw/XwoULNXv27CS93jx58uj333/XokWLdPr0aY0fP/6JF5C5ubmpefPm+u2337R161Z17txZjRo1UkBAgCRp8ODBGjFihMaPH68TJ07o0KFDmjVrlr788sskxQMA1kDCCiBF8fDw0JYtW/TSSy+pQYMGCg0NVevWrfXw4UNzxbV79+5q1qyZmjdvrjJlysjb21v169d/5ryTJ0/WW2+9pfbt2ytfvnxq27at7t27J0nKmjWrBg8erD59+sjf318dO3aUJH366afq37+/RowYodDQUL3++utavXq1cuTIIelxX+myZcu0YsUKFSlSRFOmTNHw4cOT9Hrr1Kmjrl27qmPHjipatKh27Nih/v37Jzgud+7catCggd544w1Vq1ZNhQsXtli2qk2bNvr66681a9YsFSpUSBUqVNDs2bPNsQKALZmMp11lAAAAANgBKqwAAACwaySsAAAAsGskrAAAALBrJKwAAACwaySsAOCgBg0apKJFiybLXNmzZ9fYsWOTZS4ASG4krADs3u+//66aNWvKw8NDmTNnVs+ePfXo0aNnPmf//v2qWrWq/Pz8lCFDBrVr1858RydJunHjhl5//XVlyZJFrq6uCgoKUseOHRUZGWk+Ztu2bSpbtqwyZMggd3d35cuXT2PGjLE4z+TJk1W4cGH5+PjIx8dHZcqU0dq1a5P3DXiKHj16aMOGDf/
JuWzpl19+UfHixeXq6qrcuXMnaq3agwcPqnz58nJzc1NQUJBGjRqV4Jhbt26pQ4cOCgwMlKurq/Lmzas1a9ZYHHPhwgU1bdrU/BkoVKiQ9u7da95vMpmeuH3++ecv/LoB/A93ugJgFhMTo7Rp09o6DAuxsbGqWbOmAgICtGPHDl26dEnvvfee0qZN+9Q1Sy9evKgqVaro7bff1sSJExUZGakuXbqoRYsWWrp0qSTJyclJdevW1dChQ5UpUyadOnVKHTp00M2bN7Vw4UJJj++y1bFjRxUuXFienp7atm2b3n//fXl6eqpdu3aSpGzZsumzzz5Tnjx5ZBiG5syZo7p16+rAgQMqUKCAVd8bLy8veXl5WfUcthYREaGaNWvqgw8+0IIFC7Rhwwa1adNGgYGBql69+hOfExkZqWrVqqlKlSqaMmWKDh06pFatWsnPz8/87xYdHa2qVasqc+bMWrp0qbJmzapz587Jz8/PPM+ff/6psmXLqmLFilq7dq0yZcqkkydPKl26dOZjLl26ZHHutWvXqnXr1nrzzTeT/80AUjMDgE2sXbvWKFu2rOHr62ukT5/eqFmzpnHq1CmLY86fP280btzYSJcuneHh4WGUKFHC2LVrl3n/ypUrjZIlSxqurq5GhgwZjHr16pn3STKWL19uMZ+vr68xa9YswzAMIyIiwpBkLFq0yHj11VcNV1dXY9asWcb169eNxo0bG1myZDHc3d2NggULGgsXLrSYJzY21hg5cqSRK1cuw8XFxQgKCjKGDh1qGIZhVKxY0ejQoYPF8VevXjXSpk1r/Pzzz0l+n9asWWM4OTkZly9fNo9NnjzZ8PHxMaKiop74nKlTpxqZM2c2YmNjzWMHDx40JBknT5586rnGjRtnZMuW7Znx1K9f32jatOkzj0mXLp3x9ddfP/OYf5JkTJkyxahZs6bh7u5u5MuXz9ixY4dx8uRJo0KFCoaHh4dRpkwZi8/IwIEDjSJFipgfb9q0yXj55ZcNDw8Pw9fX13jllVeMs2fPmvc/6/MSHBxsjBkzxvx49OjRRsGCBQ0PDw8jW7ZsxocffmjcuXPHvP/s2bNGrVq1DD8/P8PDw8PInz+/sXr1asMwDOPmzZvGu+++a2TMmNFwc3MzcufObcycOTNJ70e8Xr16GQUKFLAYe/vtt43q1as/9TlfffWVkS5dOovPR+/evY2QkBDz48mTJxs5c+Y0oqOjnzpP7969jXLlyiUp3rp16xqVKlVK0nMA/DtaAgAbuXfvnrp166a9e/dqw4YNcnJyUv369RUXFydJunv3ripUqKALFy5o5cqV+u2339SrVy/z/tWrV6t+/fp64403dODAAW3YsEGlSpVKchx9+vTRRx99pGPHjql69ep6+PChSpQoodWrV+vw4cNq166dmjVrpj179pif07dvX3322Wfq37+/jh49qoULF8rf31/S4zsmLVy4UFFRUebj58+fr6xZs6pSpUqSHt+SNL46+LQt3s6dO1WoUCHz/JJUvXp1RUZG6siRI098TVFRUXJxcZGT0/9+xLm7u0t6/DX/k1y8eFHfffedKlSo8NT36sCBA9qxY8dTj4mNjdWiRYt07949lSlTxjzeokULvfbaa0+dN96nn36q9957T2FhYcqXL5/effddvf/+++rbt6/27t0rwzDMd9H6p0ePHqlevXqqUKGCDh48qJ07d6pdu3YymUySkv55cXJy0vjx43XkyBHNmTNHGzduVK9evcz7O3TooKioKG3ZskWHDh3SyJEjzf9u8Z+LtWvX6tixY5o8ebIyZsxofm6BAgWe+W9fo0YN87E7d+5UlSpVLGKrXr26du7c+dTYd+7cqVdffVUuLi4WzwkPD9eff/4pSVq5cqXKlCmjDh06yN/fXwULFtTw4cMVGxtrfs7KlStVsmRJNWzYUJkzZ1axYsU0ffr0p573ypUrWr16tVq3bv3UYwA8J1tnzAAeu3btmiHJOHTokGEYj6uE3t7exo0
bN554fJkyZYwmTZo8dT4lssI6duzYf42tZs2aRvfu3Q3DMIzIyEjD1dXVmD59+hOPffDggZEuXTpj8eLF5rHChQsbgwYNMj++cuWKcfLkyWdu8dq2bWtUq1bN4hz37t0zJBlr1qx5YgyHDx820qRJY4waNcqIiooybt68abz55puGJGP48OEWxzZu3Nhwd3c3JBm1a9c2Hjx4kGC+rFmzGi4uLoaTk5MxZMiQBPsPHjxoeHp6Gs7Ozoavr6+50hivT58+RrNmzZ4YazxJxieffGJ+vHPnTkOSMWPGDPPYN998Y7i5uZkf/73CeuPGDUOS8csvvzxx/n/7vPyzwvpPS5YsMTJkyGB+XKhQIYt/07+rXbu20bJly6fOdfbs2Wf+2//xxx/mY/PkyZPg32z16tWGJOP+/ftPnL9q1apGu3btLMaOHDliSDKOHj1qGIZhhISEGK6urkarVq2MvXv3GosWLTLSp09v8ZpcXV0NV1dXo2/fvsb+/fuNqVOnGm5ubsbs2bOfeN6RI0ca6dKle+JnCMCLoYcVsJGTJ09qwIAB2r17t65fv26unP7+++8qWLCgwsLCVKxYMaVPn/6Jzw8LC1Pbtm1fOI6SJUtaPI6NjdXw4cP17bff6sKFC4qOjlZUVJQ8PDwkSceOHVNUVJQqV678xPnc3NzUrFkzzZw5U40aNdL+/ft1+PBhrVy50nxM5syZlTlz5heO/WkKFCigOXPmqFu3burbt6+cnZ3VuXNn+fv7W1RdJWnMmDEaOHCgTpw4ob59+6pbt2766quvLI7ZunWr7t69q127dqlPnz7KnTu33nnnHfP+kJAQhYWF6fbt21q6dKmaN2+uzZs3K3/+/JKkESNGJCruwoULm/8eX1EuVKiQxdjDhw8VGRkpHx8fi+emT59eLVq0UPXq1VW1alVVqVJFjRo1UmBgoKSkf15+/vlnjRgxQsePH1dkZKQePXqkhw8f6v79+/Lw8FDnzp314Ycf6qefflKVKlX05ptvmuP/8MMP9eabb2r//v2qVq2a6tWrp1deecU8d3BwcKLjsJa4uDhlzpxZ06ZNk7Ozs0qUKKELFy7o888/18CBA83HlCxZ0twrXaxYMR0+fFhTpkxR8+bNE8w5c+ZMNWnSRG5ubv/pawFSA1oCABupXbu2bt68qenTp2v37t3avXu3pMcXg0j/+wr7af5tv8lkkmEYFmMxMTEJjvP09LR4/Pnnn2vcuHHq3bu3Nm3apLCwMFWvXj3RcUmP2wLWr1+vP/74Q7NmzVKlSpUskpSktAQEBAToypUrFvPHPw4ICHhqDO+++64uX76sCxcu6MaNGxo0aJCuXbumnDlzWhwXEBCgfPnyqU6dOpo6daomT56c4EKaHDlyqFChQmrbtq26du2qQYMGWex3cXFR7ty5VaJECY0YMUJFihTRuHHj/vV9+qe/X/AW/1X+k8bif7n5p1mzZmnnzp165ZVXtHjxYuXNm1e7du2SlLh/t3hnz55VrVq1VLhwYS1btkz79u3TpEmTJP3v89mmTRudOXNGzZo106FDh1SyZElNmDBBklSjRg2dO3dOXbt21cWLF1W5cmX16NHDPH9SWgKe9u/v4+Pz1NeUmM9MYGCg8ubNK2dnZ/MxoaGhunz5svk1BgYGmn/p+Psxv//+e4Jzbt26VeHh4WrTps3T3lYAL4CEFbCBGzduKDw8XJ988okqV66s0NBQc29dvMKFCyssLEw3b9584hyFCxd+5pJGmTJlski8Tp48qfv37/9rbNu3b1fdunXVtGlTFSlSRDlz5tSJEyfM+/PkySN3d/dnnrtQoUIqWbKkpk+froULF6pVq1YW+4cMGaKwsLBnbvHKlCmjQ4cO6erVq+ax9evXy8fHJ0Ey8ST+/v7y8vLS4sWL5ebmpqpVqz712PhE8O/9t0865ln7E3uMtRQrVkx9+/bVjh07VLBgQfOKB//2efm7ffv2KS4uTqNHj9b//d//KW/evLp48WK
C44KCgvTBBx/ou+++U/fu3S36OzNlyqTmzZtr/vz5Gjt2rKZNm2bet2bNmmf+23/99dfmY8uUKZMg7vXr11v0CP9TmTJltGXLFotf0NavX6+QkBDzFf5ly5bVqVOnLJL/EydOKDAw0Nz7WrZsWYWHh1vMfeLEiSdWiGfMmKESJUqoSJEiT40LwAuwdU8CkBrFxsYaGTJkMJo2bWqcPHnS2LBhg/Hyyy9b9J1GRUUZefPmNcqXL29s27bNOH36tLF06VJjx44dhmE8viLcycnJGDBggHH06FHj4MGDxmeffWY+R+PGjY3Q0FBj//79xq+//mpUqlTJSJs2bYIe1gMHDljE1rVrVyMoKMjYvn27cfToUaNNmzaGj4+PUbduXfMxgwYNMtKlS2fMmTPHOHXqlLFz584EV8VPmzbNcHFxeeGevkePHhkFCxY0qlWrZoSFhRnr1q0zMmXKZPTt29d8zO7du42QkBCL3scJEyYY+/btM8LDw42JEyca7u7uxrhx48z7V69ebcycOdM4dOiQERERYaxatcoIDQ01ypYtaz5m4sSJxsqVK40TJ04YJ06cML7++mvD29vb6Nevn/mYPn36GJs3bzYiIiKMgwcPGn369DFMJpPx008/Jel16h89x0/699m0aZMhyfjzzz8Nw7DsYT1z5ozRp08fY8eOHcbZs2eNH3/80ciQIYPx1VdfmZ/7rM/L33tYw8LCzP3Np0+fNubOnWtkzZrV4twfffSRsW7dOuPMmTPGvn37jNKlSxuNGjUyDMMw+vfvb6xYscI4efKkcfjwYaNWrVpGqVKlkvR+xDtz5ozh4eFh9OzZ0zh27JgxadIkw9nZ2Vi3bp35mAkTJlhcmX/r1i3D39/faNasmXH48GFj0aJFhoeHhzF16lTzMb///rvh7e1tdOzY0QgPDzdWrVplZM6c2bzahWEYxp49e4w0adIYw4YNM06ePGksWLDA8PDwMObPn28R4+3btw0PDw9j8uTJz/UaAfw7ElbARtavX2+EhoYarq6uRuHChY1ffvklQdJy9uxZ48033zR8fHwMDw8Po2TJksbu3bvN+5ctW2YULVrUcHFxMTJmzGg0aNDAvO/ChQtGtWrVDE9PTyNPnjzGmjVrnnjR1T8T1hs3bhh169Y1vLy8jMyZMxuffPKJ8d5771kkrLGxscbQoUON4OBgI23atMZLL72U4MKYO3fuGB4eHkb79u1f+L06e/asUaNGDcPd3d3ImDGj0b17dyMmJsa8Pz6Ri4iIMI81a9bMSJ8+veHi4mIULlzYmDt3rsWcGzduNMqUKWP4+voabm5uRp48eYzevXubEzLDMIzx48cbBQoUMDw8PAwfHx+jWLFixldffWWxXFarVq2M4OBgw8XFxciUKZNRuXLlBMlq8+bNjQoVKjzzNb5ownr58mWjXr16RmBgoOHi4mIEBwcbAwYMsIj1WZ+Xf1509eWXXxqBgYGGu7u7Ub16dWPu3LkW5+7YsaORK1cuw9XV1ciUKZPRrFkz4/r164ZhGMann35qhIaGGu7u7kb69OmNunXrGmfOnHnm63+WTZs2mePOmTOn+TMcb+DAgUZwcLDF2G+//WaUK1fOcHV1NbJmzWqRnMfbsWOHUbp0acPV1dXImTOnMWzYMOPRo0cWx/zwww9GwYIFDVdXVyNfvnzGtGnTEswzdepUw93d3bh169Zzv0YAz2YyjH80uQFAMjh79qxy5cqlX3/9VcWLF7d1ODZVoUIFVaxYMUHvKwAgcUhYASSrmJgY3bhxQz169FBERIS2b99u65Bs6vbt2ypQoICOHz+e4u9KBQDWwrJWAJLV9u3bVbFiReXNm9d8G9TUzNfXV3/88YetwwAAh0aFFQAAAHaNZa0AAABg10hYAQAAYNdIWAEAAGDXSFgBAABg10hYAQAAYNdIWAEAAGDXSFgBAABg10hYAQAAYNf+H6y+an/igeeXAAAAAElFTkSuQmCC", + 
"text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "# Load dataset,test, Evaluate with F1 and CM and save model/estimator\n", + "\n", + "from src.stages.evaluate import evaluate_model\n", + "\n", + "\n", + "# Call function\n", + "evaluate_model(config_path = 'params.yaml')" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-01-15 10:51:24,829 — EVALUATE — INFO — Load model\n", + "2024-01-15 10:51:24,866 — EVALUATE — INFO — Load test dataset\n", + "2024-01-15 10:51:24,877 — EVALUATE — INFO — Evaluate (build report)\n", + "2024-01-15 10:51:24,880 — EVALUATE — INFO — Save metrics\n", + "2024-01-15 10:51:24,884 — EVALUATE — INFO — F1 metrics file saved to : reports/metrics.json\n", + "2024-01-15 10:51:24,884 — EVALUATE — INFO — Save confusion matrix\n", + "2024-01-15 10:51:25,005 — EVALUATE — INFO — Confusion matrix saved to : reports/confusion_matrix.png\n" + ] + } + ], + "source": [ + "# Shell prompt for running \"EVALUATE MODEL\" function. 
oad dataset,test, Evaluate with F1 and CM and save model/estimator\n", + "\n", + "!python3 src/stages/evaluate.py --config=params.yaml" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/step-5-automate-ml-pipeline-bee.ipynb b/notebooks/step-5-automate-ml-pipeline-bee.ipynb new file mode 100644 index 00000000..d5c924fc --- /dev/null +++ b/notebooks/step-5-automate-ml-pipeline-bee.ipynb @@ -0,0 +1,356 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1.0 Change working Directory to Root Directory" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + 
"outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/mnt/c/Users/MR-BEST/course-ds-base-root/course-ds-base\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/mr-best/.local/lib/python3.10/site-packages/IPython/core/magics/osm.py:417: UserWarning: using dhist requires you to install the `pickleshare` library.\n", + " self.shell.db['dhist'] = compress_dhist(dhist)[-100:]\n" + ] + } + ], + "source": [ + "# Set the repository root as a working directory \n", + "%cd .." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 0.1 Init DVC repository\n", + " 2.1 Init DVC repository and setup DVC remote storage\n", + "\n", + "dvc init\n", + "\n", + "2.2 Add DVC repository under git control\n", + "\n", + "git add .\n", + "git commit -m \"Init DVC repo\"\n", + "\n", + "# 0.2 View config" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "#### Initial state before the update\n", + "\n", + "# base:\n", + "# random_state: 42\n", + "# log_level: INFO\n", + "\n", + "# data:\n", + " # dataset_csv: 'data/raw/iris.csv'\n", + "# features_path: 'data/processed/featured_iris.csv'\n", + "# test_size: 0.2\n", + "# trainset_path: 'data/processed/train_iris.csv'\n", + "# testset_path: 'data/processed/test_iris.csv'\n", + "\n", + "\n", + "# train:\n", + "# clf_params:\n", + "# 'C': 0.001\n", + "# 'solver': 'lbfgs'\n", + "# 'multi_class': 'multinomial'\n", + "# 'max_iter': 100\n", + "# model_path: 'models/model.joblib'\n", + "\n", + "# reports:\n", + "# metrics_file: 'reports/metrics.json'\n", + "# confusion_matrix_image: 'reports/confusion_matrix.png'\n", + "\n", + " #### Newer Versions of Metrics\n", + "\n", + " \n", + "base:\n", + " random_state: 42\n", + " log_level: INFO\n", + "\n", + "\n", + "data_load:\n", + " dataset_csv: 'data/raw/iris.csv'\n", + "\n", + "\n", + "featurize:\n", + " 
features_path: 'data/processed/featured_iris.csv'\n", + " target_column: target\n", + "\n", + "\n", + "data_split:\n", + " test_size: 0.2\n", + " trainset_path: 'data/processed/train_iris.csv'\n", + " testset_path: 'data/processed/test_iris.csv'\n", + "\n", + "\n", + "train:\n", + "\n", + " cv: 3\n", + " estimator_name: logreg\n", + " estimators:\n", + " logreg: # sklearn.linear_model.LogisticRegression\n", + " param_grid: # params of GridSearchCV constructor\n", + " C: [0.001]\n", + " max_iter: [100]\n", + " solver: ['lbfgs']\n", + " multi_class: ['multinomial']\n", + " svm: # sklearn.svm.SVC\n", + " param_grid:\n", + " C: [0.1, 1.0]\n", + " kernel: ['rbf', 'linear']\n", + " gamma: ['scale']\n", + " degree: [3, 5]\n", + " model_path: models/model.joblib\n", + "\n", + "\n", + "evaluate:\n", + " reports_dir: reports\n", + " metrics_file: 'metrics.json'\n", + " confusion_matrix_image: 'confusion_matrix.png'" + ] + } + ], + "source": [ + "# Look on stages config \n", + "!cat params.yaml" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create and run stages for a DVC pipeline\n", + "\n", + "## First Stage of ML Pipeline : Extract and Raw Load Data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "dvc stage add -n data_load \\\n", + " -d src/stages/data_load.py \\\n", + " -o data/raw/iris.csv \\\n", + " -p base,data_load \\\n", + " python src/stages/data_load.py --config=params.yaml" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Featurization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "dvc stage add -n featurize \\\n", + " -d src/stages/featurize.py \\\n", + " -d data/raw/iris.csv \\\n", + " -o data/processed/featured_iris.csv \\\n", + " -p base,featurize \\\n", + " python src/stages/featurize.py --config=params.yaml" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3 Split dataset into train/test edited directly into the 
dvc.yaml config file" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "data_split:\n", + " cmd: python src/stages/data_split.py --config=params.yaml\n", + " deps:\n", + " - data/processed/featured_iris.csv\n", + " - src/stages/data_split.py\n", + " params:\n", + " - base\n", + " - data_split\n", + " - featurize\n", + " outs:\n", + " - data/processed/test_iris.csv\n", + " - data/processed/train_iris.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4 Train Model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "train:\n", + " cmd: python src/stages/train.py --config=params.yaml\n", + " deps:\n", + " - data/processed/test_iris.csv\n", + " - data/processed/train_iris.csv\n", + " - src/stages/train.py\n", + " params:\n", + " - base\n", + " - train\n", + " outs:\n", + " - models/model.joblib" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5 Evaluate Model with Test Data" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-01-15 10:22:31,528 — TRAIN — INFO — Get model/estimator name\n", + "2024-01-15 10:22:31,529 — TRAIN — INFO — The name of Model/Estimator: logreg\n", + "2024-01-15 10:22:31,530 — TRAIN — INFO — Load train dataset\n", + "2024-01-15 10:22:31,544 — TRAIN — INFO — Train model/estimator\n", + "Fitting 3 folds for each of 1 candidates, totalling 3 fits\n", + "2024-01-15 10:22:31,577 — TRAIN — INFO — Best score: 0.857564307288572\n", + "2024-01-15 10:22:31,578 — TRAIN — INFO — Trained Model Saved\n" + ] + } + ], + "source": [ + "evaluate:\n", + " cmd: python src/stages/evaluate.py --config=params.yaml\n", + " deps:\n", + " - models/model.joblib\n", + " - data/processed/test_iris.csv\n", + "\n", + " params:\n", + " - base\n", + " - train\n", + " - data_split\n", + " - featurize\n", + " - evaluate\n", + " outs:\n", + " - metrics.json\n", 
+ " - confusion_matrix.png" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# MAnual entry on bash shell\n", + "\n", + "dvc stage add -n evaluate \\\n", + " -d models/model.joblib \\\n", + " -d data/processed/test_iris.csv\\\n", + " -o metrics.json \\\n", + " -o confusion_matrix.png\\\n", + " -p base, evaluate \\\n", + " python src/stages/evaluate.py --config=params.yaml" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/params.yaml b/params.yaml index a48a0538..161c5f7c 100644 --- a/params.yaml +++ b/params.yaml @@ -1,22 +1,45 @@ + base: random_state: 42 + log_level: INFO -data: + +data_load: dataset_csv: 'data/raw/iris.csv' + + +featurize: features_path: 
'data/processed/featured_iris.csv' - test_size: 0.2 + target_column: target + + +data_split: + test_size: 0.21 trainset_path: 'data/processed/train_iris.csv' testset_path: 'data/processed/test_iris.csv' train: - clf_params: - 'C': 0.001 - 'solver': 'lbfgs' - 'multi_class': 'multinomial' - 'max_iter': 100 - model_path: 'models/model.joblib' - -reports: - metrics_file: 'reports/metrics.json' - confusion_matrix_image: 'reports/confusion_matrix.png' \ No newline at end of file + + cv: 3 + estimator_name: logreg + estimators: + logreg: # sklearn.linear_model.LogisticRegression + param_grid: # params of GridSearchCV constructor + C: [0.001] + max_iter: [100] + solver: ['lbfgs'] + multi_class: ['multinomial'] + svm: # sklearn.svm.SVC + param_grid: + C: [0.1, 1.0] + kernel: ['rbf', 'linear'] + gamma: ['scale'] + degree: [3, 5] + model_path: models/model.joblib + + +evaluate: + reports_dir: reports + metrics_file: 'metrics.json' + confusion_matrix_image: 'confusion_matrix.png' \ No newline at end of file diff --git a/reports/.gitignore b/reports/.gitignore deleted file mode 100644 index b722e9e1..00000000 --- a/reports/.gitignore +++ /dev/null @@ -1 +0,0 @@ -!.gitignore \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index d5b4910e..572cf417 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ -dvc==2.6.4 + +dvc==2.8.3 joblib==1.0.1 jupyter==1.0.0 jupyter_contrib_nbextensions==0.5.1 @@ -10,4 +11,6 @@ python-box==5.4.1 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.7.1 -tqdm==4.62.2 \ No newline at end of file +tqdm==4.62.2 + + diff --git a/src/evaluate.py b/src/evaluate.py new file mode 100644 index 00000000..599d73d5 --- /dev/null +++ b/src/evaluate.py @@ -0,0 +1,112 @@ +import json +import math +import os +import pickle +import sys + +import pandas as pd +from sklearn import metrics +from sklearn import tree +from dvclive import Live +from matplotlib import pyplot as plt + + +def evaluate(model, matrix, split, live, save_path): + 
""" + Dump all evaluation metrics and plots for given datasets. + + Args: + model (sklearn.ensemble.RandomForestClassifier): Trained classifier. + matrix (scipy.sparse.csr_matrix): Input matrix. + split (str): Dataset name. + live (dvclive.Live): Dvclive instance. + save_path (str): Path to save the metrics. + """ + labels = matrix[:, 1].toarray().astype(int) + x = matrix[:, 2:] + + predictions_by_class = model.predict_proba(x) + predictions = predictions_by_class[:, 1] + + # Use dvclive to log a few simple metrics... + avg_prec = metrics.average_precision_score(labels, predictions) + roc_auc = metrics.roc_auc_score(labels, predictions) + if not live.summary: + live.summary = {"avg_prec": {}, "roc_auc": {}} + live.summary["avg_prec"][split] = avg_prec + live.summary["roc_auc"][split] = roc_auc + + # ... and plots... + # ... like an roc plot... + live.log_sklearn_plot("roc", labels, predictions, name=f"roc/{split}") + # ... and precision recall plot... + # ... which passes `drop_intermediate=True` to the sklearn method... + live.log_sklearn_plot( + "precision_recall", + labels, + predictions, + name=f"prc/{split}", + drop_intermediate=True, + ) + # ... and confusion matrix plot + live.log_sklearn_plot( + "confusion_matrix", + labels.squeeze(), + predictions_by_class.argmax(-1), + name=f"cm/{split}", + ) + + +def save_importance_plot(live, model, feature_names): + """ + Save feature importance plot. + + Args: + live (dvclive.Live): DVCLive instance. + model (sklearn.ensemble.RandomForestClassifier): Trained classifier. + feature_names (list): List of feature names. 
def get_df(data):
    """Load a header-less, tab-separated file into a data frame.

    The three columns are named ``id``, ``label`` and ``text``. The shape of
    the resulting frame is reported on stderr.

    Args:
        data: Path (or readable buffer) of the tab-separated input.

    Returns:
        pandas.DataFrame: The parsed data.
    """
    column_names = ["id", "label", "text"]
    frame = pd.read_csv(
        data,
        names=column_names,
        header=None,
        delimiter="\t",
        encoding="utf-8",
    )
    report = f"The input data frame {data} size is {frame.shape}\n"
    sys.stderr.write(report)
    return frame
def generate_and_save_train_features(train_input, train_output, bag_of_words, tfidf):
    """
    Fit the vectorizers on the training text and store the resulting features.

    Both `bag_of_words` and `tfidf` are fitted in place, so the caller can
    reuse them afterwards to transform other data with the same vocabulary.

    Args:
        train_input (str): Train input file name.
        train_output (str): Train output file name.
        bag_of_words (sklearn.feature_extraction.text.CountVectorizer): Bag of words.
        tfidf (sklearn.feature_extraction.text.TfidfTransformer): TF-IDF transformer.
    """
    frame = get_df(train_input)
    corpus = np.array(frame.text.str.lower().values)

    # Learn the vocabulary, then turn the lower-cased corpus into a term matrix.
    bag_of_words.fit(corpus)
    term_matrix = bag_of_words.transform(corpus)
    vocabulary = bag_of_words.get_feature_names_out()

    # Fit the TF-IDF transformer on that matrix and re-weight it.
    tfidf.fit(term_matrix)
    weighted_matrix = tfidf.transform(term_matrix)

    save_matrix(frame, weighted_matrix, vocabulary, train_output)
def process_posts(input_lines, fd_out_train, fd_out_test, target_tag, split):
    """
    Convert XML post rows into TSV records, split between train and test.

    Every input line is parsed as one XML element. Its ``Id``, ``Tags``,
    ``Title`` and ``Body`` attributes become a ``id<TAB>label<TAB>text`` record,
    where ``text`` is the whitespace-normalised title and body joined by a
    single space. Lines that cannot be processed are skipped and reported on
    stderr together with their 1-based position in `input_lines`.

    Args:
        input_lines (list): List of input lines.
        fd_out_train (file): Output file for the training data set.
        fd_out_test (file): Output file for the test data set.
        target_tag (str): Target tag; a post is labelled 1 when this string
            occurs in its ``Tags`` attribute. Note that an empty string occurs
            in every string, so it labels every post 1.
        split (float): Test data set split ratio. A line goes to the training
            file when ``random.random() > split``, otherwise to the test file.
    """
    collapse_whitespace = re.compile(r"\s+")

    # enumerate() keeps the reported number equal to the real line position;
    # a counter advanced only on success drifts after the first broken line.
    for num, line in enumerate(input_lines, start=1):
        try:
            # Draw the random number before parsing so that broken lines still
            # consume one draw and the split of the other lines is unchanged.
            fd_out = fd_out_train if random.random() > split else fd_out_test
            attr = xml.etree.ElementTree.fromstring(line).attrib

            pid = attr.get("Id", "")
            label = 1 if target_tag in attr.get("Tags", "") else 0
            title = collapse_whitespace.sub(" ", attr.get("Title", "")).strip()
            body = collapse_whitespace.sub(" ", attr.get("Body", "")).strip()

            fd_out.write(f"{pid}\t{label}\t{title} {body}\n")
        except Exception as ex:
            # Deliberately best-effort: report the line and keep going.
            sys.stderr.write(f"Skipping the broken line {num}: {ex}\n")
target_names, + title='Confusion matrix', + cmap=None, + normalize=True): + """ + given a sklearn confusion matrix (cm), make a nice plot + + Arguments + --------- + cm: confusion matrix from sklearn.metrics.confusion_matrix + + target_names: given classification classes such as [0, 1, 2] + the class names, for example: ['high', 'medium', 'low'] + + title: the text to display at the top of the matrix + + cmap: the gradient of the values displayed from matplotlib.pyplot.cm + see http://matplotlib.org/examples/color/colormaps_reference.html + plt.get_cmap('jet') or plt.cm.Blues + + normalize: If False, plot the raw numbers + If True, plot the proportions + + Usage + ----- + plot_confusion_matrix(cm = cm, # confusion matrix created by + # sklearn.metrics.confusion_matrix + normalize = True, # show proportions + target_names = y_labels_vals, # list of names of the classes + title = best_estimator_name) # title of graph + Citiation + --------- + http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html + """ + accuracy = np.trace(cm) / float(np.sum(cm)) + misclass = 1 - accuracy + + if cmap is None: + cmap = plt.get_cmap('Blues') + + plt.figure(figsize=(8, 6)) + plt.imshow(cm, interpolation='nearest', cmap=cmap) + plt.title(title) + plt.colorbar() + + if target_names is not None: + tick_marks = np.arange(len(target_names)) + plt.xticks(tick_marks, target_names, rotation=45) + plt.yticks(tick_marks, target_names) + + if normalize: + cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] + + thresh = cm.max() / 1.5 if normalize else cm.max() / 2 + for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): + if normalize: + plt.text(j, i, "{:0.4f}".format(cm[i, j]), + horizontalalignment="center", + color="white" if cm[i, j] > thresh else "black") + else: + plt.text(j, i, "{:,}".format(cm[i, j]), + horizontalalignment="center", + color="white" if cm[i, j] > thresh else "black") + + plt.tight_layout() + plt.ylabel('True label') + 
def _clean_column_name(colname: Text) -> Text:
    """Drop a trailing ' (cm)' unit suffix and replace spaces with underscores."""
    unit_suffix = ' (cm)'
    # str.strip(' (cm)') would remove any of the characters ' ', '(', 'c',
    # 'm', ')' from BOTH ends (e.g. "count" -> "ount"); cut the suffix instead.
    if colname.endswith(unit_suffix):
        colname = colname[:-len(unit_suffix)]
    return colname.strip().replace(' ', '_')


def data_load(config_file: Text) -> None:
    """Load the iris dataset and save it as a raw CSV file.

    Args:
        config_file {Text}: path to the YAML config; the output location is
            read from its ``data_load.dataset_csv`` entry
    """

    # Read the config the caller asked for, not a hard-coded 'params.yaml'.
    with open(config_file) as conf_file:
        config = yaml.safe_load(conf_file)

    # Load the raw data from sklearn as a single data frame
    data = load_iris(as_frame=True)
    dataset = data.frame

    # Turn "sepal length (cm)" style headers into "sepal_length" style names
    dataset.columns = [_clean_column_name(colname) for colname in dataset.columns.tolist()]

    # Save raw data to the path contained in the config
    dataset.to_csv(config['data_load']['dataset_csv'], index=False)
def evaluate_model(config_path: Text) -> None:
    """Evaluate the trained model on the test set and write the reports.

    Writes a JSON file with the macro-averaged F1 score and a PNG image of the
    confusion matrix into the configured reports directory, which is created
    when it does not exist yet.

    Args:
        config_path {Text}: path to config
    """

    # Read the config the caller asked for, not a hard-coded 'params.yaml'.
    with open(config_path) as conf_file:
        config = yaml.safe_load(conf_file)

    logger = get_logger('EVALUATE', log_level=config['base']['log_level'])

    logger.info('Load model')
    model_path = config['train']['model_path']
    model = joblib.load(model_path)

    logger.info('Load test dataset')
    test_df = pd.read_csv(config['data_split']['testset_path'])

    logger.info('Evaluate (build report)')
    target_column = config['featurize']['target_column']
    y_test = test_df.loc[:, target_column].values
    X_test = test_df.drop(target_column, axis=1).values

    prediction = model.predict(X_test)
    f1 = f1_score(y_true=y_test, y_pred=prediction, average='macro')
    # sklearn expects (y_true, y_pred): rows are true classes and columns are
    # predicted ones, which is what the 'True label' / 'Predicted label' axes
    # of the plot assume. Passing them swapped transposes the matrix.
    cm = confusion_matrix(y_true=y_test, y_pred=prediction)

    logger.info('Save metrics')
    # save f1 metrics file
    reports_folder = Path(config['evaluate']['reports_dir'])
    reports_folder.mkdir(parents=True, exist_ok=True)
    metrics_path = reports_folder / config['evaluate']['metrics_file']

    # The context manager guarantees the file is flushed and closed.
    with open(metrics_path, 'w') as metrics_file:
        json.dump(obj={'f1_score': f1}, fp=metrics_file)

    logger.info(f'F1 metrics file saved to : {metrics_path}')

    logger.info('Save confusion matrix')
    # save confusion_matrix.png
    figure = plot_confusion_matrix(cm=cm,
                                   target_names=load_iris(as_frame=True).target_names.tolist(),
                                   normalize=False)
    confusion_matrix_png_path = reports_folder / config['evaluate']['confusion_matrix_image']
    figure.savefig(confusion_matrix_png_path)
    logger.info(f'Confusion matrix saved to : {confusion_matrix_png_path}')
def featurize(config_path: Text) -> None:
    """Create new features.

    Reads the raw dataset, adds the sepal and petal length-to-width ratios and
    saves the selected feature columns together with the target column.

    Args:
        config_path {Text}: path to config
    """

    # Read the config the caller asked for, not a hard-coded 'params.yaml'.
    with open(config_path) as conf_file:
        config = yaml.safe_load(conf_file)

    logger = get_logger('FEATURIZE', log_level=config['base']['log_level'])

    logger.info('Load the raw data')
    dataset = pd.read_csv(config['data_load']['dataset_csv'])

    logger.info('Curate by extraction of features from the dataset')
    dataset['sepal_length_to_sepal_width'] = dataset['sepal_length'] / dataset['sepal_width']
    dataset['petal_length_to_petal_width'] = dataset['petal_length'] / dataset['petal_width']
    # Fix the column order of the saved file; 'target' stays last.
    featured_dataset = dataset[[
        'sepal_length', 'sepal_width', 'petal_length', 'petal_width',
        'sepal_length_to_sepal_width', 'petal_length_to_petal_width',
        'target'
    ]]

    logger.info('Save features')
    features_path = config['featurize']['features_path']
    featured_dataset.to_csv(features_path, index=False)
+ Args: + config_path {Text}: path to config + """ + + with open('params.yaml') as conf_file: + config = yaml.safe_load(conf_file) + + logger = get_logger('TRAIN', log_level=config['base']['log_level']) + + logger.info('Get model/estimator name') + estimator_name = config['train']['estimator_name'] + logger.info(f'The name of Model/Estimator: {estimator_name}') + + logger.info('Load train dataset') + train_df = pd.read_csv(config['data_split']['trainset_path']) + + logger.info('Train model/estimator') + model = train( + df=train_df, + target_column=config['featurize']['target_column'], + estimator_name=estimator_name, + param_grid=config['train']['estimators'][estimator_name]['param_grid'], + cv=config['train']['cv'] + ) + logger.info(f'Best score: {model.best_score_}') + + logger.info('Trained Model Saved') + models_path = config['train']['model_path'] + joblib.dump(model, models_path) + + print(f'the model trained successfully is : {estimator_name}') + + +if __name__ == '__main__': + + args_parser = argparse.ArgumentParser() + args_parser.add_argument('--config', dest='config', required=True, help='Training of model') + args = args_parser.parse_args() + + train_model(config_path=args.config) \ No newline at end of file diff --git a/src/train.py b/src/train.py new file mode 100644 index 00000000..3a476458 --- /dev/null +++ b/src/train.py @@ -0,0 +1,65 @@ +import os +import pickle +import sys + +import numpy as np +import yaml +from sklearn.ensemble import RandomForestClassifier + + +def train(seed, n_est, min_split, matrix): + """ + Train a random forest classifier. + + Args: + seed (int): Random seed. + n_est (int): Number of trees in the forest. + min_split (int): Minimum number of samples required to split an internal node. + matrix (scipy.sparse.csr_matrix): Input matrix. + + Returns: + sklearn.ensemble.RandomForestClassifier: Trained classifier. 
class UnsupportedClassifier(Exception):
    """Raised when an estimator name is not among the supported ones.

    The formatted message is available both as ``str(error)`` and as the
    ``msg`` attribute.
    """

    def __init__(self, estimator_name):
        message = 'Unsupported estimator {}'.format(estimator_name)
        super().__init__(message)
        self.msg = message
def get_console_handler() -> logging.StreamHandler:
    """Build the console handler used by the project loggers.

    Returns:
        logging.StreamHandler writing formatted records to stdout
    """

    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(
        logging.Formatter("%(asctime)s — %(name)s — %(levelname)s — %(message)s")
    )
    return handler


def get_logger(name: Text = __name__, log_level: Union[Text, int] = logging.DEBUG) -> logging.Logger:
    """Return a named logger that writes only to the console.

    Args:
        name {Text}: logger name
        log_level {Text or int}: logging level; can be string name or integer value
    Returns:
        logging.Logger instance with exactly one console handler attached
    """

    log = logging.getLogger(name)
    log.setLevel(log_level)

    # Prevent duplicate outputs in Jupyter Notebook: repeated calls for the
    # same name must not stack handlers. Clearing is a no-op on a fresh logger.
    log.handlers.clear()
    log.addHandler(get_console_handler())

    # Keep records away from ancestor loggers so they are emitted once.
    log.propagate = False

    return log