From 8a7e9887b88d99b48abec52121d7137e99a11899 Mon Sep 17 00:00:00 2001 From: prajwal Date: Sun, 1 Mar 2026 13:00:02 +0530 Subject: [PATCH 1/6] Add getting-started notebook demonstrating full SkillMesh workflow --- examples/notebooks/getting-started.ipynb | 208 +++++++++++++++++++++++ 1 file changed, 208 insertions(+) create mode 100644 examples/notebooks/getting-started.ipynb diff --git a/examples/notebooks/getting-started.ipynb b/examples/notebooks/getting-started.ipynb new file mode 100644 index 0000000..67f0d27 --- /dev/null +++ b/examples/notebooks/getting-started.ipynb @@ -0,0 +1,208 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "816d4bbf", + "metadata": {}, + "source": [ + "# SkillMesh Getting Started\n", + "\n", + "This notebook demonstrates:\n", + "\n", + "- Loading a registry\n", + "- Retrieving top-K expert cards\n", + "- Emitting provider-ready context\n", + "- Using it in a mock agent loop" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "d7480e85", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "92" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from skill_registry_rag.registry import load_registry\n", + "\n", + "registry = load_registry(\"../registry/tools.json\")\n", + "\n", + "len(registry)" + ] + }, + { + "cell_type": "markdown", + "id": "443289f7", + "metadata": {}, + "source": [ + "## 2. 
Retrieve Top-K Expert Cards" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "3caa3116", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[RetrievalHit(card=ToolCard(id='role.data-analyst', title='Data Analyst Role Orchestrator', domain='role_orchestrator', instruction_file='roles/data-analyst.md', description='Role expert that orchestrates profiling, cleaning, EDA, baseline modeling, and visualization for tabular analytics tasks.', tags=['role', 'data-analyst', 'eda', 'pandas', 'visualization', 'baseline-modeling'], tool_hints=['data.pandas-advanced', 'data.sql-queries', 'viz.matplotlib-seaborn', 'ml.sklearn-modeling', 'stats.scipy-statsmodels'], examples=['Profile messy dataset, clean it, and deliver insight dashboard', 'Run EDA plus baseline prediction with leakage-safe validation'], aliases=['role-data-analyst', 'analytics-orchestrator'], dependencies=['data.pandas-advanced', 'data.sql-queries', 'viz.matplotlib-seaborn', 'ml.sklearn-modeling', 'stats.scipy-statsmodels'], output_artifacts=['data_profile_summary', 'eda_findings', 'visual_report', 'baseline_model_report'], quality_checks=['missingness_and_dtype_audit_completed', 'insights_backed_by_numeric_evidence', 'charts_have_labels_units_and_titles'], constraints=['no_causal_claims_from_correlation_only', 'use_only_allowed_dependencies_unless_explicitly_authorized'], input_contract={'required': 'tabular dataset and analysis objective', 'optional': 'target metric, business context, and prediction requirement'}, risk_level='medium', maturity='beta', metadata={'provider_support': ['codex', 'claude'], 'owner': 'community', 'catalog_tier': 'roles', 'instruction_version': 'v1'}, instruction_text='# Data Analyst Role Expert\\n\\nUse this role when the request needs end-to-end tabular analysis: data profiling, cleaning, exploratory analysis, baseline modeling, and clear visual communication.\\n\\n## Allowed expert dependencies\\n\\n- `data.pandas-advanced`\\n- 
`data.sql-queries`\\n- `viz.matplotlib-seaborn`\\n- `ml.sklearn-modeling`\\n- `stats.scipy-statsmodels`\\n\\n## Execution behavior\\n\\n1. Start with a data quality audit:\\n nulls, dtypes, duplicates, outliers, key integrity, and temporal coverage.\\n2. Normalize and clean data using reproducible transformations.\\n3. Produce concise EDA:\\n distributions, trends, segmentation, and relationship charts.\\n4. If prediction is requested, build a leakage-safe baseline model with validation metrics.\\n5. Explain findings in business terms:\\n what changed, how much, and what action is implied.\\n6. End with caveats and next steps.\\n\\n## Output contract\\n\\n- `profile_summary`: row/column counts, missingness, type issues, and anomalies.\\n- `eda_insights`: ranked insights with numeric evidence.\\n- `visuals`: labeled plots with clear units and titles.\\n- `model_section` (optional): baseline model, metrics, and limitations.\\n- `repro_steps`: commands/notebook steps to reproduce.\\n\\n## Guardrails\\n\\n- Do not skip data validation before insights.\\n- Do not claim causality from correlation.\\n- Do not use tools outside allowed dependencies unless explicitly approved.'), score=1.0, sparse_score=1.0, dense_score=None),\n", + " RetrievalHit(card=ToolCard(id='docs.slides-pptx', title='Slide Deck Creation (PPTX)', domain='presentation_generation', instruction_file='instructions/slide-creation.md', description='Narrative deck creation with PPTX output and chart integration.', tags=['slides', 'pptx', 'python-pptx', 'presentation', 'storytelling'], tool_hints=['python-pptx', 'matplotlib', 'pillow'], examples=['Executive summary deck', 'Findings deck with visuals'], aliases=['deck-generation', 'slide-authoring'], dependencies=['python-pptx', 'pillow', 'matplotlib'], output_artifacts=['pptx_deck', 'slide_outline', 'speaker_notes_stub'], quality_checks=['title_and_recommendation_slides_present', 'visual_hierarchy_consistent'], constraints=['one_core_message_per_slide'], 
input_contract={'required': 'structured storyline or slide outline', 'optional': 'brand template and theme colors'}, risk_level='low', maturity='stable', metadata={'provider_support': ['codex', 'claude'], 'install_extra': 'slides'}, instruction_text='# Slide Creation Expert (PPTX)\\n\\nUse this expert for executive summaries, project updates, and narrative decks.\\n\\n## Execution behavior\\n\\n1. Derive a slide storyline first (problem, analysis, findings, actions).\\n2. Allocate one key message per slide and keep text concise.\\n3. Generate PPTX using `python-pptx` with consistent templates.\\n4. Embed charts/tables as visuals instead of dense paragraphs.\\n5. Export final deck and register slide artifact with version/date.\\n\\n## Output contract\\n\\n- Include title slide, evidence slides, and recommendation slide.\\n- Keep visual hierarchy consistent across slides.\\n- Add speaker-note placeholders for critical assumptions.\\n- Ensure deck can be opened in standard PowerPoint clients.'), score=0.667872965335846, sparse_score=0.667872965335846, dense_score=None),\n", + " RetrievalHit(card=ToolCard(id='ml.gradient-boosting', title='Gradient Boosting with XGBoost and LightGBM', domain='machine_learning', instruction_file='instructions/gradient-boosting-xgb-lgbm.md', description='High-performance boosting, tuning, and SHAP-based attribution.', tags=['xgboost', 'lightgbm', 'catboost', 'shap', 'boosting'], tool_hints=['xgboost', 'lightgbm', 'catboost', 'shap'], examples=['Tune XGBoost with early stopping', 'Compare LightGBM vs CatBoost'], aliases=['gbm', 'tree-boosting'], dependencies=['xgboost', 'lightgbm', 'catboost', 'shap', 'scikit-learn'], output_artifacts=['booster_model', 'feature_importance', 'shap_summary'], quality_checks=['early_stopping_enabled', 'class_imbalance_strategy', 'feature_leakage_review'], constraints=['consistent_validation_metric_across_models'], input_contract={'required': 'tabular train/validation datasets', 'optional': 'class weights and 
monotonic constraints'}, risk_level='medium', maturity='stable', metadata={'provider_support': ['codex', 'claude'], 'install_extra': 'boosting'}, instruction_text=\"# Gradient Boosting Expert (XGBoost / LightGBM / CatBoost)\\n\\nUse this expert for high-performance tabular modeling with gradient boosting frameworks, including classification, regression, and ranking tasks.\\n\\n## When to use this expert\\n- The task involves structured/tabular data where tree-based models are expected to excel.\\n- The user needs state-of-the-art predictive performance with feature attribution.\\n- Early stopping, hyperparameter tuning, or framework comparison is required.\\n- SHAP-based model interpretation or class imbalance handling is requested.\\n\\n## Execution behavior\\n\\n1. Build a clean train/validation/test split with leakage controls. For temporal data, split by time. For classification, use stratified splits to preserve class ratios.\\n2. Configure early stopping with a patience of 20-50 rounds on the validation set. Pass `eval_set` (XGBoost/LightGBM) or `eval_set` (CatBoost) explicitly.\\n3. Start with sensible defaults: `learning_rate=0.05`, `max_depth=6`, `n_estimators=2000` (relying on early stopping to find the right count). Tune in this order: (a) `n_estimators` via early stopping, (b) `max_depth` and `min_child_weight`, (c) `subsample` and `colsample_bytree`, (d) regularization (`reg_alpha`, `reg_lambda`), (e) `learning_rate` reduction with proportional `n_estimators` increase.\\n4. For multi-framework comparison, hold folds constant (pass the same `cv` splitter) and use identical metric definitions. Report results in a comparison table.\\n5. Compute SHAP values using `shap.TreeExplainer` for global and local feature attribution. Generate summary plots, dependence plots for top features, and force plots for individual predictions when interpretability is requested.\\n6. 
Handle class imbalance with `scale_pos_weight` (XGBoost), `is_unbalance` (LightGBM), or `auto_class_weights` (CatBoost). Compare against SMOTE-in-pipeline only if simple weighting underperforms.\\n7. For categorical features, prefer LightGBM or CatBoost native categorical handling over one-hot encoding when cardinality > 10.\\n8. Save the final model with native `.save_model()` format and record hyperparameters, best iteration, and validation metric in metadata.\\n\\n## Decision tree\\n- If dataset has > 100k rows and many categorical features -> prefer LightGBM for speed; use CatBoost if categoricals have high cardinality and natural ordering is absent.\\n- If dataset is small (< 5k rows) -> reduce `max_depth` to 3-4 and increase regularization to prevent overfitting; consider whether a simpler sklearn model might suffice.\\n- If the task is ranking -> use `XGBRanker` or `LGBMRanker` with `lambdarank` objective.\\n- If feature interactions matter for explanation -> use SHAP interaction values, not just main-effect importance.\\n- If prediction latency is critical -> export to ONNX or use LightGBM's `predict_disable_shape_check` for faster inference.\\n- If reproducibility is mandatory -> pin `random_state` in the booster AND the data split, and record library version.\\n\\n## Anti-patterns\\n- NEVER set `n_estimators` to a fixed value without early stopping. This either underfits or overfits by construction.\\n- NEVER tune hyperparameters on the test set. Use a validation set or inner cross-validation; the test set is touched exactly once.\\n- NEVER compare frameworks with different preprocessing (e.g., one-hot for XGBoost but native categoricals for CatBoost) and call it a fair comparison.\\n- NEVER ignore the `best_iteration` attribute after early stopping. Predictions must use `best_iteration` to avoid including over-trained trees.\\n- NEVER rely solely on `feature_importances_` (gain-based) for feature selection. 
Gain importance is biased toward high-cardinality and correlated features.\\n\\n## Common mistakes\\n- Using `eval_metric` that does not match the business objective (e.g., `logloss` for early stopping but reporting `F1`).\\n- Forgetting to pass `categorical_feature` to LightGBM, causing it to treat integer-encoded categoricals as continuous.\\n- Setting `scale_pos_weight` AND applying SMOTE simultaneously, which double-corrects for imbalance.\\n- Running SHAP on the training set instead of the validation/test set, which inflates apparent feature relevance.\\n- Not setting `verbosity=0` or `verbose=-1` during hyperparameter search, flooding logs with thousands of training lines.\\n- Using `pickle` instead of the framework's native `.save_model()`, which breaks across library version upgrades.\\n\\n## Output contract\\n- Report best hyperparameters, best iteration number, and validation metric trajectory (or at minimum start/best/final values).\\n- Include the class-imbalance strategy used and its rationale.\\n- Provide SHAP summary plots or feature importance rankings with the method explicitly named.\\n- Never report train-only metrics as final performance. Always include validation or test metrics.\\n- Record the framework name and version (e.g., `xgboost==2.0.3`) in artifact metadata.\\n- If multiple frameworks were compared, include a side-by-side metric table with identical folds.\\n- Save the model in native format alongside a JSON metadata sidecar.\\n\\n## Composability hints\\n- Before this expert -> use the **Data Cleaning Expert** for null handling and type coercion. 
Gradient boosters handle NaNs natively (XGBoost, LightGBM) but benefit from clean categoricals.\\n- Before this expert -> use the **Scikit-learn Modeling Expert** if a quick linear baseline is needed for comparison.\\n- After this expert -> use the **Visualization Expert** to plot SHAP summaries, learning curves, or metric comparisons.\\n- After this expert -> use the **Machine Learning Export Expert** to convert the model to ONNX or package it for serving.\\n- Related -> the **Statistics Expert** for post-hoc significance tests when comparing model performance across folds.\"), score=0.6425158381462097, sparse_score=0.6425158381462097, dense_score=None)]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from skill_registry_rag.retriever import SkillRetriever\n", + "\n", + "retriever = SkillRetriever(registry)\n", + "\n", + "results = retriever.retrieve(\n", + " query=\"clean messy sales data and generate charts\",\n", + " top_k=3\n", + ")\n", + "\n", + "results" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "47c4c7a6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# Data Analyst Role Expert\n", + "\n", + "Use this role when the request needs end-to-end tabular analysis: data profiling, cleaning, exploratory analysis, baseline modeling, and clear visual communication.\n", + "\n", + "## Allowed expert dependencies\n", + "\n", + "- `data.pandas-advanced`\n", + "- `data.sql-queries`\n", + "- `viz.matplotlib-seaborn`\n", + "- `ml.sklearn-modeling`\n", + "- `stats.scipy-statsmodels`\n", + "\n", + "## Execution behavior\n", + "\n", + "1. Start with a data quality audit:\n", + " nulls, dtypes, duplicates, outliers, key integrity, and temporal coverage.\n", + "2. Normalize and clean data using reproducible transformations.\n", + "3. Produce concise EDA:\n", + " distributions, trends, segmentation, and relationship charts.\n", + "4. 
If prediction is requested, build a leakage-safe baseline model with validation metrics.\n", + "5. Explain findings in business terms:\n", + " what changed, how much, and what action is implied.\n", + "6. End with caveats and next steps.\n", + "\n", + "## Output contract\n", + "\n", + "- `profile_summary`: row/column counts, missingness, type issues, and anomalies.\n", + "- `eda_insights`: ranked insights \n" + ] + } + ], + "source": [ + "context = \"\\n\\n\".join(\n", + " hit.card.instruction_text for hit in results\n", + ")\n", + "\n", + "print(context[:1000])" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "7b898caf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "USER QUERY:\n", + "clean messy sales data and generate charts\n", + "\n", + "RETRIEVED CONTEXT:\n", + "# Data Analyst Role Expert\n", + "\n", + "Use this role when the request needs end-to-end tabular analysis: data profiling, cleaning, exploratory analysis, baseline modeling, and clear visual communication.\n", + "\n", + "## Allowed expert dependencies\n", + "\n", + "- `data.pandas-advanced`\n", + "- `data.sql-queries`\n", + "- `viz.matplotlib-seaborn`\n", + "- `ml.sklearn-modeling`\n", + "- `stats.scipy-statsmodels`\n", + "\n", + "## Execution behavior\n", + "\n", + "1. Start with a data quality audit:\n", + " nulls, dtypes, duplicates, outliers, key integrity, and temporal coverage.\n", + "2. Normalize and clean data using reproducible transformations.\n", + "3. Produce concise EDA:\n", + " distributions, trends, segmentation, and relationship charts.\n", + "4. If prediction is requested, build a leakage-safe baseline model with validation metrics.\n", + "5. 
Explain findings in business terms:\n", + " what changed, how much, \n" + ] + } + ], + "source": [ + "user_query = \"clean messy sales data and generate charts\"\n", + "\n", + "retriever = SkillRetriever(registry)\n", + "hits = retriever.retrieve(user_query, top_k=3)\n", + "\n", + "context = \"\\n\\n\".join(hit.card.instruction_text for hit in hits)\n", + "\n", + "print(\"USER QUERY:\")\n", + "print(user_query)\n", + "\n", + "print(\"\\nRETRIEVED CONTEXT:\")\n", + "print(context[:800])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From c1e708f2c70393cc1f99a2645cddd3934bda61ab Mon Sep 17 00:00:00 2001 From: prajwal Date: Sun, 1 Mar 2026 16:00:46 +0530 Subject: [PATCH 2/6] Fix notebook: renumber mock agent loop to section 4 and add markdown explanations --- examples/notebooks/getting-started.ipynb | 195 ++++++++++++++++++----- 1 file changed, 152 insertions(+), 43 deletions(-) diff --git a/examples/notebooks/getting-started.ipynb b/examples/notebooks/getting-started.ipynb index 67f0d27..3ead37d 100644 --- a/examples/notebooks/getting-started.ipynb +++ b/examples/notebooks/getting-started.ipynb @@ -2,22 +2,65 @@ "cells": [ { "cell_type": "markdown", - "id": "816d4bbf", + "id": "b74f6b23", "metadata": {}, "source": [ "# SkillMesh Getting Started\n", "\n", - "This notebook demonstrates:\n", + "This notebook walks through the full SkillMesh workflow:\n", "\n", - "- Loading a registry\n", - "- Retrieving top-K expert cards\n", - "- Emitting provider-ready context\n", - "- Using it in a mock agent loop" + "1. Install SkillMesh dependencies \n", + "2. Load the registry \n", + "3. 
Retrieve top-K expert cards \n", + "4. Emit provider-ready context \n", + "5. Demonstrate a simple mock agent loop" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 52, + "id": "a02b3ef1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Obtaining file:///E:/ForOpensource/SkillMesh/examples/notebooks\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ERROR: file:///E:/ForOpensource/SkillMesh/examples/notebooks does not appear to be a Python project: neither 'setup.py' nor 'pyproject.toml' found.\n", + "\n", + "[notice] A new release of pip is available: 23.2.1 -> 26.0.1\n", + "[notice] To update, run: python.exe -m pip install --upgrade pip\n" + ] + } + ], + "source": [ + "# Install SkillMesh dependencies\n", + "!pip install -e ." + ] + }, + { + "cell_type": "markdown", + "id": "0f34646f", + "metadata": {}, + "source": [ + "## 1.Load SkillMesh Registry\n", + "\n", + "This cell loads the SkillMesh registry, which contains all expert cards.\n", + "\n", + "- `load_registry(path)` loads the registry JSON file.\n", + "- `len(registry)` shows how many expert cards were loaded." + ] + }, + { + "cell_type": "code", + "execution_count": 53, "id": "d7480e85", "metadata": {}, "outputs": [ @@ -27,7 +70,7 @@ "92" ] }, - "execution_count": 1, + "execution_count": 53, "metadata": {}, "output_type": "execute_result" } @@ -48,9 +91,21 @@ "## 2. Retrieve Top-K Expert Cards" ] }, + { + "cell_type": "markdown", + "id": "4129236f", + "metadata": {}, + "source": [ + "2. Retrieve Top-K Expert Cards\n", + "\n", + "- `SkillRetriever(registry)` creates a retriever object from the loaded registry.\n", + "- `query` is the user request.\n", + "- `top_k=3` means we want the top 3 expert cards matching the query." 
+ ] + }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 54, "id": "3caa3116", "metadata": {}, "outputs": [ @@ -62,7 +117,7 @@ " RetrievalHit(card=ToolCard(id='ml.gradient-boosting', title='Gradient Boosting with XGBoost and LightGBM', domain='machine_learning', instruction_file='instructions/gradient-boosting-xgb-lgbm.md', description='High-performance boosting, tuning, and SHAP-based attribution.', tags=['xgboost', 'lightgbm', 'catboost', 'shap', 'boosting'], tool_hints=['xgboost', 'lightgbm', 'catboost', 'shap'], examples=['Tune XGBoost with early stopping', 'Compare LightGBM vs CatBoost'], aliases=['gbm', 'tree-boosting'], dependencies=['xgboost', 'lightgbm', 'catboost', 'shap', 'scikit-learn'], output_artifacts=['booster_model', 'feature_importance', 'shap_summary'], quality_checks=['early_stopping_enabled', 'class_imbalance_strategy', 'feature_leakage_review'], constraints=['consistent_validation_metric_across_models'], input_contract={'required': 'tabular train/validation datasets', 'optional': 'class weights and monotonic constraints'}, risk_level='medium', maturity='stable', metadata={'provider_support': ['codex', 'claude'], 'install_extra': 'boosting'}, instruction_text=\"# Gradient Boosting Expert (XGBoost / LightGBM / CatBoost)\\n\\nUse this expert for high-performance tabular modeling with gradient boosting frameworks, including classification, regression, and ranking tasks.\\n\\n## When to use this expert\\n- The task involves structured/tabular data where tree-based models are expected to excel.\\n- The user needs state-of-the-art predictive performance with feature attribution.\\n- Early stopping, hyperparameter tuning, or framework comparison is required.\\n- SHAP-based model interpretation or class imbalance handling is requested.\\n\\n## Execution behavior\\n\\n1. Build a clean train/validation/test split with leakage controls. For temporal data, split by time. 
For classification, use stratified splits to preserve class ratios.\\n2. Configure early stopping with a patience of 20-50 rounds on the validation set. Pass `eval_set` (XGBoost/LightGBM) or `eval_set` (CatBoost) explicitly.\\n3. Start with sensible defaults: `learning_rate=0.05`, `max_depth=6`, `n_estimators=2000` (relying on early stopping to find the right count). Tune in this order: (a) `n_estimators` via early stopping, (b) `max_depth` and `min_child_weight`, (c) `subsample` and `colsample_bytree`, (d) regularization (`reg_alpha`, `reg_lambda`), (e) `learning_rate` reduction with proportional `n_estimators` increase.\\n4. For multi-framework comparison, hold folds constant (pass the same `cv` splitter) and use identical metric definitions. Report results in a comparison table.\\n5. Compute SHAP values using `shap.TreeExplainer` for global and local feature attribution. Generate summary plots, dependence plots for top features, and force plots for individual predictions when interpretability is requested.\\n6. Handle class imbalance with `scale_pos_weight` (XGBoost), `is_unbalance` (LightGBM), or `auto_class_weights` (CatBoost). Compare against SMOTE-in-pipeline only if simple weighting underperforms.\\n7. For categorical features, prefer LightGBM or CatBoost native categorical handling over one-hot encoding when cardinality > 10.\\n8. 
Save the final model with native `.save_model()` format and record hyperparameters, best iteration, and validation metric in metadata.\\n\\n## Decision tree\\n- If dataset has > 100k rows and many categorical features -> prefer LightGBM for speed; use CatBoost if categoricals have high cardinality and natural ordering is absent.\\n- If dataset is small (< 5k rows) -> reduce `max_depth` to 3-4 and increase regularization to prevent overfitting; consider whether a simpler sklearn model might suffice.\\n- If the task is ranking -> use `XGBRanker` or `LGBMRanker` with `lambdarank` objective.\\n- If feature interactions matter for explanation -> use SHAP interaction values, not just main-effect importance.\\n- If prediction latency is critical -> export to ONNX or use LightGBM's `predict_disable_shape_check` for faster inference.\\n- If reproducibility is mandatory -> pin `random_state` in the booster AND the data split, and record library version.\\n\\n## Anti-patterns\\n- NEVER set `n_estimators` to a fixed value without early stopping. This either underfits or overfits by construction.\\n- NEVER tune hyperparameters on the test set. Use a validation set or inner cross-validation; the test set is touched exactly once.\\n- NEVER compare frameworks with different preprocessing (e.g., one-hot for XGBoost but native categoricals for CatBoost) and call it a fair comparison.\\n- NEVER ignore the `best_iteration` attribute after early stopping. Predictions must use `best_iteration` to avoid including over-trained trees.\\n- NEVER rely solely on `feature_importances_` (gain-based) for feature selection. 
Gain importance is biased toward high-cardinality and correlated features.\\n\\n## Common mistakes\\n- Using `eval_metric` that does not match the business objective (e.g., `logloss` for early stopping but reporting `F1`).\\n- Forgetting to pass `categorical_feature` to LightGBM, causing it to treat integer-encoded categoricals as continuous.\\n- Setting `scale_pos_weight` AND applying SMOTE simultaneously, which double-corrects for imbalance.\\n- Running SHAP on the training set instead of the validation/test set, which inflates apparent feature relevance.\\n- Not setting `verbosity=0` or `verbose=-1` during hyperparameter search, flooding logs with thousands of training lines.\\n- Using `pickle` instead of the framework's native `.save_model()`, which breaks across library version upgrades.\\n\\n## Output contract\\n- Report best hyperparameters, best iteration number, and validation metric trajectory (or at minimum start/best/final values).\\n- Include the class-imbalance strategy used and its rationale.\\n- Provide SHAP summary plots or feature importance rankings with the method explicitly named.\\n- Never report train-only metrics as final performance. Always include validation or test metrics.\\n- Record the framework name and version (e.g., `xgboost==2.0.3`) in artifact metadata.\\n- If multiple frameworks were compared, include a side-by-side metric table with identical folds.\\n- Save the model in native format alongside a JSON metadata sidecar.\\n\\n## Composability hints\\n- Before this expert -> use the **Data Cleaning Expert** for null handling and type coercion. 
Gradient boosters handle NaNs natively (XGBoost, LightGBM) but benefit from clean categoricals.\\n- Before this expert -> use the **Scikit-learn Modeling Expert** if a quick linear baseline is needed for comparison.\\n- After this expert -> use the **Visualization Expert** to plot SHAP summaries, learning curves, or metric comparisons.\\n- After this expert -> use the **Machine Learning Export Expert** to convert the model to ONNX or package it for serving.\\n- Related -> the **Statistics Expert** for post-hoc significance tests when comparing model performance across folds.\"), score=0.6425158381462097, sparse_score=0.6425158381462097, dense_score=None)]" ] }, - "execution_count": 2, + "execution_count": 54, "metadata": {}, "output_type": "execute_result" } @@ -72,17 +127,26 @@ "\n", "retriever = SkillRetriever(registry)\n", "\n", - "results = retriever.retrieve(\n", - " query=\"clean messy sales data and generate charts\",\n", - " top_k=3\n", - ")\n", + "user_query = \"clean messy sales data and generate charts\"\n", + "hits = retriever.retrieve(user_query, top_k=3)\n", "\n", - "results" + "hits # shows top 3 relevant expert cards" + ] + }, + { + "cell_type": "markdown", + "id": "5c85573e", + "metadata": {}, + "source": [ + "## 3. Emit Context for LLM\n", + "\n", + "This step formats the instructions from the top-K retrieval results into a single context string\n", + "that could be passed to an AI model (like Claude or Codex) for answering the query." 
] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 55, "id": "47c4c7a6", "metadata": {}, "outputs": [ @@ -90,6 +154,10 @@ "name": "stdout", "output_type": "stream", "text": [ + "USER QUERY:\n", + "clean messy sales data and generate charts\n", + "\n", + "RETRIEVED CONTEXT:\n", "# Data Analyst Role Expert\n", "\n", "Use this role when the request needs end-to-end tabular analysis: data profiling, cleaning, exploratory analysis, baseline modeling, and clear visual communication.\n", @@ -122,27 +190,38 @@ } ], "source": [ - "context = \"\\n\\n\".join(\n", - " hit.card.instruction_text for hit in results\n", - ")\n", + "context = \"\\n\\n\".join(hit.card.instruction_text for hit in hits)\n", + "\n", + "print(\"USER QUERY:\")\n", + "print(user_query)\n", "\n", - "print(context[:1000])" + "print(\"\\nRETRIEVED CONTEXT:\")\n", + "print(context[:1000]) # prints first 1000 characters of combined instructions" + ] + }, + { + "cell_type": "markdown", + "id": "f232afa3", + "metadata": {}, + "source": [ + "## 5 Mock Agent Loop\n", + "\n", + "- Simulates multiple queries in a loop. \n", + "- Retrieves top-K expert cards for each query. \n", + "- Shows how an LLM could consume this context iteratively." ] }, { "cell_type": "code", - "execution_count": 4, - "id": "7b898caf", + "execution_count": 56, + "id": "08b5202d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "USER QUERY:\n", - "clean messy sales data and generate charts\n", - "\n", - "RETRIEVED CONTEXT:\n", + "Query: clean messy sales data\n", "# Data Analyst Role Expert\n", "\n", "Use this role when the request needs end-to-end tabular analysis: data profiling, cleaning, exploratory analysis, baseline modeling, and clear visual communication.\n", @@ -159,28 +238,58 @@ "\n", "1. Start with a data quality audit:\n", " nulls, dtypes, duplicates, outliers, key integrity, and temporal coverage.\n", - "2. 
Normalize and clean data using reproducible transformations.\n", - "3. Produce concise EDA:\n", - " distributions, trends, segmentation, and relationship charts.\n", - "4. If prediction is requested, build a leakage-safe baseline model with validation metrics.\n", - "5. Explain findings in business terms:\n", - " what changed, how much, \n" + "2. Normaliz\n", + "--------------------------------------------------\n", + "Query: generate charts for sales\n", + "# Slide Creation Expert (PPTX)\n", + "\n", + "Use this expert for executive summaries, project updates, and narrative decks.\n", + "\n", + "## Execution behavior\n", + "\n", + "1. Derive a slide storyline first (problem, analysis, findings, actions).\n", + "2. Allocate one key message per slide and keep text concise.\n", + "3. Generate PPTX using `python-pptx` with consistent templates.\n", + "4. Embed charts/tables as visuals instead of dense paragraphs.\n", + "5. Export final deck and register slide artifact with version/date.\n", + "\n", + "## Output contract\n", + "\n", + "- Include titl\n", + "--------------------------------------------------\n", + "Query: summarize findings\n", + "# Slide Creation Expert (PPTX)\n", + "\n", + "Use this expert for executive summaries, project updates, and narrative decks.\n", + "\n", + "## Execution behavior\n", + "\n", + "1. Derive a slide storyline first (problem, analysis, findings, actions).\n", + "2. Allocate one key message per slide and keep text concise.\n", + "3. Generate PPTX using `python-pptx` with consistent templates.\n", + "4. Embed charts/tables as visuals instead of dense paragraphs.\n", + "5. 
Export final deck and register slide artifact with version/date.\n", + "\n", + "## Output contract\n", + "\n", + "- Include titl\n", + "--------------------------------------------------\n" ] } ], "source": [ - "user_query = \"clean messy sales data and generate charts\"\n", - "\n", - "retriever = SkillRetriever(registry)\n", - "hits = retriever.retrieve(user_query, top_k=3)\n", + "queries = [\n", + " \"clean messy sales data\",\n", + " \"generate charts for sales\",\n", + " \"summarize findings\"\n", + "]\n", "\n", - "context = \"\\n\\n\".join(hit.card.instruction_text for hit in hits)\n", - "\n", - "print(\"USER QUERY:\")\n", - "print(user_query)\n", - "\n", - "print(\"\\nRETRIEVED CONTEXT:\")\n", - "print(context[:800])" + "for query in queries:\n", + " hits = retriever.retrieve(query, top_k=3)\n", + " context = \"\\n\\n\".join(hit.card.instruction_text for hit in hits)\n", + " print(f\"Query: {query}\")\n", + " print(context[:500])\n", + " print(\"-\"*50)" ] } ], From 4271142af8a359a2cf6958bf2a646e2a8b650a86 Mon Sep 17 00:00:00 2001 From: prajwal Date: Sun, 1 Mar 2026 16:43:25 +0530 Subject: [PATCH 3/6] Add Markdown cells and fix headings in getting-started notebook --- examples/notebooks/getting-started.ipynb | 146 +++++++++++++++++++---- 1 file changed, 123 insertions(+), 23 deletions(-) diff --git a/examples/notebooks/getting-started.ipynb b/examples/notebooks/getting-started.ipynb index 3ead37d..421ccd2 100644 --- a/examples/notebooks/getting-started.ipynb +++ b/examples/notebooks/getting-started.ipynb @@ -16,9 +16,19 @@ "5. Demonstrate a simple mock agent loop" ] }, + { + "cell_type": "markdown", + "id": "e8fc1b2b", + "metadata": {}, + "source": [ + "## 1. Install SkillMesh dependencies\n", + "\n", + "This cell installs the SkillMesh package from the local repository in editable mode." 
+ ] + }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 5, "id": "a02b3ef1", "metadata": {}, "outputs": [ @@ -26,14 +36,112 @@ "name": "stdout", "output_type": "stream", "text": [ - "Obtaining file:///E:/ForOpensource/SkillMesh/examples/notebooks\n" + "Obtaining file:///E:/ForOpensource/SkillMesh\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Checking if build backend supports build_editable: started\n", + " Checking if build backend supports build_editable: finished with status 'done'\n", + " Getting requirements to build editable: started\n", + " Getting requirements to build editable: finished with status 'done'\n", + " Preparing editable metadata (pyproject.toml): started\n", + " Preparing editable metadata (pyproject.toml): finished with status 'done'\n", + "Requirement already satisfied: numpy>=1.24 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (2.4.2)\n", + "Requirement already satisfied: PyYAML>=6.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (6.0.3)\n", + "Requirement already satisfied: rank-bm25>=0.2.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (0.2.2)\n", + "Requirement already satisfied: jsonschema>=4.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (4.26.0)\n", + "Requirement already satisfied: chromadb>=0.5.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (1.5.2)\n", + "Requirement already satisfied: build>=1.0.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.4.0)\n", + "Requirement already satisfied: pydantic>=1.9 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (2.12.5)\n", + "Requirement already satisfied: pybase64>=1.4.1 in 
e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.4.3)\n", + "Requirement already satisfied: uvicorn[standard]>=0.18.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.41.0)\n", + "Requirement already satisfied: posthog<6.0.0,>=2.4.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (5.4.0)\n", + "Requirement already satisfied: typing-extensions>=4.5.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (4.15.0)\n", + "Requirement already satisfied: onnxruntime>=1.14.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.24.2)\n", + "Requirement already satisfied: opentelemetry-api>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", + "Requirement already satisfied: opentelemetry-exporter-otlp-proto-grpc>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", + "Requirement already satisfied: opentelemetry-sdk>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", + "Requirement already satisfied: tokenizers>=0.13.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.22.2)\n", + "Requirement already satisfied: pypika>=0.48.9 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.51.1)\n", + "Requirement already satisfied: tqdm>=4.65.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (4.67.3)\n", + "Requirement already satisfied: overrides>=7.3.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (7.7.0)\n", + "Requirement already 
satisfied: importlib-resources in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (6.5.2)\n", + "Requirement already satisfied: grpcio>=1.58.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.78.0)\n", + "Requirement already satisfied: bcrypt>=4.0.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (5.0.0)\n", + "Requirement already satisfied: typer>=0.9.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.24.1)\n", + "Requirement already satisfied: kubernetes>=28.1.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (35.0.0)\n", + "Requirement already satisfied: tenacity>=8.2.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (9.1.4)\n", + "Requirement already satisfied: mmh3>=4.0.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (5.2.0)\n", + "Requirement already satisfied: orjson>=3.9.12 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (3.11.7)\n", + "Requirement already satisfied: httpx>=0.27.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.28.1)\n", + "Requirement already satisfied: rich>=10.11.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (14.3.3)\n", + "Requirement already satisfied: attrs>=22.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (25.4.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (2025.9.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in 
e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (0.37.0)\n", + "Requirement already satisfied: rpds-py>=0.25.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (0.30.0)\n", + "Requirement already satisfied: packaging>=24.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from build>=1.0.3->chromadb>=0.5.0->skillmesh==0.1.0) (26.0)\n", + "Requirement already satisfied: pyproject_hooks in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from build>=1.0.3->chromadb>=0.5.0->skillmesh==0.1.0) (1.2.0)\n", + "Requirement already satisfied: colorama in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from build>=1.0.3->chromadb>=0.5.0->skillmesh==0.1.0) (0.4.6)\n", + "Requirement already satisfied: anyio in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (4.12.1)\n", + "Requirement already satisfied: certifi in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (2026.2.25)\n", + "Requirement already satisfied: httpcore==1.* in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.0.9)\n", + "Requirement already satisfied: idna in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.11)\n", + "Requirement already satisfied: h11>=0.16 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpcore==1.*->httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.16.0)\n", + "Requirement already satisfied: six>=1.9.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.17.0)\n", + "Requirement already satisfied: python-dateutil>=2.5.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from 
kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.9.0.post0)\n", + "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.9.0)\n", + "Requirement already satisfied: requests in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.32.5)\n", + "Requirement already satisfied: requests-oauthlib in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.0.0)\n", + "Requirement already satisfied: urllib3!=2.6.0,>=1.24.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.6.3)\n", + "Requirement already satisfied: durationpy>=0.7 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.10)\n", + "Requirement already satisfied: flatbuffers in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (25.12.19)\n", + "Requirement already satisfied: protobuf in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (6.33.5)\n", + "Requirement already satisfied: sympy in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (1.14.0)\n", + "Requirement already satisfied: importlib-metadata<8.8.0,>=6.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-api>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (8.7.1)\n", + "Requirement already satisfied: googleapis-common-protos~=1.57 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.72.0)\n", + "Requirement 
already satisfied: opentelemetry-exporter-otlp-proto-common==1.39.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", + "Requirement already satisfied: opentelemetry-proto==1.39.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", + "Requirement already satisfied: opentelemetry-semantic-conventions==0.60b1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-sdk>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.60b1)\n", + "Requirement already satisfied: backoff>=1.10.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from posthog<6.0.0,>=2.4.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.2.1)\n", + "Requirement already satisfied: distro>=1.5.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from posthog<6.0.0,>=2.4.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.9.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from pydantic>=1.9->chromadb>=0.5.0->skillmesh==0.1.0) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.41.5 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from pydantic>=1.9->chromadb>=0.5.0->skillmesh==0.1.0) (2.41.5)\n", + "Requirement already satisfied: typing-inspection>=0.4.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from pydantic>=1.9->chromadb>=0.5.0->skillmesh==0.1.0) (0.4.2)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from rich>=10.11.0->chromadb>=0.5.0->skillmesh==0.1.0) (4.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from rich>=10.11.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.19.2)\n", + "Requirement already satisfied: 
huggingface-hub<2.0,>=0.16.4 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (1.5.0)\n", + "Requirement already satisfied: click>=8.2.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from typer>=0.9.0->chromadb>=0.5.0->skillmesh==0.1.0) (8.3.1)\n", + "Requirement already satisfied: shellingham>=1.3.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from typer>=0.9.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.5.4)\n", + "Requirement already satisfied: annotated-doc>=0.0.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from typer>=0.9.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.0.4)\n", + "Requirement already satisfied: httptools>=0.6.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (0.7.1)\n", + "Requirement already satisfied: python-dotenv>=0.13 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (1.2.1)\n", + "Requirement already satisfied: watchfiles>=0.20 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (1.1.1)\n", + "Requirement already satisfied: websockets>=10.4 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (16.0)\n", + "Requirement already satisfied: filelock>=3.10.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (3.24.3)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (2026.2.0)\n", + "Requirement already satisfied: hf-xet<2.0.0,>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages 
(from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (1.3.2)\n", + "Requirement already satisfied: zipp>=3.20 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from importlib-metadata<8.8.0,>=6.0->opentelemetry-api>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.23.0)\n", + "Requirement already satisfied: mdurl~=0.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.1.2)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from requests->kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.4.4)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from requests-oauthlib->kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.3.1)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from sympy->onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (1.3.0)\n", + "Building wheels for collected packages: skillmesh\n", + " Building editable for skillmesh (pyproject.toml): started\n", + " Building editable for skillmesh (pyproject.toml): finished with status 'done'\n", + " Created wheel for skillmesh: filename=skillmesh-0.1.0-0.editable-py3-none-any.whl size=5534 sha256=5c7a3f4fb3c8a58a9891fdc3c5f2bd3930d5bc38eb9d46b6a6d82bb26722dca0\n", + " Stored in directory: C:\\Users\\DELL\\AppData\\Local\\Temp\\pip-ephem-wheel-cache-dm_bov34\\wheels\\26\\fc\\af\\1aa2362d8ccbc2ed9db0df865eeffcbf5130dc61876fdaf17b\n", + "Successfully built skillmesh\n", + "Installing collected packages: skillmesh\n", + " Attempting uninstall: skillmesh\n", + " Found existing installation: skillmesh 0.1.0\n", + " Uninstalling skillmesh-0.1.0:\n", + " Successfully uninstalled skillmesh-0.1.0\n", + "Successfully installed skillmesh-0.1.0\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ - "ERROR: file:///E:/ForOpensource/SkillMesh/examples/notebooks does not appear to be a Python project: neither 'setup.py' nor 'pyproject.toml' found.\n", "\n", "[notice] A new release of pip is available: 23.2.1 -> 26.0.1\n", "[notice] To update, run: python.exe -m pip install --upgrade pip\n" @@ -41,8 +149,8 @@ } ], "source": [ - "# Install SkillMesh dependencies\n", - "!pip install -e ." + "# Install SkillMesh dependencies from root folder\n", + "!pip install -e ../.." ] }, { @@ -50,7 +158,7 @@ "id": "0f34646f", "metadata": {}, "source": [ - "## 1.Load SkillMesh Registry\n", + "## 2.Load the Registry\n", "\n", "This cell loads the SkillMesh registry, which contains all expert cards.\n", "\n", @@ -60,7 +168,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 6, "id": "d7480e85", "metadata": {}, "outputs": [ @@ -70,7 +178,7 @@ "92" ] }, - "execution_count": 53, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -83,20 +191,12 @@ "len(registry)" ] }, - { - "cell_type": "markdown", - "id": "443289f7", - "metadata": {}, - "source": [ - "## 2. Retrieve Top-K Expert Cards" - ] - }, { "cell_type": "markdown", "id": "4129236f", "metadata": {}, "source": [ - "2. Retrieve Top-K Expert Cards\n", + "## 3. 
Retrieve Top-K Expert Cards\n", "\n", "- `SkillRetriever(registry)` creates a retriever object from the loaded registry.\n", "- `query` is the user request.\n", @@ -105,7 +205,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 7, "id": "3caa3116", "metadata": {}, "outputs": [ @@ -117,7 +217,7 @@ " RetrievalHit(card=ToolCard(id='ml.gradient-boosting', title='Gradient Boosting with XGBoost and LightGBM', domain='machine_learning', instruction_file='instructions/gradient-boosting-xgb-lgbm.md', description='High-performance boosting, tuning, and SHAP-based attribution.', tags=['xgboost', 'lightgbm', 'catboost', 'shap', 'boosting'], tool_hints=['xgboost', 'lightgbm', 'catboost', 'shap'], examples=['Tune XGBoost with early stopping', 'Compare LightGBM vs CatBoost'], aliases=['gbm', 'tree-boosting'], dependencies=['xgboost', 'lightgbm', 'catboost', 'shap', 'scikit-learn'], output_artifacts=['booster_model', 'feature_importance', 'shap_summary'], quality_checks=['early_stopping_enabled', 'class_imbalance_strategy', 'feature_leakage_review'], constraints=['consistent_validation_metric_across_models'], input_contract={'required': 'tabular train/validation datasets', 'optional': 'class weights and monotonic constraints'}, risk_level='medium', maturity='stable', metadata={'provider_support': ['codex', 'claude'], 'install_extra': 'boosting'}, instruction_text=\"# Gradient Boosting Expert (XGBoost / LightGBM / CatBoost)\\n\\nUse this expert for high-performance tabular modeling with gradient boosting frameworks, including classification, regression, and ranking tasks.\\n\\n## When to use this expert\\n- The task involves structured/tabular data where tree-based models are expected to excel.\\n- The user needs state-of-the-art predictive performance with feature attribution.\\n- Early stopping, hyperparameter tuning, or framework comparison is required.\\n- SHAP-based model interpretation or class imbalance handling is requested.\\n\\n## Execution 
behavior\\n\\n1. Build a clean train/validation/test split with leakage controls. For temporal data, split by time. For classification, use stratified splits to preserve class ratios.\\n2. Configure early stopping with a patience of 20-50 rounds on the validation set. Pass `eval_set` (XGBoost/LightGBM) or `eval_set` (CatBoost) explicitly.\\n3. Start with sensible defaults: `learning_rate=0.05`, `max_depth=6`, `n_estimators=2000` (relying on early stopping to find the right count). Tune in this order: (a) `n_estimators` via early stopping, (b) `max_depth` and `min_child_weight`, (c) `subsample` and `colsample_bytree`, (d) regularization (`reg_alpha`, `reg_lambda`), (e) `learning_rate` reduction with proportional `n_estimators` increase.\\n4. For multi-framework comparison, hold folds constant (pass the same `cv` splitter) and use identical metric definitions. Report results in a comparison table.\\n5. Compute SHAP values using `shap.TreeExplainer` for global and local feature attribution. Generate summary plots, dependence plots for top features, and force plots for individual predictions when interpretability is requested.\\n6. Handle class imbalance with `scale_pos_weight` (XGBoost), `is_unbalance` (LightGBM), or `auto_class_weights` (CatBoost). Compare against SMOTE-in-pipeline only if simple weighting underperforms.\\n7. For categorical features, prefer LightGBM or CatBoost native categorical handling over one-hot encoding when cardinality > 10.\\n8. 
Save the final model with native `.save_model()` format and record hyperparameters, best iteration, and validation metric in metadata.\\n\\n## Decision tree\\n- If dataset has > 100k rows and many categorical features -> prefer LightGBM for speed; use CatBoost if categoricals have high cardinality and natural ordering is absent.\\n- If dataset is small (< 5k rows) -> reduce `max_depth` to 3-4 and increase regularization to prevent overfitting; consider whether a simpler sklearn model might suffice.\\n- If the task is ranking -> use `XGBRanker` or `LGBMRanker` with `lambdarank` objective.\\n- If feature interactions matter for explanation -> use SHAP interaction values, not just main-effect importance.\\n- If prediction latency is critical -> export to ONNX or use LightGBM's `predict_disable_shape_check` for faster inference.\\n- If reproducibility is mandatory -> pin `random_state` in the booster AND the data split, and record library version.\\n\\n## Anti-patterns\\n- NEVER set `n_estimators` to a fixed value without early stopping. This either underfits or overfits by construction.\\n- NEVER tune hyperparameters on the test set. Use a validation set or inner cross-validation; the test set is touched exactly once.\\n- NEVER compare frameworks with different preprocessing (e.g., one-hot for XGBoost but native categoricals for CatBoost) and call it a fair comparison.\\n- NEVER ignore the `best_iteration` attribute after early stopping. Predictions must use `best_iteration` to avoid including over-trained trees.\\n- NEVER rely solely on `feature_importances_` (gain-based) for feature selection. 
Gain importance is biased toward high-cardinality and correlated features.\\n\\n## Common mistakes\\n- Using `eval_metric` that does not match the business objective (e.g., `logloss` for early stopping but reporting `F1`).\\n- Forgetting to pass `categorical_feature` to LightGBM, causing it to treat integer-encoded categoricals as continuous.\\n- Setting `scale_pos_weight` AND applying SMOTE simultaneously, which double-corrects for imbalance.\\n- Running SHAP on the training set instead of the validation/test set, which inflates apparent feature relevance.\\n- Not setting `verbosity=0` or `verbose=-1` during hyperparameter search, flooding logs with thousands of training lines.\\n- Using `pickle` instead of the framework's native `.save_model()`, which breaks across library version upgrades.\\n\\n## Output contract\\n- Report best hyperparameters, best iteration number, and validation metric trajectory (or at minimum start/best/final values).\\n- Include the class-imbalance strategy used and its rationale.\\n- Provide SHAP summary plots or feature importance rankings with the method explicitly named.\\n- Never report train-only metrics as final performance. Always include validation or test metrics.\\n- Record the framework name and version (e.g., `xgboost==2.0.3`) in artifact metadata.\\n- If multiple frameworks were compared, include a side-by-side metric table with identical folds.\\n- Save the model in native format alongside a JSON metadata sidecar.\\n\\n## Composability hints\\n- Before this expert -> use the **Data Cleaning Expert** for null handling and type coercion. 
Gradient boosters handle NaNs natively (XGBoost, LightGBM) but benefit from clean categoricals.\\n- Before this expert -> use the **Scikit-learn Modeling Expert** if a quick linear baseline is needed for comparison.\\n- After this expert -> use the **Visualization Expert** to plot SHAP summaries, learning curves, or metric comparisons.\\n- After this expert -> use the **Machine Learning Export Expert** to convert the model to ONNX or package it for serving.\\n- Related -> the **Statistics Expert** for post-hoc significance tests when comparing model performance across folds.\"), score=0.6425158381462097, sparse_score=0.6425158381462097, dense_score=None)]" ] }, - "execution_count": 54, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -138,7 +238,7 @@ "id": "5c85573e", "metadata": {}, "source": [ - "## 3. Emit Context for LLM\n", + "## 4. Emit provider-ready context \n", "\n", "This step formats the instructions from the top-K retrieval results into a single context string\n", "that could be passed to an AI model (like Claude or Codex) for answering the query." @@ -146,7 +246,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 8, "id": "47c4c7a6", "metadata": {}, "outputs": [ @@ -204,7 +304,7 @@ "id": "f232afa3", "metadata": {}, "source": [ - "## 5 Mock Agent Loop\n", + "## 5. Demonstrate a simple mock agent loop\n", "\n", "- Simulates multiple queries in a loop. \n", "- Retrieves top-K expert cards for each query. 
\n", @@ -213,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 9, "id": "08b5202d", "metadata": {}, "outputs": [ From f8cad68c7ca91b4ad26d865fe1a3d8d2f187ba7b Mon Sep 17 00:00:00 2001 From: prajwal Date: Sun, 1 Mar 2026 19:07:02 +0530 Subject: [PATCH 4/6] Fix notebook headers to match intro numbering --- examples/notebooks/getting-started.ipynb | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/examples/notebooks/getting-started.ipynb b/examples/notebooks/getting-started.ipynb index 421ccd2..6f2aace 100644 --- a/examples/notebooks/getting-started.ipynb +++ b/examples/notebooks/getting-started.ipynb @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "id": "a02b3ef1", "metadata": {}, "outputs": [ @@ -127,8 +127,8 @@ "Building wheels for collected packages: skillmesh\n", " Building editable for skillmesh (pyproject.toml): started\n", " Building editable for skillmesh (pyproject.toml): finished with status 'done'\n", - " Created wheel for skillmesh: filename=skillmesh-0.1.0-0.editable-py3-none-any.whl size=5534 sha256=5c7a3f4fb3c8a58a9891fdc3c5f2bd3930d5bc38eb9d46b6a6d82bb26722dca0\n", - " Stored in directory: C:\\Users\\DELL\\AppData\\Local\\Temp\\pip-ephem-wheel-cache-dm_bov34\\wheels\\26\\fc\\af\\1aa2362d8ccbc2ed9db0df865eeffcbf5130dc61876fdaf17b\n", + " Created wheel for skillmesh: filename=skillmesh-0.1.0-0.editable-py3-none-any.whl size=5534 sha256=6dfabd1ecad61c06188c5aa448167818c7e5ee7490ef689be1230fcc4dfc2684\n", + " Stored in directory: C:\\Users\\DELL\\AppData\\Local\\Temp\\pip-ephem-wheel-cache-w74lwmao\\wheels\\26\\fc\\af\\1aa2362d8ccbc2ed9db0df865eeffcbf5130dc61876fdaf17b\n", "Successfully built skillmesh\n", "Installing collected packages: skillmesh\n", " Attempting uninstall: skillmesh\n", @@ -158,7 +158,7 @@ "id": "0f34646f", "metadata": {}, "source": [ - "## 2.Load the Registry\n", + "## 2. 
Load SkillMesh Registry\n", "\n", "This cell loads the SkillMesh registry, which contains all expert cards.\n", "\n", @@ -168,7 +168,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 2, "id": "d7480e85", "metadata": {}, "outputs": [ @@ -178,7 +178,7 @@ "92" ] }, - "execution_count": 6, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -205,7 +205,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 3, "id": "3caa3116", "metadata": {}, "outputs": [ @@ -217,7 +217,7 @@ " RetrievalHit(card=ToolCard(id='ml.gradient-boosting', title='Gradient Boosting with XGBoost and LightGBM', domain='machine_learning', instruction_file='instructions/gradient-boosting-xgb-lgbm.md', description='High-performance boosting, tuning, and SHAP-based attribution.', tags=['xgboost', 'lightgbm', 'catboost', 'shap', 'boosting'], tool_hints=['xgboost', 'lightgbm', 'catboost', 'shap'], examples=['Tune XGBoost with early stopping', 'Compare LightGBM vs CatBoost'], aliases=['gbm', 'tree-boosting'], dependencies=['xgboost', 'lightgbm', 'catboost', 'shap', 'scikit-learn'], output_artifacts=['booster_model', 'feature_importance', 'shap_summary'], quality_checks=['early_stopping_enabled', 'class_imbalance_strategy', 'feature_leakage_review'], constraints=['consistent_validation_metric_across_models'], input_contract={'required': 'tabular train/validation datasets', 'optional': 'class weights and monotonic constraints'}, risk_level='medium', maturity='stable', metadata={'provider_support': ['codex', 'claude'], 'install_extra': 'boosting'}, instruction_text=\"# Gradient Boosting Expert (XGBoost / LightGBM / CatBoost)\\n\\nUse this expert for high-performance tabular modeling with gradient boosting frameworks, including classification, regression, and ranking tasks.\\n\\n## When to use this expert\\n- The task involves structured/tabular data where tree-based models are expected to excel.\\n- The user needs state-of-the-art 
predictive performance with feature attribution.\\n- Early stopping, hyperparameter tuning, or framework comparison is required.\\n- SHAP-based model interpretation or class imbalance handling is requested.\\n\\n## Execution behavior\\n\\n1. Build a clean train/validation/test split with leakage controls. For temporal data, split by time. For classification, use stratified splits to preserve class ratios.\\n2. Configure early stopping with a patience of 20-50 rounds on the validation set. Pass `eval_set` (XGBoost/LightGBM) or `eval_set` (CatBoost) explicitly.\\n3. Start with sensible defaults: `learning_rate=0.05`, `max_depth=6`, `n_estimators=2000` (relying on early stopping to find the right count). Tune in this order: (a) `n_estimators` via early stopping, (b) `max_depth` and `min_child_weight`, (c) `subsample` and `colsample_bytree`, (d) regularization (`reg_alpha`, `reg_lambda`), (e) `learning_rate` reduction with proportional `n_estimators` increase.\\n4. For multi-framework comparison, hold folds constant (pass the same `cv` splitter) and use identical metric definitions. Report results in a comparison table.\\n5. Compute SHAP values using `shap.TreeExplainer` for global and local feature attribution. Generate summary plots, dependence plots for top features, and force plots for individual predictions when interpretability is requested.\\n6. Handle class imbalance with `scale_pos_weight` (XGBoost), `is_unbalance` (LightGBM), or `auto_class_weights` (CatBoost). Compare against SMOTE-in-pipeline only if simple weighting underperforms.\\n7. For categorical features, prefer LightGBM or CatBoost native categorical handling over one-hot encoding when cardinality > 10.\\n8. 
Save the final model with native `.save_model()` format and record hyperparameters, best iteration, and validation metric in metadata.\\n\\n## Decision tree\\n- If dataset has > 100k rows and many categorical features -> prefer LightGBM for speed; use CatBoost if categoricals have high cardinality and natural ordering is absent.\\n- If dataset is small (< 5k rows) -> reduce `max_depth` to 3-4 and increase regularization to prevent overfitting; consider whether a simpler sklearn model might suffice.\\n- If the task is ranking -> use `XGBRanker` or `LGBMRanker` with `lambdarank` objective.\\n- If feature interactions matter for explanation -> use SHAP interaction values, not just main-effect importance.\\n- If prediction latency is critical -> export to ONNX or use LightGBM's `predict_disable_shape_check` for faster inference.\\n- If reproducibility is mandatory -> pin `random_state` in the booster AND the data split, and record library version.\\n\\n## Anti-patterns\\n- NEVER set `n_estimators` to a fixed value without early stopping. This either underfits or overfits by construction.\\n- NEVER tune hyperparameters on the test set. Use a validation set or inner cross-validation; the test set is touched exactly once.\\n- NEVER compare frameworks with different preprocessing (e.g., one-hot for XGBoost but native categoricals for CatBoost) and call it a fair comparison.\\n- NEVER ignore the `best_iteration` attribute after early stopping. Predictions must use `best_iteration` to avoid including over-trained trees.\\n- NEVER rely solely on `feature_importances_` (gain-based) for feature selection. 
Gain importance is biased toward high-cardinality and correlated features.\\n\\n## Common mistakes\\n- Using `eval_metric` that does not match the business objective (e.g., `logloss` for early stopping but reporting `F1`).\\n- Forgetting to pass `categorical_feature` to LightGBM, causing it to treat integer-encoded categoricals as continuous.\\n- Setting `scale_pos_weight` AND applying SMOTE simultaneously, which double-corrects for imbalance.\\n- Running SHAP on the training set instead of the validation/test set, which inflates apparent feature relevance.\\n- Not setting `verbosity=0` or `verbose=-1` during hyperparameter search, flooding logs with thousands of training lines.\\n- Using `pickle` instead of the framework's native `.save_model()`, which breaks across library version upgrades.\\n\\n## Output contract\\n- Report best hyperparameters, best iteration number, and validation metric trajectory (or at minimum start/best/final values).\\n- Include the class-imbalance strategy used and its rationale.\\n- Provide SHAP summary plots or feature importance rankings with the method explicitly named.\\n- Never report train-only metrics as final performance. Always include validation or test metrics.\\n- Record the framework name and version (e.g., `xgboost==2.0.3`) in artifact metadata.\\n- If multiple frameworks were compared, include a side-by-side metric table with identical folds.\\n- Save the model in native format alongside a JSON metadata sidecar.\\n\\n## Composability hints\\n- Before this expert -> use the **Data Cleaning Expert** for null handling and type coercion. 
Gradient boosters handle NaNs natively (XGBoost, LightGBM) but benefit from clean categoricals.\\n- Before this expert -> use the **Scikit-learn Modeling Expert** if a quick linear baseline is needed for comparison.\\n- After this expert -> use the **Visualization Expert** to plot SHAP summaries, learning curves, or metric comparisons.\\n- After this expert -> use the **Machine Learning Export Expert** to convert the model to ONNX or package it for serving.\\n- Related -> the **Statistics Expert** for post-hoc significance tests when comparing model performance across folds.\"), score=0.6425158381462097, sparse_score=0.6425158381462097, dense_score=None)]" ] }, - "execution_count": 7, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -246,7 +246,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "id": "47c4c7a6", "metadata": {}, "outputs": [ @@ -304,7 +304,7 @@ "id": "f232afa3", "metadata": {}, "source": [ - "## 5. Demonstrate a simple mock agent loop\n", + "## 5. Mock Agent Loop\n", "\n", "- Simulates multiple queries in a loop. \n", "- Retrieves top-K expert cards for each query. 
\n", @@ -313,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "id": "08b5202d", "metadata": {}, "outputs": [ From d46bd825f0b0e04f3f230693af505a7858c38fb2 Mon Sep 17 00:00:00 2001 From: prajwal Date: Sun, 1 Mar 2026 19:41:40 +0530 Subject: [PATCH 5/6] Clear notebook outputs to remove environment-specific logs --- examples/notebooks/getting-started.ipynb | 259 +---------------------- 1 file changed, 10 insertions(+), 249 deletions(-) diff --git a/examples/notebooks/getting-started.ipynb b/examples/notebooks/getting-started.ipynb index 6f2aace..fc50540 100644 --- a/examples/notebooks/getting-started.ipynb +++ b/examples/notebooks/getting-started.ipynb @@ -28,126 +28,10 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "a02b3ef1", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Obtaining file:///E:/ForOpensource/SkillMesh\n", - " Installing build dependencies: started\n", - " Installing build dependencies: finished with status 'done'\n", - " Checking if build backend supports build_editable: started\n", - " Checking if build backend supports build_editable: finished with status 'done'\n", - " Getting requirements to build editable: started\n", - " Getting requirements to build editable: finished with status 'done'\n", - " Preparing editable metadata (pyproject.toml): started\n", - " Preparing editable metadata (pyproject.toml): finished with status 'done'\n", - "Requirement already satisfied: numpy>=1.24 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (2.4.2)\n", - "Requirement already satisfied: PyYAML>=6.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (6.0.3)\n", - "Requirement already satisfied: rank-bm25>=0.2.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (0.2.2)\n", - "Requirement already satisfied: jsonschema>=4.0 in 
e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (4.26.0)\n", - "Requirement already satisfied: chromadb>=0.5.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (1.5.2)\n", - "Requirement already satisfied: build>=1.0.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.4.0)\n", - "Requirement already satisfied: pydantic>=1.9 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (2.12.5)\n", - "Requirement already satisfied: pybase64>=1.4.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.4.3)\n", - "Requirement already satisfied: uvicorn[standard]>=0.18.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.41.0)\n", - "Requirement already satisfied: posthog<6.0.0,>=2.4.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (5.4.0)\n", - "Requirement already satisfied: typing-extensions>=4.5.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (4.15.0)\n", - "Requirement already satisfied: onnxruntime>=1.14.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.24.2)\n", - "Requirement already satisfied: opentelemetry-api>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", - "Requirement already satisfied: opentelemetry-exporter-otlp-proto-grpc>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", - "Requirement already satisfied: opentelemetry-sdk>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", - "Requirement already satisfied: tokenizers>=0.13.2 in 
e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.22.2)\n", - "Requirement already satisfied: pypika>=0.48.9 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.51.1)\n", - "Requirement already satisfied: tqdm>=4.65.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (4.67.3)\n", - "Requirement already satisfied: overrides>=7.3.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (7.7.0)\n", - "Requirement already satisfied: importlib-resources in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (6.5.2)\n", - "Requirement already satisfied: grpcio>=1.58.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.78.0)\n", - "Requirement already satisfied: bcrypt>=4.0.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (5.0.0)\n", - "Requirement already satisfied: typer>=0.9.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.24.1)\n", - "Requirement already satisfied: kubernetes>=28.1.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (35.0.0)\n", - "Requirement already satisfied: tenacity>=8.2.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (9.1.4)\n", - "Requirement already satisfied: mmh3>=4.0.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (5.2.0)\n", - "Requirement already satisfied: orjson>=3.9.12 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (3.11.7)\n", - "Requirement already satisfied: httpx>=0.27.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages 
(from chromadb>=0.5.0->skillmesh==0.1.0) (0.28.1)\n", - "Requirement already satisfied: rich>=10.11.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (14.3.3)\n", - "Requirement already satisfied: attrs>=22.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (25.4.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (2025.9.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (0.37.0)\n", - "Requirement already satisfied: rpds-py>=0.25.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (0.30.0)\n", - "Requirement already satisfied: packaging>=24.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from build>=1.0.3->chromadb>=0.5.0->skillmesh==0.1.0) (26.0)\n", - "Requirement already satisfied: pyproject_hooks in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from build>=1.0.3->chromadb>=0.5.0->skillmesh==0.1.0) (1.2.0)\n", - "Requirement already satisfied: colorama in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from build>=1.0.3->chromadb>=0.5.0->skillmesh==0.1.0) (0.4.6)\n", - "Requirement already satisfied: anyio in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (4.12.1)\n", - "Requirement already satisfied: certifi in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (2026.2.25)\n", - "Requirement already satisfied: httpcore==1.* in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.0.9)\n", - "Requirement already satisfied: idna in 
e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.11)\n", - "Requirement already satisfied: h11>=0.16 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpcore==1.*->httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.16.0)\n", - "Requirement already satisfied: six>=1.9.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.17.0)\n", - "Requirement already satisfied: python-dateutil>=2.5.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.9.0.post0)\n", - "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.9.0)\n", - "Requirement already satisfied: requests in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.32.5)\n", - "Requirement already satisfied: requests-oauthlib in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.0.0)\n", - "Requirement already satisfied: urllib3!=2.6.0,>=1.24.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.6.3)\n", - "Requirement already satisfied: durationpy>=0.7 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.10)\n", - "Requirement already satisfied: flatbuffers in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (25.12.19)\n", - "Requirement already satisfied: protobuf in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (6.33.5)\n", - 
"Requirement already satisfied: sympy in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (1.14.0)\n", - "Requirement already satisfied: importlib-metadata<8.8.0,>=6.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-api>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (8.7.1)\n", - "Requirement already satisfied: googleapis-common-protos~=1.57 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.72.0)\n", - "Requirement already satisfied: opentelemetry-exporter-otlp-proto-common==1.39.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", - "Requirement already satisfied: opentelemetry-proto==1.39.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", - "Requirement already satisfied: opentelemetry-semantic-conventions==0.60b1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-sdk>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.60b1)\n", - "Requirement already satisfied: backoff>=1.10.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from posthog<6.0.0,>=2.4.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.2.1)\n", - "Requirement already satisfied: distro>=1.5.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from posthog<6.0.0,>=2.4.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.9.0)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from pydantic>=1.9->chromadb>=0.5.0->skillmesh==0.1.0) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.41.5 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from 
pydantic>=1.9->chromadb>=0.5.0->skillmesh==0.1.0) (2.41.5)\n", - "Requirement already satisfied: typing-inspection>=0.4.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from pydantic>=1.9->chromadb>=0.5.0->skillmesh==0.1.0) (0.4.2)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from rich>=10.11.0->chromadb>=0.5.0->skillmesh==0.1.0) (4.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from rich>=10.11.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.19.2)\n", - "Requirement already satisfied: huggingface-hub<2.0,>=0.16.4 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (1.5.0)\n", - "Requirement already satisfied: click>=8.2.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from typer>=0.9.0->chromadb>=0.5.0->skillmesh==0.1.0) (8.3.1)\n", - "Requirement already satisfied: shellingham>=1.3.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from typer>=0.9.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.5.4)\n", - "Requirement already satisfied: annotated-doc>=0.0.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from typer>=0.9.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.0.4)\n", - "Requirement already satisfied: httptools>=0.6.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (0.7.1)\n", - "Requirement already satisfied: python-dotenv>=0.13 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (1.2.1)\n", - "Requirement already satisfied: watchfiles>=0.20 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (1.1.1)\n", - "Requirement already satisfied: websockets>=10.4 in 
e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (16.0)\n", - "Requirement already satisfied: filelock>=3.10.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (3.24.3)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (2026.2.0)\n", - "Requirement already satisfied: hf-xet<2.0.0,>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (1.3.2)\n", - "Requirement already satisfied: zipp>=3.20 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from importlib-metadata<8.8.0,>=6.0->opentelemetry-api>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.23.0)\n", - "Requirement already satisfied: mdurl~=0.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.1.2)\n", - "Requirement already satisfied: charset_normalizer<4,>=2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from requests->kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.4.4)\n", - "Requirement already satisfied: oauthlib>=3.0.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from requests-oauthlib->kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.3.1)\n", - "Requirement already satisfied: mpmath<1.4,>=1.1.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from sympy->onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (1.3.0)\n", - "Building wheels for collected packages: skillmesh\n", - " Building editable for skillmesh (pyproject.toml): started\n", - " Building editable for skillmesh (pyproject.toml): finished with status 'done'\n", - 
" Created wheel for skillmesh: filename=skillmesh-0.1.0-0.editable-py3-none-any.whl size=5534 sha256=6dfabd1ecad61c06188c5aa448167818c7e5ee7490ef689be1230fcc4dfc2684\n", - " Stored in directory: C:\\Users\\DELL\\AppData\\Local\\Temp\\pip-ephem-wheel-cache-w74lwmao\\wheels\\26\\fc\\af\\1aa2362d8ccbc2ed9db0df865eeffcbf5130dc61876fdaf17b\n", - "Successfully built skillmesh\n", - "Installing collected packages: skillmesh\n", - " Attempting uninstall: skillmesh\n", - " Found existing installation: skillmesh 0.1.0\n", - " Uninstalling skillmesh-0.1.0:\n", - " Successfully uninstalled skillmesh-0.1.0\n", - "Successfully installed skillmesh-0.1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "[notice] A new release of pip is available: 23.2.1 -> 26.0.1\n", - "[notice] To update, run: python.exe -m pip install --upgrade pip\n" - ] - } - ], + "outputs": [], "source": [ "# Install SkillMesh dependencies from root folder\n", "!pip install -e ../.." @@ -168,21 +52,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "d7480e85", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "92" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from skill_registry_rag.registry import load_registry\n", "\n", @@ -205,23 +78,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "3caa3116", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[RetrievalHit(card=ToolCard(id='role.data-analyst', title='Data Analyst Role Orchestrator', domain='role_orchestrator', instruction_file='roles/data-analyst.md', description='Role expert that orchestrates profiling, cleaning, EDA, baseline modeling, and visualization for tabular analytics tasks.', tags=['role', 'data-analyst', 'eda', 'pandas', 'visualization', 'baseline-modeling'], tool_hints=['data.pandas-advanced', 'data.sql-queries', 
'viz.matplotlib-seaborn', 'ml.sklearn-modeling', 'stats.scipy-statsmodels'], examples=['Profile messy dataset, clean it, and deliver insight dashboard', 'Run EDA plus baseline prediction with leakage-safe validation'], aliases=['role-data-analyst', 'analytics-orchestrator'], dependencies=['data.pandas-advanced', 'data.sql-queries', 'viz.matplotlib-seaborn', 'ml.sklearn-modeling', 'stats.scipy-statsmodels'], output_artifacts=['data_profile_summary', 'eda_findings', 'visual_report', 'baseline_model_report'], quality_checks=['missingness_and_dtype_audit_completed', 'insights_backed_by_numeric_evidence', 'charts_have_labels_units_and_titles'], constraints=['no_causal_claims_from_correlation_only', 'use_only_allowed_dependencies_unless_explicitly_authorized'], input_contract={'required': 'tabular dataset and analysis objective', 'optional': 'target metric, business context, and prediction requirement'}, risk_level='medium', maturity='beta', metadata={'provider_support': ['codex', 'claude'], 'owner': 'community', 'catalog_tier': 'roles', 'instruction_version': 'v1'}, instruction_text='# Data Analyst Role Expert\\n\\nUse this role when the request needs end-to-end tabular analysis: data profiling, cleaning, exploratory analysis, baseline modeling, and clear visual communication.\\n\\n## Allowed expert dependencies\\n\\n- `data.pandas-advanced`\\n- `data.sql-queries`\\n- `viz.matplotlib-seaborn`\\n- `ml.sklearn-modeling`\\n- `stats.scipy-statsmodels`\\n\\n## Execution behavior\\n\\n1. Start with a data quality audit:\\n nulls, dtypes, duplicates, outliers, key integrity, and temporal coverage.\\n2. Normalize and clean data using reproducible transformations.\\n3. Produce concise EDA:\\n distributions, trends, segmentation, and relationship charts.\\n4. If prediction is requested, build a leakage-safe baseline model with validation metrics.\\n5. Explain findings in business terms:\\n what changed, how much, and what action is implied.\\n6. 
End with caveats and next steps.\\n\\n## Output contract\\n\\n- `profile_summary`: row/column counts, missingness, type issues, and anomalies.\\n- `eda_insights`: ranked insights with numeric evidence.\\n- `visuals`: labeled plots with clear units and titles.\\n- `model_section` (optional): baseline model, metrics, and limitations.\\n- `repro_steps`: commands/notebook steps to reproduce.\\n\\n## Guardrails\\n\\n- Do not skip data validation before insights.\\n- Do not claim causality from correlation.\\n- Do not use tools outside allowed dependencies unless explicitly approved.'), score=1.0, sparse_score=1.0, dense_score=None),\n", - " RetrievalHit(card=ToolCard(id='docs.slides-pptx', title='Slide Deck Creation (PPTX)', domain='presentation_generation', instruction_file='instructions/slide-creation.md', description='Narrative deck creation with PPTX output and chart integration.', tags=['slides', 'pptx', 'python-pptx', 'presentation', 'storytelling'], tool_hints=['python-pptx', 'matplotlib', 'pillow'], examples=['Executive summary deck', 'Findings deck with visuals'], aliases=['deck-generation', 'slide-authoring'], dependencies=['python-pptx', 'pillow', 'matplotlib'], output_artifacts=['pptx_deck', 'slide_outline', 'speaker_notes_stub'], quality_checks=['title_and_recommendation_slides_present', 'visual_hierarchy_consistent'], constraints=['one_core_message_per_slide'], input_contract={'required': 'structured storyline or slide outline', 'optional': 'brand template and theme colors'}, risk_level='low', maturity='stable', metadata={'provider_support': ['codex', 'claude'], 'install_extra': 'slides'}, instruction_text='# Slide Creation Expert (PPTX)\\n\\nUse this expert for executive summaries, project updates, and narrative decks.\\n\\n## Execution behavior\\n\\n1. Derive a slide storyline first (problem, analysis, findings, actions).\\n2. Allocate one key message per slide and keep text concise.\\n3. Generate PPTX using `python-pptx` with consistent templates.\\n4. 
Embed charts/tables as visuals instead of dense paragraphs.\\n5. Export final deck and register slide artifact with version/date.\\n\\n## Output contract\\n\\n- Include title slide, evidence slides, and recommendation slide.\\n- Keep visual hierarchy consistent across slides.\\n- Add speaker-note placeholders for critical assumptions.\\n- Ensure deck can be opened in standard PowerPoint clients.'), score=0.667872965335846, sparse_score=0.667872965335846, dense_score=None),\n", - " RetrievalHit(card=ToolCard(id='ml.gradient-boosting', title='Gradient Boosting with XGBoost and LightGBM', domain='machine_learning', instruction_file='instructions/gradient-boosting-xgb-lgbm.md', description='High-performance boosting, tuning, and SHAP-based attribution.', tags=['xgboost', 'lightgbm', 'catboost', 'shap', 'boosting'], tool_hints=['xgboost', 'lightgbm', 'catboost', 'shap'], examples=['Tune XGBoost with early stopping', 'Compare LightGBM vs CatBoost'], aliases=['gbm', 'tree-boosting'], dependencies=['xgboost', 'lightgbm', 'catboost', 'shap', 'scikit-learn'], output_artifacts=['booster_model', 'feature_importance', 'shap_summary'], quality_checks=['early_stopping_enabled', 'class_imbalance_strategy', 'feature_leakage_review'], constraints=['consistent_validation_metric_across_models'], input_contract={'required': 'tabular train/validation datasets', 'optional': 'class weights and monotonic constraints'}, risk_level='medium', maturity='stable', metadata={'provider_support': ['codex', 'claude'], 'install_extra': 'boosting'}, instruction_text=\"# Gradient Boosting Expert (XGBoost / LightGBM / CatBoost)\\n\\nUse this expert for high-performance tabular modeling with gradient boosting frameworks, including classification, regression, and ranking tasks.\\n\\n## When to use this expert\\n- The task involves structured/tabular data where tree-based models are expected to excel.\\n- The user needs state-of-the-art predictive performance with feature attribution.\\n- Early stopping, 
hyperparameter tuning, or framework comparison is required.\\n- SHAP-based model interpretation or class imbalance handling is requested.\\n\\n## Execution behavior\\n\\n1. Build a clean train/validation/test split with leakage controls. For temporal data, split by time. For classification, use stratified splits to preserve class ratios.\\n2. Configure early stopping with a patience of 20-50 rounds on the validation set. Pass `eval_set` (XGBoost/LightGBM) or `eval_set` (CatBoost) explicitly.\\n3. Start with sensible defaults: `learning_rate=0.05`, `max_depth=6`, `n_estimators=2000` (relying on early stopping to find the right count). Tune in this order: (a) `n_estimators` via early stopping, (b) `max_depth` and `min_child_weight`, (c) `subsample` and `colsample_bytree`, (d) regularization (`reg_alpha`, `reg_lambda`), (e) `learning_rate` reduction with proportional `n_estimators` increase.\\n4. For multi-framework comparison, hold folds constant (pass the same `cv` splitter) and use identical metric definitions. Report results in a comparison table.\\n5. Compute SHAP values using `shap.TreeExplainer` for global and local feature attribution. Generate summary plots, dependence plots for top features, and force plots for individual predictions when interpretability is requested.\\n6. Handle class imbalance with `scale_pos_weight` (XGBoost), `is_unbalance` (LightGBM), or `auto_class_weights` (CatBoost). Compare against SMOTE-in-pipeline only if simple weighting underperforms.\\n7. For categorical features, prefer LightGBM or CatBoost native categorical handling over one-hot encoding when cardinality > 10.\\n8. 
Save the final model with native `.save_model()` format and record hyperparameters, best iteration, and validation metric in metadata.\\n\\n## Decision tree\\n- If dataset has > 100k rows and many categorical features -> prefer LightGBM for speed; use CatBoost if categoricals have high cardinality and natural ordering is absent.\\n- If dataset is small (< 5k rows) -> reduce `max_depth` to 3-4 and increase regularization to prevent overfitting; consider whether a simpler sklearn model might suffice.\\n- If the task is ranking -> use `XGBRanker` or `LGBMRanker` with `lambdarank` objective.\\n- If feature interactions matter for explanation -> use SHAP interaction values, not just main-effect importance.\\n- If prediction latency is critical -> export to ONNX or use LightGBM's `predict_disable_shape_check` for faster inference.\\n- If reproducibility is mandatory -> pin `random_state` in the booster AND the data split, and record library version.\\n\\n## Anti-patterns\\n- NEVER set `n_estimators` to a fixed value without early stopping. This either underfits or overfits by construction.\\n- NEVER tune hyperparameters on the test set. Use a validation set or inner cross-validation; the test set is touched exactly once.\\n- NEVER compare frameworks with different preprocessing (e.g., one-hot for XGBoost but native categoricals for CatBoost) and call it a fair comparison.\\n- NEVER ignore the `best_iteration` attribute after early stopping. Predictions must use `best_iteration` to avoid including over-trained trees.\\n- NEVER rely solely on `feature_importances_` (gain-based) for feature selection. 
Gain importance is biased toward high-cardinality and correlated features.\\n\\n## Common mistakes\\n- Using `eval_metric` that does not match the business objective (e.g., `logloss` for early stopping but reporting `F1`).\\n- Forgetting to pass `categorical_feature` to LightGBM, causing it to treat integer-encoded categoricals as continuous.\\n- Setting `scale_pos_weight` AND applying SMOTE simultaneously, which double-corrects for imbalance.\\n- Running SHAP on the training set instead of the validation/test set, which inflates apparent feature relevance.\\n- Not setting `verbosity=0` or `verbose=-1` during hyperparameter search, flooding logs with thousands of training lines.\\n- Using `pickle` instead of the framework's native `.save_model()`, which breaks across library version upgrades.\\n\\n## Output contract\\n- Report best hyperparameters, best iteration number, and validation metric trajectory (or at minimum start/best/final values).\\n- Include the class-imbalance strategy used and its rationale.\\n- Provide SHAP summary plots or feature importance rankings with the method explicitly named.\\n- Never report train-only metrics as final performance. Always include validation or test metrics.\\n- Record the framework name and version (e.g., `xgboost==2.0.3`) in artifact metadata.\\n- If multiple frameworks were compared, include a side-by-side metric table with identical folds.\\n- Save the model in native format alongside a JSON metadata sidecar.\\n\\n## Composability hints\\n- Before this expert -> use the **Data Cleaning Expert** for null handling and type coercion. 
Gradient boosters handle NaNs natively (XGBoost, LightGBM) but benefit from clean categoricals.\\n- Before this expert -> use the **Scikit-learn Modeling Expert** if a quick linear baseline is needed for comparison.\\n- After this expert -> use the **Visualization Expert** to plot SHAP summaries, learning curves, or metric comparisons.\\n- After this expert -> use the **Machine Learning Export Expert** to convert the model to ONNX or package it for serving.\\n- Related -> the **Statistics Expert** for post-hoc significance tests when comparing model performance across folds.\"), score=0.6425158381462097, sparse_score=0.6425158381462097, dense_score=None)]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from skill_registry_rag.retriever import SkillRetriever\n", "\n", @@ -246,49 +106,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "47c4c7a6", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "USER QUERY:\n", - "clean messy sales data and generate charts\n", - "\n", - "RETRIEVED CONTEXT:\n", - "# Data Analyst Role Expert\n", - "\n", - "Use this role when the request needs end-to-end tabular analysis: data profiling, cleaning, exploratory analysis, baseline modeling, and clear visual communication.\n", - "\n", - "## Allowed expert dependencies\n", - "\n", - "- `data.pandas-advanced`\n", - "- `data.sql-queries`\n", - "- `viz.matplotlib-seaborn`\n", - "- `ml.sklearn-modeling`\n", - "- `stats.scipy-statsmodels`\n", - "\n", - "## Execution behavior\n", - "\n", - "1. Start with a data quality audit:\n", - " nulls, dtypes, duplicates, outliers, key integrity, and temporal coverage.\n", - "2. Normalize and clean data using reproducible transformations.\n", - "3. Produce concise EDA:\n", - " distributions, trends, segmentation, and relationship charts.\n", - "4. 
If prediction is requested, build a leakage-safe baseline model with validation metrics.\n", - "5. Explain findings in business terms:\n", - " what changed, how much, and what action is implied.\n", - "6. End with caveats and next steps.\n", - "\n", - "## Output contract\n", - "\n", - "- `profile_summary`: row/column counts, missingness, type issues, and anomalies.\n", - "- `eda_insights`: ranked insights \n" - ] - } - ], + "outputs": [], "source": [ "context = \"\\n\\n\".join(hit.card.instruction_text for hit in hits)\n", "\n", @@ -313,70 +134,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "08b5202d", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Query: clean messy sales data\n", - "# Data Analyst Role Expert\n", - "\n", - "Use this role when the request needs end-to-end tabular analysis: data profiling, cleaning, exploratory analysis, baseline modeling, and clear visual communication.\n", - "\n", - "## Allowed expert dependencies\n", - "\n", - "- `data.pandas-advanced`\n", - "- `data.sql-queries`\n", - "- `viz.matplotlib-seaborn`\n", - "- `ml.sklearn-modeling`\n", - "- `stats.scipy-statsmodels`\n", - "\n", - "## Execution behavior\n", - "\n", - "1. Start with a data quality audit:\n", - " nulls, dtypes, duplicates, outliers, key integrity, and temporal coverage.\n", - "2. Normaliz\n", - "--------------------------------------------------\n", - "Query: generate charts for sales\n", - "# Slide Creation Expert (PPTX)\n", - "\n", - "Use this expert for executive summaries, project updates, and narrative decks.\n", - "\n", - "## Execution behavior\n", - "\n", - "1. Derive a slide storyline first (problem, analysis, findings, actions).\n", - "2. Allocate one key message per slide and keep text concise.\n", - "3. Generate PPTX using `python-pptx` with consistent templates.\n", - "4. Embed charts/tables as visuals instead of dense paragraphs.\n", - "5. 
Export final deck and register slide artifact with version/date.\n", - "\n", - "## Output contract\n", - "\n", - "- Include titl\n", - "--------------------------------------------------\n", - "Query: summarize findings\n", - "# Slide Creation Expert (PPTX)\n", - "\n", - "Use this expert for executive summaries, project updates, and narrative decks.\n", - "\n", - "## Execution behavior\n", - "\n", - "1. Derive a slide storyline first (problem, analysis, findings, actions).\n", - "2. Allocate one key message per slide and keep text concise.\n", - "3. Generate PPTX using `python-pptx` with consistent templates.\n", - "4. Embed charts/tables as visuals instead of dense paragraphs.\n", - "5. Export final deck and register slide artifact with version/date.\n", - "\n", - "## Output contract\n", - "\n", - "- Include titl\n", - "--------------------------------------------------\n" - ] - } - ], + "outputs": [], "source": [ "queries = [\n", " \"clean messy sales data\",\n", From ae6754e1ed85a7e2849a4f392d7d2d5a2298623f Mon Sep 17 00:00:00 2001 From: prajwal Date: Sun, 1 Mar 2026 22:09:48 +0530 Subject: [PATCH 6/6] Improve getting-started notebook by adding structured markdown sample outputs --- examples/notebooks/getting-started.ipynb | 402 ++++++++++++++++++++++- 1 file changed, 392 insertions(+), 10 deletions(-) diff --git a/examples/notebooks/getting-started.ipynb b/examples/notebooks/getting-started.ipynb index fc50540..4471aa1 100644 --- a/examples/notebooks/getting-started.ipynb +++ b/examples/notebooks/getting-started.ipynb @@ -28,15 +28,144 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "a02b3ef1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Obtaining file:///E:/ForOpensource/SkillMesh\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Checking if build backend supports build_editable: 
started\n", + " Checking if build backend supports build_editable: finished with status 'done'\n", + " Getting requirements to build editable: started\n", + " Getting requirements to build editable: finished with status 'done'\n", + " Preparing editable metadata (pyproject.toml): started\n", + " Preparing editable metadata (pyproject.toml): finished with status 'done'\n", + "Requirement already satisfied: numpy>=1.24 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (2.4.2)\n", + "Requirement already satisfied: PyYAML>=6.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (6.0.3)\n", + "Requirement already satisfied: rank-bm25>=0.2.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (0.2.2)\n", + "Requirement already satisfied: jsonschema>=4.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (4.26.0)\n", + "Requirement already satisfied: chromadb>=0.5.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (1.5.2)\n", + "Requirement already satisfied: build>=1.0.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.4.0)\n", + "Requirement already satisfied: pydantic>=1.9 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (2.12.5)\n", + "Requirement already satisfied: pybase64>=1.4.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.4.3)\n", + "Requirement already satisfied: uvicorn[standard]>=0.18.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.41.0)\n", + "Requirement already satisfied: posthog<6.0.0,>=2.4.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (5.4.0)\n", + "Requirement already satisfied: typing-extensions>=4.5.0 in 
e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (4.15.0)\n", + "Requirement already satisfied: onnxruntime>=1.14.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.24.2)\n", + "Requirement already satisfied: opentelemetry-api>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", + "Requirement already satisfied: opentelemetry-exporter-otlp-proto-grpc>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", + "Requirement already satisfied: opentelemetry-sdk>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", + "Requirement already satisfied: tokenizers>=0.13.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.22.2)\n", + "Requirement already satisfied: pypika>=0.48.9 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.51.1)\n", + "Requirement already satisfied: tqdm>=4.65.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (4.67.3)\n", + "Requirement already satisfied: overrides>=7.3.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (7.7.0)\n", + "Requirement already satisfied: importlib-resources in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (6.5.2)\n", + "Requirement already satisfied: grpcio>=1.58.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.78.0)\n", + "Requirement already satisfied: bcrypt>=4.0.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (5.0.0)\n", + "Requirement already satisfied: typer>=0.9.0 
in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.24.1)\n", + "Requirement already satisfied: kubernetes>=28.1.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (35.0.0)\n", + "Requirement already satisfied: tenacity>=8.2.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (9.1.4)\n", + "Requirement already satisfied: mmh3>=4.0.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (5.2.0)\n", + "Requirement already satisfied: orjson>=3.9.12 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (3.11.7)\n", + "Requirement already satisfied: httpx>=0.27.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.28.1)\n", + "Requirement already satisfied: rich>=10.11.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (14.3.3)\n", + "Requirement already satisfied: attrs>=22.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (25.4.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (2025.9.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (0.37.0)\n", + "Requirement already satisfied: rpds-py>=0.25.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (0.30.0)\n", + "Requirement already satisfied: packaging>=24.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from build>=1.0.3->chromadb>=0.5.0->skillmesh==0.1.0) (26.0)\n", + "Requirement already satisfied: pyproject_hooks in 
e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from build>=1.0.3->chromadb>=0.5.0->skillmesh==0.1.0) (1.2.0)\n", + "Requirement already satisfied: colorama in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from build>=1.0.3->chromadb>=0.5.0->skillmesh==0.1.0) (0.4.6)\n", + "Requirement already satisfied: anyio in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (4.12.1)\n", + "Requirement already satisfied: certifi in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (2026.2.25)\n", + "Requirement already satisfied: httpcore==1.* in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.0.9)\n", + "Requirement already satisfied: idna in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.11)\n", + "Requirement already satisfied: h11>=0.16 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpcore==1.*->httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.16.0)\n", + "Requirement already satisfied: six>=1.9.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.17.0)\n", + "Requirement already satisfied: python-dateutil>=2.5.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.9.0.post0)\n", + "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.9.0)\n", + "Requirement already satisfied: requests in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.32.5)\n", + "Requirement already satisfied: requests-oauthlib in 
e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.0.0)\n", + "Requirement already satisfied: urllib3!=2.6.0,>=1.24.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.6.3)\n", + "Requirement already satisfied: durationpy>=0.7 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.10)\n", + "Requirement already satisfied: flatbuffers in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (25.12.19)\n", + "Requirement already satisfied: protobuf in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (6.33.5)\n", + "Requirement already satisfied: sympy in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (1.14.0)\n", + "Requirement already satisfied: importlib-metadata<8.8.0,>=6.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-api>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (8.7.1)\n", + "Requirement already satisfied: googleapis-common-protos~=1.57 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.72.0)\n", + "Requirement already satisfied: opentelemetry-exporter-otlp-proto-common==1.39.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", + "Requirement already satisfied: opentelemetry-proto==1.39.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", + "Requirement already satisfied: 
opentelemetry-semantic-conventions==0.60b1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-sdk>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.60b1)\n", + "Requirement already satisfied: backoff>=1.10.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from posthog<6.0.0,>=2.4.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.2.1)\n", + "Requirement already satisfied: distro>=1.5.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from posthog<6.0.0,>=2.4.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.9.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from pydantic>=1.9->chromadb>=0.5.0->skillmesh==0.1.0) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.41.5 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from pydantic>=1.9->chromadb>=0.5.0->skillmesh==0.1.0) (2.41.5)\n", + "Requirement already satisfied: typing-inspection>=0.4.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from pydantic>=1.9->chromadb>=0.5.0->skillmesh==0.1.0) (0.4.2)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from rich>=10.11.0->chromadb>=0.5.0->skillmesh==0.1.0) (4.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from rich>=10.11.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.19.2)\n", + "Requirement already satisfied: huggingface-hub<2.0,>=0.16.4 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (1.5.0)\n", + "Requirement already satisfied: click>=8.2.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from typer>=0.9.0->chromadb>=0.5.0->skillmesh==0.1.0) (8.3.1)\n", + "Requirement already satisfied: shellingham>=1.3.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from 
typer>=0.9.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.5.4)\n", + "Requirement already satisfied: annotated-doc>=0.0.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from typer>=0.9.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.0.4)\n", + "Requirement already satisfied: httptools>=0.6.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (0.7.1)\n", + "Requirement already satisfied: python-dotenv>=0.13 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (1.2.1)\n", + "Requirement already satisfied: watchfiles>=0.20 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (1.1.1)\n", + "Requirement already satisfied: websockets>=10.4 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (16.0)\n", + "Requirement already satisfied: filelock>=3.10.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (3.24.3)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (2026.2.0)\n", + "Requirement already satisfied: hf-xet<2.0.0,>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (1.3.2)\n", + "Requirement already satisfied: zipp>=3.20 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from importlib-metadata<8.8.0,>=6.0->opentelemetry-api>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.23.0)\n", + "Requirement already satisfied: mdurl~=0.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from 
markdown-it-py>=2.2.0->rich>=10.11.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.1.2)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from requests->kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.4.4)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from requests-oauthlib->kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.3.1)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from sympy->onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (1.3.0)\n", + "Building wheels for collected packages: skillmesh\n", + " Building editable for skillmesh (pyproject.toml): started\n", + " Building editable for skillmesh (pyproject.toml): finished with status 'done'\n", + " Created wheel for skillmesh: filename=skillmesh-0.1.0-0.editable-py3-none-any.whl size=5534 sha256=bab34fbd5a9a5befaac3d36a4f19317fe9418a366d0debf96c3916181cd7cabe\n", + " Stored in directory: C:\\Users\\DELL\\AppData\\Local\\Temp\\pip-ephem-wheel-cache-ld87_yh7\\wheels\\26\\fc\\af\\1aa2362d8ccbc2ed9db0df865eeffcbf5130dc61876fdaf17b\n", + "Successfully built skillmesh\n", + "Installing collected packages: skillmesh\n", + " Attempting uninstall: skillmesh\n", + " Found existing installation: skillmesh 0.1.0\n", + " Uninstalling skillmesh-0.1.0:\n", + " Successfully uninstalled skillmesh-0.1.0\n", + "Successfully installed skillmesh-0.1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "[notice] A new release of pip is available: 23.2.1 -> 26.0.1\n", + "[notice] To update, run: python.exe -m pip install --upgrade pip\n" + ] + } + ], "source": [ "# Install SkillMesh dependencies from root folder\n", "!pip install -e ../.." 
] }, + { + "cell_type": "markdown", + "id": "f8624728", + "metadata": {}, + "source": [ + "### Sample Output\n", + "\n", + "```text\n", + "Obtaining skillmesh\n", + "Successfully installed skillmesh-0.1.0\n", + "```" + ] + }, { "cell_type": "markdown", "id": "0f34646f", @@ -52,10 +181,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "d7480e85", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "92" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from skill_registry_rag.registry import load_registry\n", "\n", @@ -64,6 +204,18 @@ "len(registry)" ] }, + { + "cell_type": "markdown", + "id": "f742e66d", + "metadata": {}, + "source": [ + "### Sample output:\n", + "\n", + "```text\n", + "92\n", + "```" + ] + }, { "cell_type": "markdown", "id": "4129236f", @@ -78,10 +230,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "3caa3116", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[RetrievalHit(card=ToolCard(id='role.data-analyst', title='Data Analyst Role Orchestrator', domain='role_orchestrator', instruction_file='roles/data-analyst.md', description='Role expert that orchestrates profiling, cleaning, EDA, baseline modeling, and visualization for tabular analytics tasks.', tags=['role', 'data-analyst', 'eda', 'pandas', 'visualization', 'baseline-modeling'], tool_hints=['data.pandas-advanced', 'data.sql-queries', 'viz.matplotlib-seaborn', 'ml.sklearn-modeling', 'stats.scipy-statsmodels'], examples=['Profile messy dataset, clean it, and deliver insight dashboard', 'Run EDA plus baseline prediction with leakage-safe validation'], aliases=['role-data-analyst', 'analytics-orchestrator'], dependencies=['data.pandas-advanced', 'data.sql-queries', 'viz.matplotlib-seaborn', 'ml.sklearn-modeling', 'stats.scipy-statsmodels'], output_artifacts=['data_profile_summary', 
'eda_findings', 'visual_report', 'baseline_model_report'], quality_checks=['missingness_and_dtype_audit_completed', 'insights_backed_by_numeric_evidence', 'charts_have_labels_units_and_titles'], constraints=['no_causal_claims_from_correlation_only', 'use_only_allowed_dependencies_unless_explicitly_authorized'], input_contract={'required': 'tabular dataset and analysis objective', 'optional': 'target metric, business context, and prediction requirement'}, risk_level='medium', maturity='beta', metadata={'provider_support': ['codex', 'claude'], 'owner': 'community', 'catalog_tier': 'roles', 'instruction_version': 'v1'}, instruction_text='# Data Analyst Role Expert\\n\\nUse this role when the request needs end-to-end tabular analysis: data profiling, cleaning, exploratory analysis, baseline modeling, and clear visual communication.\\n\\n## Allowed expert dependencies\\n\\n- `data.pandas-advanced`\\n- `data.sql-queries`\\n- `viz.matplotlib-seaborn`\\n- `ml.sklearn-modeling`\\n- `stats.scipy-statsmodels`\\n\\n## Execution behavior\\n\\n1. Start with a data quality audit:\\n nulls, dtypes, duplicates, outliers, key integrity, and temporal coverage.\\n2. Normalize and clean data using reproducible transformations.\\n3. Produce concise EDA:\\n distributions, trends, segmentation, and relationship charts.\\n4. If prediction is requested, build a leakage-safe baseline model with validation metrics.\\n5. Explain findings in business terms:\\n what changed, how much, and what action is implied.\\n6. 
End with caveats and next steps.\\n\\n## Output contract\\n\\n- `profile_summary`: row/column counts, missingness, type issues, and anomalies.\\n- `eda_insights`: ranked insights with numeric evidence.\\n- `visuals`: labeled plots with clear units and titles.\\n- `model_section` (optional): baseline model, metrics, and limitations.\\n- `repro_steps`: commands/notebook steps to reproduce.\\n\\n## Guardrails\\n\\n- Do not skip data validation before insights.\\n- Do not claim causality from correlation.\\n- Do not use tools outside allowed dependencies unless explicitly approved.'), score=1.0, sparse_score=1.0, dense_score=None),\n", + " RetrievalHit(card=ToolCard(id='docs.slides-pptx', title='Slide Deck Creation (PPTX)', domain='presentation_generation', instruction_file='instructions/slide-creation.md', description='Narrative deck creation with PPTX output and chart integration.', tags=['slides', 'pptx', 'python-pptx', 'presentation', 'storytelling'], tool_hints=['python-pptx', 'matplotlib', 'pillow'], examples=['Executive summary deck', 'Findings deck with visuals'], aliases=['deck-generation', 'slide-authoring'], dependencies=['python-pptx', 'pillow', 'matplotlib'], output_artifacts=['pptx_deck', 'slide_outline', 'speaker_notes_stub'], quality_checks=['title_and_recommendation_slides_present', 'visual_hierarchy_consistent'], constraints=['one_core_message_per_slide'], input_contract={'required': 'structured storyline or slide outline', 'optional': 'brand template and theme colors'}, risk_level='low', maturity='stable', metadata={'provider_support': ['codex', 'claude'], 'install_extra': 'slides'}, instruction_text='# Slide Creation Expert (PPTX)\\n\\nUse this expert for executive summaries, project updates, and narrative decks.\\n\\n## Execution behavior\\n\\n1. Derive a slide storyline first (problem, analysis, findings, actions).\\n2. Allocate one key message per slide and keep text concise.\\n3. Generate PPTX using `python-pptx` with consistent templates.\\n4. 
Embed charts/tables as visuals instead of dense paragraphs.\\n5. Export final deck and register slide artifact with version/date.\\n\\n## Output contract\\n\\n- Include title slide, evidence slides, and recommendation slide.\\n- Keep visual hierarchy consistent across slides.\\n- Add speaker-note placeholders for critical assumptions.\\n- Ensure deck can be opened in standard PowerPoint clients.'), score=0.667872965335846, sparse_score=0.667872965335846, dense_score=None),\n", + " RetrievalHit(card=ToolCard(id='ml.gradient-boosting', title='Gradient Boosting with XGBoost and LightGBM', domain='machine_learning', instruction_file='instructions/gradient-boosting-xgb-lgbm.md', description='High-performance boosting, tuning, and SHAP-based attribution.', tags=['xgboost', 'lightgbm', 'catboost', 'shap', 'boosting'], tool_hints=['xgboost', 'lightgbm', 'catboost', 'shap'], examples=['Tune XGBoost with early stopping', 'Compare LightGBM vs CatBoost'], aliases=['gbm', 'tree-boosting'], dependencies=['xgboost', 'lightgbm', 'catboost', 'shap', 'scikit-learn'], output_artifacts=['booster_model', 'feature_importance', 'shap_summary'], quality_checks=['early_stopping_enabled', 'class_imbalance_strategy', 'feature_leakage_review'], constraints=['consistent_validation_metric_across_models'], input_contract={'required': 'tabular train/validation datasets', 'optional': 'class weights and monotonic constraints'}, risk_level='medium', maturity='stable', metadata={'provider_support': ['codex', 'claude'], 'install_extra': 'boosting'}, instruction_text=\"# Gradient Boosting Expert (XGBoost / LightGBM / CatBoost)\\n\\nUse this expert for high-performance tabular modeling with gradient boosting frameworks, including classification, regression, and ranking tasks.\\n\\n## When to use this expert\\n- The task involves structured/tabular data where tree-based models are expected to excel.\\n- The user needs state-of-the-art predictive performance with feature attribution.\\n- Early stopping, 
hyperparameter tuning, or framework comparison is required.\\n- SHAP-based model interpretation or class imbalance handling is requested.\\n\\n## Execution behavior\\n\\n1. Build a clean train/validation/test split with leakage controls. For temporal data, split by time. For classification, use stratified splits to preserve class ratios.\\n2. Configure early stopping with a patience of 20-50 rounds on the validation set. Pass `eval_set` (XGBoost/LightGBM) or `eval_set` (CatBoost) explicitly.\\n3. Start with sensible defaults: `learning_rate=0.05`, `max_depth=6`, `n_estimators=2000` (relying on early stopping to find the right count). Tune in this order: (a) `n_estimators` via early stopping, (b) `max_depth` and `min_child_weight`, (c) `subsample` and `colsample_bytree`, (d) regularization (`reg_alpha`, `reg_lambda`), (e) `learning_rate` reduction with proportional `n_estimators` increase.\\n4. For multi-framework comparison, hold folds constant (pass the same `cv` splitter) and use identical metric definitions. Report results in a comparison table.\\n5. Compute SHAP values using `shap.TreeExplainer` for global and local feature attribution. Generate summary plots, dependence plots for top features, and force plots for individual predictions when interpretability is requested.\\n6. Handle class imbalance with `scale_pos_weight` (XGBoost), `is_unbalance` (LightGBM), or `auto_class_weights` (CatBoost). Compare against SMOTE-in-pipeline only if simple weighting underperforms.\\n7. For categorical features, prefer LightGBM or CatBoost native categorical handling over one-hot encoding when cardinality > 10.\\n8. 
Save the final model with native `.save_model()` format and record hyperparameters, best iteration, and validation metric in metadata.\\n\\n## Decision tree\\n- If dataset has > 100k rows and many categorical features -> prefer LightGBM for speed; use CatBoost if categoricals have high cardinality and natural ordering is absent.\\n- If dataset is small (< 5k rows) -> reduce `max_depth` to 3-4 and increase regularization to prevent overfitting; consider whether a simpler sklearn model might suffice.\\n- If the task is ranking -> use `XGBRanker` or `LGBMRanker` with `lambdarank` objective.\\n- If feature interactions matter for explanation -> use SHAP interaction values, not just main-effect importance.\\n- If prediction latency is critical -> export to ONNX or use LightGBM's `predict_disable_shape_check` for faster inference.\\n- If reproducibility is mandatory -> pin `random_state` in the booster AND the data split, and record library version.\\n\\n## Anti-patterns\\n- NEVER set `n_estimators` to a fixed value without early stopping. This either underfits or overfits by construction.\\n- NEVER tune hyperparameters on the test set. Use a validation set or inner cross-validation; the test set is touched exactly once.\\n- NEVER compare frameworks with different preprocessing (e.g., one-hot for XGBoost but native categoricals for CatBoost) and call it a fair comparison.\\n- NEVER ignore the `best_iteration` attribute after early stopping. Predictions must use `best_iteration` to avoid including over-trained trees.\\n- NEVER rely solely on `feature_importances_` (gain-based) for feature selection. 
Gain importance is biased toward high-cardinality and correlated features.\\n\\n## Common mistakes\\n- Using `eval_metric` that does not match the business objective (e.g., `logloss` for early stopping but reporting `F1`).\\n- Forgetting to pass `categorical_feature` to LightGBM, causing it to treat integer-encoded categoricals as continuous.\\n- Setting `scale_pos_weight` AND applying SMOTE simultaneously, which double-corrects for imbalance.\\n- Running SHAP on the training set instead of the validation/test set, which inflates apparent feature relevance.\\n- Not setting `verbosity=0` or `verbose=-1` during hyperparameter search, flooding logs with thousands of training lines.\\n- Using `pickle` instead of the framework's native `.save_model()`, which breaks across library version upgrades.\\n\\n## Output contract\\n- Report best hyperparameters, best iteration number, and validation metric trajectory (or at minimum start/best/final values).\\n- Include the class-imbalance strategy used and its rationale.\\n- Provide SHAP summary plots or feature importance rankings with the method explicitly named.\\n- Never report train-only metrics as final performance. Always include validation or test metrics.\\n- Record the framework name and version (e.g., `xgboost==2.0.3`) in artifact metadata.\\n- If multiple frameworks were compared, include a side-by-side metric table with identical folds.\\n- Save the model in native format alongside a JSON metadata sidecar.\\n\\n## Composability hints\\n- Before this expert -> use the **Data Cleaning Expert** for null handling and type coercion. 
Gradient boosters handle NaNs natively (XGBoost, LightGBM) but benefit from clean categoricals.\\n- Before this expert -> use the **Scikit-learn Modeling Expert** if a quick linear baseline is needed for comparison.\\n- After this expert -> use the **Visualization Expert** to plot SHAP summaries, learning curves, or metric comparisons.\\n- After this expert -> use the **Machine Learning Export Expert** to convert the model to ONNX or package it for serving.\\n- Related -> the **Statistics Expert** for post-hoc significance tests when comparing model performance across folds.\"), score=0.6425158381462097, sparse_score=0.6425158381462097, dense_score=None)]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from skill_registry_rag.retriever import SkillRetriever\n",
    "\n",
@@ -93,6 +258,31 @@
    "hits # shows top 3 relevant expert cards"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "de519191",
+   "metadata": {},
+   "source": [
+    "### Sample Output\n",
+    "\n",
+    "```text\n",
+    "Top-3 Retrieval Results:\n",
+    "\n",
+    "1. role.data-analyst\n",
+    "   Title: Data Analyst Role Orchestrator\n",
+    "   Score: 1.00\n",
+    "   Provider Support: codex, claude\n",
+    "\n",
+    "2. docs.slides-pptx\n",
+    "   Title: Slide Deck Creation (PPTX)\n",
+    "   Score: 0.67\n",
+    "\n",
+    "3. 
ml.gradient-boosting\n",
+    "   Title: Gradient Boosting with XGBoost and LightGBM\n",
+    "   Score: 0.64\n",
+    "```"
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "5c85573e",
@@ -106,10 +296,49 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
   "id": "47c4c7a6",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "USER QUERY:\n",
+      "clean messy sales data and generate charts\n",
+      "\n",
+      "RETRIEVED CONTEXT:\n",
+      "# Data Analyst Role Expert\n",
+      "\n",
+      "Use this role when the request needs end-to-end tabular analysis: data profiling, cleaning, exploratory analysis, baseline modeling, and clear visual communication.\n",
+      "\n",
+      "## Allowed expert dependencies\n",
+      "\n",
+      "- `data.pandas-advanced`\n",
+      "- `data.sql-queries`\n",
+      "- `viz.matplotlib-seaborn`\n",
+      "- `ml.sklearn-modeling`\n",
+      "- `stats.scipy-statsmodels`\n",
+      "\n",
+      "## Execution behavior\n",
+      "\n",
+      "1. Start with a data quality audit:\n",
+      "   nulls, dtypes, duplicates, outliers, key integrity, and temporal coverage.\n",
+      "2. Normalize and clean data using reproducible transformations.\n",
+      "3. Produce concise EDA:\n",
+      "   distributions, trends, segmentation, and relationship charts.\n",
+      "4. If prediction is requested, build a leakage-safe baseline model with validation metrics.\n",
+      "5. Explain findings in business terms:\n",
+      "   what changed, how much, and what action is implied.\n",
+      "6. 
End with caveats and next steps.\n", + "\n", + "## Output contract\n", + "\n", + "- `profile_summary`: row/column counts, missingness, type issues, and anomalies.\n", + "- `eda_insights`: ranked insights \n" + ] + } + ], "source": [ "context = \"\\n\\n\".join(hit.card.instruction_text for hit in hits)\n", "\n", @@ -120,6 +349,40 @@ "print(context[:1000]) # prints first 1000 characters of combined instructions" ] }, + { + "cell_type": "markdown", + "id": "9f832f4c", + "metadata": {}, + "source": [ + "### Sample Output\n", + "\n", + "```text\n", + "USER QUERY:\n", + "clean messy sales data and generate charts\n", + "\n", + "RETRIEVED CONTEXT:\n", + "# Data Analyst Role Expert\n", + "\n", + "Use this role when the request needs end-to-end tabular analysis:\n", + "data profiling, cleaning, exploratory analysis, baseline modeling,\n", + "and clear visual communication.\n", + "\n", + "Allowed expert dependencies:\n", + "- data.pandas-advanced\n", + "- data.sql-queries\n", + "- viz.matplotlib-seaborn\n", + "- ml.sklearn-modeling\n", + "- stats.scipy-statsmodels\n", + "\n", + "Execution behavior:\n", + "1. Perform data quality audit\n", + "2. Normalize and clean data\n", + "3. Produce EDA with charts\n", + "4. Build baseline model if required\n", + "5. 
Explain findings in business terms\n",
+    "```"
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "f232afa3",
@@ -134,10 +397,70 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
   "id": "08b5202d",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Query: clean messy sales data\n",
+      "# Data Analyst Role Expert\n",
+      "\n",
+      "Use this role when the request needs end-to-end tabular analysis: data profiling, cleaning, exploratory analysis, baseline modeling, and clear visual communication.\n",
+      "\n",
+      "## Allowed expert dependencies\n",
+      "\n",
+      "- `data.pandas-advanced`\n",
+      "- `data.sql-queries`\n",
+      "- `viz.matplotlib-seaborn`\n",
+      "- `ml.sklearn-modeling`\n",
+      "- `stats.scipy-statsmodels`\n",
+      "\n",
+      "## Execution behavior\n",
+      "\n",
+      "1. Start with a data quality audit:\n",
+      "   nulls, dtypes, duplicates, outliers, key integrity, and temporal coverage.\n",
+      "2. Normaliz\n",
+      "--------------------------------------------------\n",
+      "Query: generate charts for sales\n",
+      "# Slide Creation Expert (PPTX)\n",
+      "\n",
+      "Use this expert for executive summaries, project updates, and narrative decks.\n",
+      "\n",
+      "## Execution behavior\n",
+      "\n",
+      "1. Derive a slide storyline first (problem, analysis, findings, actions).\n",
+      "2. Allocate one key message per slide and keep text concise.\n",
+      "3. Generate PPTX using `python-pptx` with consistent templates.\n",
+      "4. Embed charts/tables as visuals instead of dense paragraphs.\n",
+      "5. Export final deck and register slide artifact with version/date.\n",
+      "\n",
+      "## Output contract\n",
+      "\n",
+      "- Include titl\n",
+      "--------------------------------------------------\n",
+      "Query: summarize findings\n",
+      "# Slide Creation Expert (PPTX)\n",
+      "\n",
+      "Use this expert for executive summaries, project updates, and narrative decks.\n",
+      "\n",
+      "## Execution behavior\n",
+      "\n",
+      "1. 
Derive a slide storyline first (problem, analysis, findings, actions).\n", + "2. Allocate one key message per slide and keep text concise.\n", + "3. Generate PPTX using `python-pptx` with consistent templates.\n", + "4. Embed charts/tables as visuals instead of dense paragraphs.\n", + "5. Export final deck and register slide artifact with version/date.\n", + "\n", + "## Output contract\n", + "\n", + "- Include titl\n", + "--------------------------------------------------\n" + ] + } + ], "source": [ "queries = [\n", " \"clean messy sales data\",\n", @@ -152,6 +475,65 @@ " print(context[:500])\n", " print(\"-\"*50)" ] + }, + { + "cell_type": "markdown", + "id": "bbabd1f3", + "metadata": {}, + "source": [ + "### Sample Output\n", + "\n", + "---\n", + "\n", + "## Query: clean messy sales data\n", + "\n", + "### Retrieved Expert: Data Analyst Role Expert\n", + "\n", + "Use this role when the request needs end-to-end tabular analysis:\n", + "data profiling, cleaning, exploratory analysis, baseline modeling,\n", + "and clear visual communication.\n", + "\n", + "#### Allowed Expert Dependencies\n", + "- data.pandas-advanced\n", + "- data.sql-queries\n", + "- viz.matplotlib-seaborn\n", + "- ml.sklearn-modeling\n", + "- stats.scipy-statsmodels\n", + "\n", + "#### Execution Behavior\n", + "1. Start with a data quality audit:\n", + " - Null values\n", + " - Data types\n", + " - Duplicates\n", + " - Outliers\n", + " - Key integrity\n", + " - Temporal coverage\n", + "2. Normalize and clean data\n", + "3. Produce exploratory data analysis (EDA)\n", + "4. Build baseline model if required\n", + "5. 
Explain findings in business terms\n", + "\n", + "---\n", + "\n", + "## Query: generate charts for sales\n", + "\n", + "### Retrieved Expert: Slide Creation Expert (PPTX)\n", + "\n", + "Use this expert for executive summaries, project updates,\n", + "and narrative decks.\n", + "\n", + "#### Execution Behavior\n", + "- Build structured slide decks\n", + "- Embed charts and visuals\n", + "- Export PPTX file\n", + "\n", + "#### Output Contract\n", + "- Title slide\n", + "- Evidence slides with charts\n", + "- Recommendation slide\n", + "\n", + "---" + ] } ], "metadata": {