From 8a7e9887b88d99b48abec52121d7137e99a11899 Mon Sep 17 00:00:00 2001 From: prajwal Date: Sun, 1 Mar 2026 13:00:02 +0530 Subject: [PATCH 1/6] Add getting-started notebook demonstrating full SkillMesh workflow --- examples/notebooks/getting-started.ipynb | 208 +++++++++++++++++++++++ 1 file changed, 208 insertions(+) create mode 100644 examples/notebooks/getting-started.ipynb diff --git a/examples/notebooks/getting-started.ipynb b/examples/notebooks/getting-started.ipynb new file mode 100644 index 0000000..67f0d27 --- /dev/null +++ b/examples/notebooks/getting-started.ipynb @@ -0,0 +1,208 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "816d4bbf", + "metadata": {}, + "source": [ + "# SkillMesh Getting Started\n", + "\n", + "This notebook demonstrates:\n", + "\n", + "- Loading a registry\n", + "- Retrieving top-K expert cards\n", + "- Emitting provider-ready context\n", + "- Using it in a mock agent loop" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "d7480e85", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "92" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from skill_registry_rag.registry import load_registry\n", + "\n", + "registry = load_registry(\"../registry/tools.json\")\n", + "\n", + "len(registry)" + ] + }, + { + "cell_type": "markdown", + "id": "443289f7", + "metadata": {}, + "source": [ + "## 2. 
Retrieve Top-K Expert Cards" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "3caa3116", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[RetrievalHit(card=ToolCard(id='role.data-analyst', title='Data Analyst Role Orchestrator', domain='role_orchestrator', instruction_file='roles/data-analyst.md', description='Role expert that orchestrates profiling, cleaning, EDA, baseline modeling, and visualization for tabular analytics tasks.', tags=['role', 'data-analyst', 'eda', 'pandas', 'visualization', 'baseline-modeling'], tool_hints=['data.pandas-advanced', 'data.sql-queries', 'viz.matplotlib-seaborn', 'ml.sklearn-modeling', 'stats.scipy-statsmodels'], examples=['Profile messy dataset, clean it, and deliver insight dashboard', 'Run EDA plus baseline prediction with leakage-safe validation'], aliases=['role-data-analyst', 'analytics-orchestrator'], dependencies=['data.pandas-advanced', 'data.sql-queries', 'viz.matplotlib-seaborn', 'ml.sklearn-modeling', 'stats.scipy-statsmodels'], output_artifacts=['data_profile_summary', 'eda_findings', 'visual_report', 'baseline_model_report'], quality_checks=['missingness_and_dtype_audit_completed', 'insights_backed_by_numeric_evidence', 'charts_have_labels_units_and_titles'], constraints=['no_causal_claims_from_correlation_only', 'use_only_allowed_dependencies_unless_explicitly_authorized'], input_contract={'required': 'tabular dataset and analysis objective', 'optional': 'target metric, business context, and prediction requirement'}, risk_level='medium', maturity='beta', metadata={'provider_support': ['codex', 'claude'], 'owner': 'community', 'catalog_tier': 'roles', 'instruction_version': 'v1'}, instruction_text='# Data Analyst Role Expert\\n\\nUse this role when the request needs end-to-end tabular analysis: data profiling, cleaning, exploratory analysis, baseline modeling, and clear visual communication.\\n\\n## Allowed expert dependencies\\n\\n- `data.pandas-advanced`\\n- 
`data.sql-queries`\\n- `viz.matplotlib-seaborn`\\n- `ml.sklearn-modeling`\\n- `stats.scipy-statsmodels`\\n\\n## Execution behavior\\n\\n1. Start with a data quality audit:\\n nulls, dtypes, duplicates, outliers, key integrity, and temporal coverage.\\n2. Normalize and clean data using reproducible transformations.\\n3. Produce concise EDA:\\n distributions, trends, segmentation, and relationship charts.\\n4. If prediction is requested, build a leakage-safe baseline model with validation metrics.\\n5. Explain findings in business terms:\\n what changed, how much, and what action is implied.\\n6. End with caveats and next steps.\\n\\n## Output contract\\n\\n- `profile_summary`: row/column counts, missingness, type issues, and anomalies.\\n- `eda_insights`: ranked insights with numeric evidence.\\n- `visuals`: labeled plots with clear units and titles.\\n- `model_section` (optional): baseline model, metrics, and limitations.\\n- `repro_steps`: commands/notebook steps to reproduce.\\n\\n## Guardrails\\n\\n- Do not skip data validation before insights.\\n- Do not claim causality from correlation.\\n- Do not use tools outside allowed dependencies unless explicitly approved.'), score=1.0, sparse_score=1.0, dense_score=None),\n", + " RetrievalHit(card=ToolCard(id='docs.slides-pptx', title='Slide Deck Creation (PPTX)', domain='presentation_generation', instruction_file='instructions/slide-creation.md', description='Narrative deck creation with PPTX output and chart integration.', tags=['slides', 'pptx', 'python-pptx', 'presentation', 'storytelling'], tool_hints=['python-pptx', 'matplotlib', 'pillow'], examples=['Executive summary deck', 'Findings deck with visuals'], aliases=['deck-generation', 'slide-authoring'], dependencies=['python-pptx', 'pillow', 'matplotlib'], output_artifacts=['pptx_deck', 'slide_outline', 'speaker_notes_stub'], quality_checks=['title_and_recommendation_slides_present', 'visual_hierarchy_consistent'], constraints=['one_core_message_per_slide'], 
input_contract={'required': 'structured storyline or slide outline', 'optional': 'brand template and theme colors'}, risk_level='low', maturity='stable', metadata={'provider_support': ['codex', 'claude'], 'install_extra': 'slides'}, instruction_text='# Slide Creation Expert (PPTX)\\n\\nUse this expert for executive summaries, project updates, and narrative decks.\\n\\n## Execution behavior\\n\\n1. Derive a slide storyline first (problem, analysis, findings, actions).\\n2. Allocate one key message per slide and keep text concise.\\n3. Generate PPTX using `python-pptx` with consistent templates.\\n4. Embed charts/tables as visuals instead of dense paragraphs.\\n5. Export final deck and register slide artifact with version/date.\\n\\n## Output contract\\n\\n- Include title slide, evidence slides, and recommendation slide.\\n- Keep visual hierarchy consistent across slides.\\n- Add speaker-note placeholders for critical assumptions.\\n- Ensure deck can be opened in standard PowerPoint clients.'), score=0.667872965335846, sparse_score=0.667872965335846, dense_score=None),\n", + " RetrievalHit(card=ToolCard(id='ml.gradient-boosting', title='Gradient Boosting with XGBoost and LightGBM', domain='machine_learning', instruction_file='instructions/gradient-boosting-xgb-lgbm.md', description='High-performance boosting, tuning, and SHAP-based attribution.', tags=['xgboost', 'lightgbm', 'catboost', 'shap', 'boosting'], tool_hints=['xgboost', 'lightgbm', 'catboost', 'shap'], examples=['Tune XGBoost with early stopping', 'Compare LightGBM vs CatBoost'], aliases=['gbm', 'tree-boosting'], dependencies=['xgboost', 'lightgbm', 'catboost', 'shap', 'scikit-learn'], output_artifacts=['booster_model', 'feature_importance', 'shap_summary'], quality_checks=['early_stopping_enabled', 'class_imbalance_strategy', 'feature_leakage_review'], constraints=['consistent_validation_metric_across_models'], input_contract={'required': 'tabular train/validation datasets', 'optional': 'class weights and 
monotonic constraints'}, risk_level='medium', maturity='stable', metadata={'provider_support': ['codex', 'claude'], 'install_extra': 'boosting'}, instruction_text=\"# Gradient Boosting Expert (XGBoost / LightGBM / CatBoost)\\n\\nUse this expert for high-performance tabular modeling with gradient boosting frameworks, including classification, regression, and ranking tasks.\\n\\n## When to use this expert\\n- The task involves structured/tabular data where tree-based models are expected to excel.\\n- The user needs state-of-the-art predictive performance with feature attribution.\\n- Early stopping, hyperparameter tuning, or framework comparison is required.\\n- SHAP-based model interpretation or class imbalance handling is requested.\\n\\n## Execution behavior\\n\\n1. Build a clean train/validation/test split with leakage controls. For temporal data, split by time. For classification, use stratified splits to preserve class ratios.\\n2. Configure early stopping with a patience of 20-50 rounds on the validation set. Pass `eval_set` (XGBoost/LightGBM) or `eval_set` (CatBoost) explicitly.\\n3. Start with sensible defaults: `learning_rate=0.05`, `max_depth=6`, `n_estimators=2000` (relying on early stopping to find the right count). Tune in this order: (a) `n_estimators` via early stopping, (b) `max_depth` and `min_child_weight`, (c) `subsample` and `colsample_bytree`, (d) regularization (`reg_alpha`, `reg_lambda`), (e) `learning_rate` reduction with proportional `n_estimators` increase.\\n4. For multi-framework comparison, hold folds constant (pass the same `cv` splitter) and use identical metric definitions. Report results in a comparison table.\\n5. Compute SHAP values using `shap.TreeExplainer` for global and local feature attribution. Generate summary plots, dependence plots for top features, and force plots for individual predictions when interpretability is requested.\\n6. 
Handle class imbalance with `scale_pos_weight` (XGBoost), `is_unbalance` (LightGBM), or `auto_class_weights` (CatBoost). Compare against SMOTE-in-pipeline only if simple weighting underperforms.\\n7. For categorical features, prefer LightGBM or CatBoost native categorical handling over one-hot encoding when cardinality > 10.\\n8. Save the final model with native `.save_model()` format and record hyperparameters, best iteration, and validation metric in metadata.\\n\\n## Decision tree\\n- If dataset has > 100k rows and many categorical features -> prefer LightGBM for speed; use CatBoost if categoricals have high cardinality and natural ordering is absent.\\n- If dataset is small (< 5k rows) -> reduce `max_depth` to 3-4 and increase regularization to prevent overfitting; consider whether a simpler sklearn model might suffice.\\n- If the task is ranking -> use `XGBRanker` or `LGBMRanker` with `lambdarank` objective.\\n- If feature interactions matter for explanation -> use SHAP interaction values, not just main-effect importance.\\n- If prediction latency is critical -> export to ONNX or use LightGBM's `predict_disable_shape_check` for faster inference.\\n- If reproducibility is mandatory -> pin `random_state` in the booster AND the data split, and record library version.\\n\\n## Anti-patterns\\n- NEVER set `n_estimators` to a fixed value without early stopping. This either underfits or overfits by construction.\\n- NEVER tune hyperparameters on the test set. Use a validation set or inner cross-validation; the test set is touched exactly once.\\n- NEVER compare frameworks with different preprocessing (e.g., one-hot for XGBoost but native categoricals for CatBoost) and call it a fair comparison.\\n- NEVER ignore the `best_iteration` attribute after early stopping. Predictions must use `best_iteration` to avoid including over-trained trees.\\n- NEVER rely solely on `feature_importances_` (gain-based) for feature selection. 
Gain importance is biased toward high-cardinality and correlated features.\\n\\n## Common mistakes\\n- Using `eval_metric` that does not match the business objective (e.g., `logloss` for early stopping but reporting `F1`).\\n- Forgetting to pass `categorical_feature` to LightGBM, causing it to treat integer-encoded categoricals as continuous.\\n- Setting `scale_pos_weight` AND applying SMOTE simultaneously, which double-corrects for imbalance.\\n- Running SHAP on the training set instead of the validation/test set, which inflates apparent feature relevance.\\n- Not setting `verbosity=0` or `verbose=-1` during hyperparameter search, flooding logs with thousands of training lines.\\n- Using `pickle` instead of the framework's native `.save_model()`, which breaks across library version upgrades.\\n\\n## Output contract\\n- Report best hyperparameters, best iteration number, and validation metric trajectory (or at minimum start/best/final values).\\n- Include the class-imbalance strategy used and its rationale.\\n- Provide SHAP summary plots or feature importance rankings with the method explicitly named.\\n- Never report train-only metrics as final performance. Always include validation or test metrics.\\n- Record the framework name and version (e.g., `xgboost==2.0.3`) in artifact metadata.\\n- If multiple frameworks were compared, include a side-by-side metric table with identical folds.\\n- Save the model in native format alongside a JSON metadata sidecar.\\n\\n## Composability hints\\n- Before this expert -> use the **Data Cleaning Expert** for null handling and type coercion. 
Gradient boosters handle NaNs natively (XGBoost, LightGBM) but benefit from clean categoricals.\\n- Before this expert -> use the **Scikit-learn Modeling Expert** if a quick linear baseline is needed for comparison.\\n- After this expert -> use the **Visualization Expert** to plot SHAP summaries, learning curves, or metric comparisons.\\n- After this expert -> use the **Machine Learning Export Expert** to convert the model to ONNX or package it for serving.\\n- Related -> the **Statistics Expert** for post-hoc significance tests when comparing model performance across folds.\"), score=0.6425158381462097, sparse_score=0.6425158381462097, dense_score=None)]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from skill_registry_rag.retriever import SkillRetriever\n", + "\n", + "retriever = SkillRetriever(registry)\n", + "\n", + "results = retriever.retrieve(\n", + " query=\"clean messy sales data and generate charts\",\n", + " top_k=3\n", + ")\n", + "\n", + "results" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "47c4c7a6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# Data Analyst Role Expert\n", + "\n", + "Use this role when the request needs end-to-end tabular analysis: data profiling, cleaning, exploratory analysis, baseline modeling, and clear visual communication.\n", + "\n", + "## Allowed expert dependencies\n", + "\n", + "- `data.pandas-advanced`\n", + "- `data.sql-queries`\n", + "- `viz.matplotlib-seaborn`\n", + "- `ml.sklearn-modeling`\n", + "- `stats.scipy-statsmodels`\n", + "\n", + "## Execution behavior\n", + "\n", + "1. Start with a data quality audit:\n", + " nulls, dtypes, duplicates, outliers, key integrity, and temporal coverage.\n", + "2. Normalize and clean data using reproducible transformations.\n", + "3. Produce concise EDA:\n", + " distributions, trends, segmentation, and relationship charts.\n", + "4. 
If prediction is requested, build a leakage-safe baseline model with validation metrics.\n", + "5. Explain findings in business terms:\n", + " what changed, how much, and what action is implied.\n", + "6. End with caveats and next steps.\n", + "\n", + "## Output contract\n", + "\n", + "- `profile_summary`: row/column counts, missingness, type issues, and anomalies.\n", + "- `eda_insights`: ranked insights \n" + ] + } + ], + "source": [ + "context = \"\\n\\n\".join(\n", + " hit.card.instruction_text for hit in results\n", + ")\n", + "\n", + "print(context[:1000])" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "7b898caf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "USER QUERY:\n", + "clean messy sales data and generate charts\n", + "\n", + "RETRIEVED CONTEXT:\n", + "# Data Analyst Role Expert\n", + "\n", + "Use this role when the request needs end-to-end tabular analysis: data profiling, cleaning, exploratory analysis, baseline modeling, and clear visual communication.\n", + "\n", + "## Allowed expert dependencies\n", + "\n", + "- `data.pandas-advanced`\n", + "- `data.sql-queries`\n", + "- `viz.matplotlib-seaborn`\n", + "- `ml.sklearn-modeling`\n", + "- `stats.scipy-statsmodels`\n", + "\n", + "## Execution behavior\n", + "\n", + "1. Start with a data quality audit:\n", + " nulls, dtypes, duplicates, outliers, key integrity, and temporal coverage.\n", + "2. Normalize and clean data using reproducible transformations.\n", + "3. Produce concise EDA:\n", + " distributions, trends, segmentation, and relationship charts.\n", + "4. If prediction is requested, build a leakage-safe baseline model with validation metrics.\n", + "5. 
Explain findings in business terms:\n", + " what changed, how much, \n" + ] + } + ], + "source": [ + "user_query = \"clean messy sales data and generate charts\"\n", + "\n", + "retriever = SkillRetriever(registry)\n", + "hits = retriever.retrieve(user_query, top_k=3)\n", + "\n", + "context = \"\\n\\n\".join(hit.card.instruction_text for hit in hits)\n", + "\n", + "print(\"USER QUERY:\")\n", + "print(user_query)\n", + "\n", + "print(\"\\nRETRIEVED CONTEXT:\")\n", + "print(context[:800])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From c1e708f2c70393cc1f99a2645cddd3934bda61ab Mon Sep 17 00:00:00 2001 From: prajwal Date: Sun, 1 Mar 2026 16:00:46 +0530 Subject: [PATCH 2/6] Fix notebook: renumber mock agent loop to section 4 and add markdown explanations --- examples/notebooks/getting-started.ipynb | 195 ++++++++++++++++++----- 1 file changed, 152 insertions(+), 43 deletions(-) diff --git a/examples/notebooks/getting-started.ipynb b/examples/notebooks/getting-started.ipynb index 67f0d27..3ead37d 100644 --- a/examples/notebooks/getting-started.ipynb +++ b/examples/notebooks/getting-started.ipynb @@ -2,22 +2,65 @@ "cells": [ { "cell_type": "markdown", - "id": "816d4bbf", + "id": "b74f6b23", "metadata": {}, "source": [ "# SkillMesh Getting Started\n", "\n", - "This notebook demonstrates:\n", + "This notebook walks through the full SkillMesh workflow:\n", "\n", - "- Loading a registry\n", - "- Retrieving top-K expert cards\n", - "- Emitting provider-ready context\n", - "- Using it in a mock agent loop" + "1. Install SkillMesh dependencies \n", + "2. Load the registry \n", + "3. 
Retrieve top-K expert cards \n", + "4. Emit provider-ready context \n", + "5. Demonstrate a simple mock agent loop" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 52, + "id": "a02b3ef1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Obtaining file:///E:/ForOpensource/SkillMesh/examples/notebooks\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ERROR: file:///E:/ForOpensource/SkillMesh/examples/notebooks does not appear to be a Python project: neither 'setup.py' nor 'pyproject.toml' found.\n", + "\n", + "[notice] A new release of pip is available: 23.2.1 -> 26.0.1\n", + "[notice] To update, run: python.exe -m pip install --upgrade pip\n" + ] + } + ], + "source": [ + "# Install SkillMesh dependencies\n", + "!pip install -e ." + ] + }, + { + "cell_type": "markdown", + "id": "0f34646f", + "metadata": {}, + "source": [ + "## 1.Load SkillMesh Registry\n", + "\n", + "This cell loads the SkillMesh registry, which contains all expert cards.\n", + "\n", + "- `load_registry(path)` loads the registry JSON file.\n", + "- `len(registry)` shows how many expert cards were loaded." + ] + }, + { + "cell_type": "code", + "execution_count": 53, "id": "d7480e85", "metadata": {}, "outputs": [ @@ -27,7 +70,7 @@ "92" ] }, - "execution_count": 1, + "execution_count": 53, "metadata": {}, "output_type": "execute_result" } @@ -48,9 +91,21 @@ "## 2. Retrieve Top-K Expert Cards" ] }, + { + "cell_type": "markdown", + "id": "4129236f", + "metadata": {}, + "source": [ + "2. Retrieve Top-K Expert Cards\n", + "\n", + "- `SkillRetriever(registry)` creates a retriever object from the loaded registry.\n", + "- `query` is the user request.\n", + "- `top_k=3` means we want the top 3 expert cards matching the query." 
+ ] + }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 54, "id": "3caa3116", "metadata": {}, "outputs": [ @@ -62,7 +117,7 @@ " RetrievalHit(card=ToolCard(id='ml.gradient-boosting', title='Gradient Boosting with XGBoost and LightGBM', domain='machine_learning', instruction_file='instructions/gradient-boosting-xgb-lgbm.md', description='High-performance boosting, tuning, and SHAP-based attribution.', tags=['xgboost', 'lightgbm', 'catboost', 'shap', 'boosting'], tool_hints=['xgboost', 'lightgbm', 'catboost', 'shap'], examples=['Tune XGBoost with early stopping', 'Compare LightGBM vs CatBoost'], aliases=['gbm', 'tree-boosting'], dependencies=['xgboost', 'lightgbm', 'catboost', 'shap', 'scikit-learn'], output_artifacts=['booster_model', 'feature_importance', 'shap_summary'], quality_checks=['early_stopping_enabled', 'class_imbalance_strategy', 'feature_leakage_review'], constraints=['consistent_validation_metric_across_models'], input_contract={'required': 'tabular train/validation datasets', 'optional': 'class weights and monotonic constraints'}, risk_level='medium', maturity='stable', metadata={'provider_support': ['codex', 'claude'], 'install_extra': 'boosting'}, instruction_text=\"# Gradient Boosting Expert (XGBoost / LightGBM / CatBoost)\\n\\nUse this expert for high-performance tabular modeling with gradient boosting frameworks, including classification, regression, and ranking tasks.\\n\\n## When to use this expert\\n- The task involves structured/tabular data where tree-based models are expected to excel.\\n- The user needs state-of-the-art predictive performance with feature attribution.\\n- Early stopping, hyperparameter tuning, or framework comparison is required.\\n- SHAP-based model interpretation or class imbalance handling is requested.\\n\\n## Execution behavior\\n\\n1. Build a clean train/validation/test split with leakage controls. For temporal data, split by time. 
For classification, use stratified splits to preserve class ratios.\\n2. Configure early stopping with a patience of 20-50 rounds on the validation set. Pass `eval_set` (XGBoost/LightGBM) or `eval_set` (CatBoost) explicitly.\\n3. Start with sensible defaults: `learning_rate=0.05`, `max_depth=6`, `n_estimators=2000` (relying on early stopping to find the right count). Tune in this order: (a) `n_estimators` via early stopping, (b) `max_depth` and `min_child_weight`, (c) `subsample` and `colsample_bytree`, (d) regularization (`reg_alpha`, `reg_lambda`), (e) `learning_rate` reduction with proportional `n_estimators` increase.\\n4. For multi-framework comparison, hold folds constant (pass the same `cv` splitter) and use identical metric definitions. Report results in a comparison table.\\n5. Compute SHAP values using `shap.TreeExplainer` for global and local feature attribution. Generate summary plots, dependence plots for top features, and force plots for individual predictions when interpretability is requested.\\n6. Handle class imbalance with `scale_pos_weight` (XGBoost), `is_unbalance` (LightGBM), or `auto_class_weights` (CatBoost). Compare against SMOTE-in-pipeline only if simple weighting underperforms.\\n7. For categorical features, prefer LightGBM or CatBoost native categorical handling over one-hot encoding when cardinality > 10.\\n8. 
Save the final model with native `.save_model()` format and record hyperparameters, best iteration, and validation metric in metadata.\\n\\n## Decision tree\\n- If dataset has > 100k rows and many categorical features -> prefer LightGBM for speed; use CatBoost if categoricals have high cardinality and natural ordering is absent.\\n- If dataset is small (< 5k rows) -> reduce `max_depth` to 3-4 and increase regularization to prevent overfitting; consider whether a simpler sklearn model might suffice.\\n- If the task is ranking -> use `XGBRanker` or `LGBMRanker` with `lambdarank` objective.\\n- If feature interactions matter for explanation -> use SHAP interaction values, not just main-effect importance.\\n- If prediction latency is critical -> export to ONNX or use LightGBM's `predict_disable_shape_check` for faster inference.\\n- If reproducibility is mandatory -> pin `random_state` in the booster AND the data split, and record library version.\\n\\n## Anti-patterns\\n- NEVER set `n_estimators` to a fixed value without early stopping. This either underfits or overfits by construction.\\n- NEVER tune hyperparameters on the test set. Use a validation set or inner cross-validation; the test set is touched exactly once.\\n- NEVER compare frameworks with different preprocessing (e.g., one-hot for XGBoost but native categoricals for CatBoost) and call it a fair comparison.\\n- NEVER ignore the `best_iteration` attribute after early stopping. Predictions must use `best_iteration` to avoid including over-trained trees.\\n- NEVER rely solely on `feature_importances_` (gain-based) for feature selection. 
Gain importance is biased toward high-cardinality and correlated features.\\n\\n## Common mistakes\\n- Using `eval_metric` that does not match the business objective (e.g., `logloss` for early stopping but reporting `F1`).\\n- Forgetting to pass `categorical_feature` to LightGBM, causing it to treat integer-encoded categoricals as continuous.\\n- Setting `scale_pos_weight` AND applying SMOTE simultaneously, which double-corrects for imbalance.\\n- Running SHAP on the training set instead of the validation/test set, which inflates apparent feature relevance.\\n- Not setting `verbosity=0` or `verbose=-1` during hyperparameter search, flooding logs with thousands of training lines.\\n- Using `pickle` instead of the framework's native `.save_model()`, which breaks across library version upgrades.\\n\\n## Output contract\\n- Report best hyperparameters, best iteration number, and validation metric trajectory (or at minimum start/best/final values).\\n- Include the class-imbalance strategy used and its rationale.\\n- Provide SHAP summary plots or feature importance rankings with the method explicitly named.\\n- Never report train-only metrics as final performance. Always include validation or test metrics.\\n- Record the framework name and version (e.g., `xgboost==2.0.3`) in artifact metadata.\\n- If multiple frameworks were compared, include a side-by-side metric table with identical folds.\\n- Save the model in native format alongside a JSON metadata sidecar.\\n\\n## Composability hints\\n- Before this expert -> use the **Data Cleaning Expert** for null handling and type coercion. 
Gradient boosters handle NaNs natively (XGBoost, LightGBM) but benefit from clean categoricals.\\n- Before this expert -> use the **Scikit-learn Modeling Expert** if a quick linear baseline is needed for comparison.\\n- After this expert -> use the **Visualization Expert** to plot SHAP summaries, learning curves, or metric comparisons.\\n- After this expert -> use the **Machine Learning Export Expert** to convert the model to ONNX or package it for serving.\\n- Related -> the **Statistics Expert** for post-hoc significance tests when comparing model performance across folds.\"), score=0.6425158381462097, sparse_score=0.6425158381462097, dense_score=None)]" ] }, - "execution_count": 2, + "execution_count": 54, "metadata": {}, "output_type": "execute_result" } @@ -72,17 +127,26 @@ "\n", "retriever = SkillRetriever(registry)\n", "\n", - "results = retriever.retrieve(\n", - " query=\"clean messy sales data and generate charts\",\n", - " top_k=3\n", - ")\n", + "user_query = \"clean messy sales data and generate charts\"\n", + "hits = retriever.retrieve(user_query, top_k=3)\n", "\n", - "results" + "hits # shows top 3 relevant expert cards" + ] + }, + { + "cell_type": "markdown", + "id": "5c85573e", + "metadata": {}, + "source": [ + "## 3. Emit Context for LLM\n", + "\n", + "This step formats the instructions from the top-K retrieval results into a single context string\n", + "that could be passed to an AI model (like Claude or Codex) for answering the query." 
] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 55, "id": "47c4c7a6", "metadata": {}, "outputs": [ @@ -90,6 +154,10 @@ "name": "stdout", "output_type": "stream", "text": [ + "USER QUERY:\n", + "clean messy sales data and generate charts\n", + "\n", + "RETRIEVED CONTEXT:\n", "# Data Analyst Role Expert\n", "\n", "Use this role when the request needs end-to-end tabular analysis: data profiling, cleaning, exploratory analysis, baseline modeling, and clear visual communication.\n", @@ -122,27 +190,38 @@ } ], "source": [ - "context = \"\\n\\n\".join(\n", - " hit.card.instruction_text for hit in results\n", - ")\n", + "context = \"\\n\\n\".join(hit.card.instruction_text for hit in hits)\n", + "\n", + "print(\"USER QUERY:\")\n", + "print(user_query)\n", "\n", - "print(context[:1000])" + "print(\"\\nRETRIEVED CONTEXT:\")\n", + "print(context[:1000]) # prints first 1000 characters of combined instructions" + ] + }, + { + "cell_type": "markdown", + "id": "f232afa3", + "metadata": {}, + "source": [ + "## 5 Mock Agent Loop\n", + "\n", + "- Simulates multiple queries in a loop. \n", + "- Retrieves top-K expert cards for each query. \n", + "- Shows how an LLM could consume this context iteratively." ] }, { "cell_type": "code", - "execution_count": 4, - "id": "7b898caf", + "execution_count": 56, + "id": "08b5202d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "USER QUERY:\n", - "clean messy sales data and generate charts\n", - "\n", - "RETRIEVED CONTEXT:\n", + "Query: clean messy sales data\n", "# Data Analyst Role Expert\n", "\n", "Use this role when the request needs end-to-end tabular analysis: data profiling, cleaning, exploratory analysis, baseline modeling, and clear visual communication.\n", @@ -159,28 +238,58 @@ "\n", "1. Start with a data quality audit:\n", " nulls, dtypes, duplicates, outliers, key integrity, and temporal coverage.\n", - "2. 
Normalize and clean data using reproducible transformations.\n", - "3. Produce concise EDA:\n", - " distributions, trends, segmentation, and relationship charts.\n", - "4. If prediction is requested, build a leakage-safe baseline model with validation metrics.\n", - "5. Explain findings in business terms:\n", - " what changed, how much, \n" + "2. Normaliz\n", + "--------------------------------------------------\n", + "Query: generate charts for sales\n", + "# Slide Creation Expert (PPTX)\n", + "\n", + "Use this expert for executive summaries, project updates, and narrative decks.\n", + "\n", + "## Execution behavior\n", + "\n", + "1. Derive a slide storyline first (problem, analysis, findings, actions).\n", + "2. Allocate one key message per slide and keep text concise.\n", + "3. Generate PPTX using `python-pptx` with consistent templates.\n", + "4. Embed charts/tables as visuals instead of dense paragraphs.\n", + "5. Export final deck and register slide artifact with version/date.\n", + "\n", + "## Output contract\n", + "\n", + "- Include titl\n", + "--------------------------------------------------\n", + "Query: summarize findings\n", + "# Slide Creation Expert (PPTX)\n", + "\n", + "Use this expert for executive summaries, project updates, and narrative decks.\n", + "\n", + "## Execution behavior\n", + "\n", + "1. Derive a slide storyline first (problem, analysis, findings, actions).\n", + "2. Allocate one key message per slide and keep text concise.\n", + "3. Generate PPTX using `python-pptx` with consistent templates.\n", + "4. Embed charts/tables as visuals instead of dense paragraphs.\n", + "5. 
Export final deck and register slide artifact with version/date.\n", + "\n", + "## Output contract\n", + "\n", + "- Include titl\n", + "--------------------------------------------------\n" ] } ], "source": [ - "user_query = \"clean messy sales data and generate charts\"\n", - "\n", - "retriever = SkillRetriever(registry)\n", - "hits = retriever.retrieve(user_query, top_k=3)\n", + "queries = [\n", + " \"clean messy sales data\",\n", + " \"generate charts for sales\",\n", + " \"summarize findings\"\n", + "]\n", "\n", - "context = \"\\n\\n\".join(hit.card.instruction_text for hit in hits)\n", - "\n", - "print(\"USER QUERY:\")\n", - "print(user_query)\n", - "\n", - "print(\"\\nRETRIEVED CONTEXT:\")\n", - "print(context[:800])" + "for query in queries:\n", + " hits = retriever.retrieve(query, top_k=3)\n", + " context = \"\\n\\n\".join(hit.card.instruction_text for hit in hits)\n", + " print(f\"Query: {query}\")\n", + " print(context[:500])\n", + " print(\"-\"*50)" ] } ], From 4271142af8a359a2cf6958bf2a646e2a8b650a86 Mon Sep 17 00:00:00 2001 From: prajwal Date: Sun, 1 Mar 2026 16:43:25 +0530 Subject: [PATCH 3/6] Add Markdown cells and fix headings in getting-started notebook --- examples/notebooks/getting-started.ipynb | 146 +++++++++++++++++++---- 1 file changed, 123 insertions(+), 23 deletions(-) diff --git a/examples/notebooks/getting-started.ipynb b/examples/notebooks/getting-started.ipynb index 3ead37d..421ccd2 100644 --- a/examples/notebooks/getting-started.ipynb +++ b/examples/notebooks/getting-started.ipynb @@ -16,9 +16,19 @@ "5. Demonstrate a simple mock agent loop" ] }, + { + "cell_type": "markdown", + "id": "e8fc1b2b", + "metadata": {}, + "source": [ + "## 1. Install SkillMesh dependencies\n", + "\n", + "This cell installs the SkillMesh package from the local repository in editable mode." 
+ ] + }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 5, "id": "a02b3ef1", "metadata": {}, "outputs": [ @@ -26,14 +36,112 @@ "name": "stdout", "output_type": "stream", "text": [ - "Obtaining file:///E:/ForOpensource/SkillMesh/examples/notebooks\n" + "Obtaining file:///E:/ForOpensource/SkillMesh\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Checking if build backend supports build_editable: started\n", + " Checking if build backend supports build_editable: finished with status 'done'\n", + " Getting requirements to build editable: started\n", + " Getting requirements to build editable: finished with status 'done'\n", + " Preparing editable metadata (pyproject.toml): started\n", + " Preparing editable metadata (pyproject.toml): finished with status 'done'\n", + "Requirement already satisfied: numpy>=1.24 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (2.4.2)\n", + "Requirement already satisfied: PyYAML>=6.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (6.0.3)\n", + "Requirement already satisfied: rank-bm25>=0.2.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (0.2.2)\n", + "Requirement already satisfied: jsonschema>=4.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (4.26.0)\n", + "Requirement already satisfied: chromadb>=0.5.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (1.5.2)\n", + "Requirement already satisfied: build>=1.0.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.4.0)\n", + "Requirement already satisfied: pydantic>=1.9 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (2.12.5)\n", + "Requirement already satisfied: pybase64>=1.4.1 in 
e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.4.3)\n", + "Requirement already satisfied: uvicorn[standard]>=0.18.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.41.0)\n", + "Requirement already satisfied: posthog<6.0.0,>=2.4.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (5.4.0)\n", + "Requirement already satisfied: typing-extensions>=4.5.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (4.15.0)\n", + "Requirement already satisfied: onnxruntime>=1.14.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.24.2)\n", + "Requirement already satisfied: opentelemetry-api>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", + "Requirement already satisfied: opentelemetry-exporter-otlp-proto-grpc>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", + "Requirement already satisfied: opentelemetry-sdk>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", + "Requirement already satisfied: tokenizers>=0.13.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.22.2)\n", + "Requirement already satisfied: pypika>=0.48.9 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.51.1)\n", + "Requirement already satisfied: tqdm>=4.65.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (4.67.3)\n", + "Requirement already satisfied: overrides>=7.3.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (7.7.0)\n", + "Requirement already 
satisfied: importlib-resources in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (6.5.2)\n", + "Requirement already satisfied: grpcio>=1.58.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.78.0)\n", + "Requirement already satisfied: bcrypt>=4.0.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (5.0.0)\n", + "Requirement already satisfied: typer>=0.9.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.24.1)\n", + "Requirement already satisfied: kubernetes>=28.1.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (35.0.0)\n", + "Requirement already satisfied: tenacity>=8.2.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (9.1.4)\n", + "Requirement already satisfied: mmh3>=4.0.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (5.2.0)\n", + "Requirement already satisfied: orjson>=3.9.12 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (3.11.7)\n", + "Requirement already satisfied: httpx>=0.27.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.28.1)\n", + "Requirement already satisfied: rich>=10.11.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (14.3.3)\n", + "Requirement already satisfied: attrs>=22.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (25.4.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (2025.9.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in 
e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (0.37.0)\n", + "Requirement already satisfied: rpds-py>=0.25.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (0.30.0)\n", + "Requirement already satisfied: packaging>=24.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from build>=1.0.3->chromadb>=0.5.0->skillmesh==0.1.0) (26.0)\n", + "Requirement already satisfied: pyproject_hooks in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from build>=1.0.3->chromadb>=0.5.0->skillmesh==0.1.0) (1.2.0)\n", + "Requirement already satisfied: colorama in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from build>=1.0.3->chromadb>=0.5.0->skillmesh==0.1.0) (0.4.6)\n", + "Requirement already satisfied: anyio in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (4.12.1)\n", + "Requirement already satisfied: certifi in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (2026.2.25)\n", + "Requirement already satisfied: httpcore==1.* in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.0.9)\n", + "Requirement already satisfied: idna in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.11)\n", + "Requirement already satisfied: h11>=0.16 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpcore==1.*->httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.16.0)\n", + "Requirement already satisfied: six>=1.9.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.17.0)\n", + "Requirement already satisfied: python-dateutil>=2.5.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from 
kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.9.0.post0)\n", + "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.9.0)\n", + "Requirement already satisfied: requests in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.32.5)\n", + "Requirement already satisfied: requests-oauthlib in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.0.0)\n", + "Requirement already satisfied: urllib3!=2.6.0,>=1.24.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.6.3)\n", + "Requirement already satisfied: durationpy>=0.7 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.10)\n", + "Requirement already satisfied: flatbuffers in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (25.12.19)\n", + "Requirement already satisfied: protobuf in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (6.33.5)\n", + "Requirement already satisfied: sympy in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (1.14.0)\n", + "Requirement already satisfied: importlib-metadata<8.8.0,>=6.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-api>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (8.7.1)\n", + "Requirement already satisfied: googleapis-common-protos~=1.57 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.72.0)\n", + "Requirement 
already satisfied: opentelemetry-exporter-otlp-proto-common==1.39.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", + "Requirement already satisfied: opentelemetry-proto==1.39.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", + "Requirement already satisfied: opentelemetry-semantic-conventions==0.60b1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-sdk>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.60b1)\n", + "Requirement already satisfied: backoff>=1.10.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from posthog<6.0.0,>=2.4.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.2.1)\n", + "Requirement already satisfied: distro>=1.5.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from posthog<6.0.0,>=2.4.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.9.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from pydantic>=1.9->chromadb>=0.5.0->skillmesh==0.1.0) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.41.5 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from pydantic>=1.9->chromadb>=0.5.0->skillmesh==0.1.0) (2.41.5)\n", + "Requirement already satisfied: typing-inspection>=0.4.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from pydantic>=1.9->chromadb>=0.5.0->skillmesh==0.1.0) (0.4.2)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from rich>=10.11.0->chromadb>=0.5.0->skillmesh==0.1.0) (4.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from rich>=10.11.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.19.2)\n", + "Requirement already satisfied: 
huggingface-hub<2.0,>=0.16.4 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (1.5.0)\n", + "Requirement already satisfied: click>=8.2.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from typer>=0.9.0->chromadb>=0.5.0->skillmesh==0.1.0) (8.3.1)\n", + "Requirement already satisfied: shellingham>=1.3.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from typer>=0.9.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.5.4)\n", + "Requirement already satisfied: annotated-doc>=0.0.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from typer>=0.9.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.0.4)\n", + "Requirement already satisfied: httptools>=0.6.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (0.7.1)\n", + "Requirement already satisfied: python-dotenv>=0.13 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (1.2.1)\n", + "Requirement already satisfied: watchfiles>=0.20 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (1.1.1)\n", + "Requirement already satisfied: websockets>=10.4 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (16.0)\n", + "Requirement already satisfied: filelock>=3.10.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (3.24.3)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (2026.2.0)\n", + "Requirement already satisfied: hf-xet<2.0.0,>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages 
(from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (1.3.2)\n", + "Requirement already satisfied: zipp>=3.20 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from importlib-metadata<8.8.0,>=6.0->opentelemetry-api>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.23.0)\n", + "Requirement already satisfied: mdurl~=0.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.1.2)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from requests->kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.4.4)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from requests-oauthlib->kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.3.1)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from sympy->onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (1.3.0)\n", + "Building wheels for collected packages: skillmesh\n", + " Building editable for skillmesh (pyproject.toml): started\n", + " Building editable for skillmesh (pyproject.toml): finished with status 'done'\n", + " Created wheel for skillmesh: filename=skillmesh-0.1.0-0.editable-py3-none-any.whl size=5534 sha256=5c7a3f4fb3c8a58a9891fdc3c5f2bd3930d5bc38eb9d46b6a6d82bb26722dca0\n", + " Stored in directory: C:\\Users\\DELL\\AppData\\Local\\Temp\\pip-ephem-wheel-cache-dm_bov34\\wheels\\26\\fc\\af\\1aa2362d8ccbc2ed9db0df865eeffcbf5130dc61876fdaf17b\n", + "Successfully built skillmesh\n", + "Installing collected packages: skillmesh\n", + " Attempting uninstall: skillmesh\n", + " Found existing installation: skillmesh 0.1.0\n", + " Uninstalling skillmesh-0.1.0:\n", + " Successfully uninstalled skillmesh-0.1.0\n", + "Successfully installed skillmesh-0.1.0\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ - "ERROR: file:///E:/ForOpensource/SkillMesh/examples/notebooks does not appear to be a Python project: neither 'setup.py' nor 'pyproject.toml' found.\n", "\n", "[notice] A new release of pip is available: 23.2.1 -> 26.0.1\n", "[notice] To update, run: python.exe -m pip install --upgrade pip\n" @@ -41,8 +149,8 @@ } ], "source": [ - "# Install SkillMesh dependencies\n", - "!pip install -e ." + "# Install SkillMesh dependencies from root folder\n", + "!pip install -e ../.." ] }, { @@ -50,7 +158,7 @@ "id": "0f34646f", "metadata": {}, "source": [ - "## 1.Load SkillMesh Registry\n", + "## 2.Load the Registry\n", "\n", "This cell loads the SkillMesh registry, which contains all expert cards.\n", "\n", @@ -60,7 +168,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 6, "id": "d7480e85", "metadata": {}, "outputs": [ @@ -70,7 +178,7 @@ "92" ] }, - "execution_count": 53, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -83,20 +191,12 @@ "len(registry)" ] }, - { - "cell_type": "markdown", - "id": "443289f7", - "metadata": {}, - "source": [ - "## 2. Retrieve Top-K Expert Cards" - ] - }, { "cell_type": "markdown", "id": "4129236f", "metadata": {}, "source": [ - "2. Retrieve Top-K Expert Cards\n", + "## 3. 
Retrieve Top-K Expert Cards\n", "\n", "- `SkillRetriever(registry)` creates a retriever object from the loaded registry.\n", "- `query` is the user request.\n", @@ -105,7 +205,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 7, "id": "3caa3116", "metadata": {}, "outputs": [ @@ -117,7 +217,7 @@ " RetrievalHit(card=ToolCard(id='ml.gradient-boosting', title='Gradient Boosting with XGBoost and LightGBM', domain='machine_learning', instruction_file='instructions/gradient-boosting-xgb-lgbm.md', description='High-performance boosting, tuning, and SHAP-based attribution.', tags=['xgboost', 'lightgbm', 'catboost', 'shap', 'boosting'], tool_hints=['xgboost', 'lightgbm', 'catboost', 'shap'], examples=['Tune XGBoost with early stopping', 'Compare LightGBM vs CatBoost'], aliases=['gbm', 'tree-boosting'], dependencies=['xgboost', 'lightgbm', 'catboost', 'shap', 'scikit-learn'], output_artifacts=['booster_model', 'feature_importance', 'shap_summary'], quality_checks=['early_stopping_enabled', 'class_imbalance_strategy', 'feature_leakage_review'], constraints=['consistent_validation_metric_across_models'], input_contract={'required': 'tabular train/validation datasets', 'optional': 'class weights and monotonic constraints'}, risk_level='medium', maturity='stable', metadata={'provider_support': ['codex', 'claude'], 'install_extra': 'boosting'}, instruction_text=\"# Gradient Boosting Expert (XGBoost / LightGBM / CatBoost)\\n\\nUse this expert for high-performance tabular modeling with gradient boosting frameworks, including classification, regression, and ranking tasks.\\n\\n## When to use this expert\\n- The task involves structured/tabular data where tree-based models are expected to excel.\\n- The user needs state-of-the-art predictive performance with feature attribution.\\n- Early stopping, hyperparameter tuning, or framework comparison is required.\\n- SHAP-based model interpretation or class imbalance handling is requested.\\n\\n## Execution 
behavior\\n\\n1. Build a clean train/validation/test split with leakage controls. For temporal data, split by time. For classification, use stratified splits to preserve class ratios.\\n2. Configure early stopping with a patience of 20-50 rounds on the validation set. Pass `eval_set` (XGBoost/LightGBM) or `eval_set` (CatBoost) explicitly.\\n3. Start with sensible defaults: `learning_rate=0.05`, `max_depth=6`, `n_estimators=2000` (relying on early stopping to find the right count). Tune in this order: (a) `n_estimators` via early stopping, (b) `max_depth` and `min_child_weight`, (c) `subsample` and `colsample_bytree`, (d) regularization (`reg_alpha`, `reg_lambda`), (e) `learning_rate` reduction with proportional `n_estimators` increase.\\n4. For multi-framework comparison, hold folds constant (pass the same `cv` splitter) and use identical metric definitions. Report results in a comparison table.\\n5. Compute SHAP values using `shap.TreeExplainer` for global and local feature attribution. Generate summary plots, dependence plots for top features, and force plots for individual predictions when interpretability is requested.\\n6. Handle class imbalance with `scale_pos_weight` (XGBoost), `is_unbalance` (LightGBM), or `auto_class_weights` (CatBoost). Compare against SMOTE-in-pipeline only if simple weighting underperforms.\\n7. For categorical features, prefer LightGBM or CatBoost native categorical handling over one-hot encoding when cardinality > 10.\\n8. 
Save the final model with native `.save_model()` format and record hyperparameters, best iteration, and validation metric in metadata.\\n\\n## Decision tree\\n- If dataset has > 100k rows and many categorical features -> prefer LightGBM for speed; use CatBoost if categoricals have high cardinality and natural ordering is absent.\\n- If dataset is small (< 5k rows) -> reduce `max_depth` to 3-4 and increase regularization to prevent overfitting; consider whether a simpler sklearn model might suffice.\\n- If the task is ranking -> use `XGBRanker` or `LGBMRanker` with `lambdarank` objective.\\n- If feature interactions matter for explanation -> use SHAP interaction values, not just main-effect importance.\\n- If prediction latency is critical -> export to ONNX or use LightGBM's `predict_disable_shape_check` for faster inference.\\n- If reproducibility is mandatory -> pin `random_state` in the booster AND the data split, and record library version.\\n\\n## Anti-patterns\\n- NEVER set `n_estimators` to a fixed value without early stopping. This either underfits or overfits by construction.\\n- NEVER tune hyperparameters on the test set. Use a validation set or inner cross-validation; the test set is touched exactly once.\\n- NEVER compare frameworks with different preprocessing (e.g., one-hot for XGBoost but native categoricals for CatBoost) and call it a fair comparison.\\n- NEVER ignore the `best_iteration` attribute after early stopping. Predictions must use `best_iteration` to avoid including over-trained trees.\\n- NEVER rely solely on `feature_importances_` (gain-based) for feature selection. 
Gain importance is biased toward high-cardinality and correlated features.\\n\\n## Common mistakes\\n- Using `eval_metric` that does not match the business objective (e.g., `logloss` for early stopping but reporting `F1`).\\n- Forgetting to pass `categorical_feature` to LightGBM, causing it to treat integer-encoded categoricals as continuous.\\n- Setting `scale_pos_weight` AND applying SMOTE simultaneously, which double-corrects for imbalance.\\n- Running SHAP on the training set instead of the validation/test set, which inflates apparent feature relevance.\\n- Not setting `verbosity=0` or `verbose=-1` during hyperparameter search, flooding logs with thousands of training lines.\\n- Using `pickle` instead of the framework's native `.save_model()`, which breaks across library version upgrades.\\n\\n## Output contract\\n- Report best hyperparameters, best iteration number, and validation metric trajectory (or at minimum start/best/final values).\\n- Include the class-imbalance strategy used and its rationale.\\n- Provide SHAP summary plots or feature importance rankings with the method explicitly named.\\n- Never report train-only metrics as final performance. Always include validation or test metrics.\\n- Record the framework name and version (e.g., `xgboost==2.0.3`) in artifact metadata.\\n- If multiple frameworks were compared, include a side-by-side metric table with identical folds.\\n- Save the model in native format alongside a JSON metadata sidecar.\\n\\n## Composability hints\\n- Before this expert -> use the **Data Cleaning Expert** for null handling and type coercion. 
Gradient boosters handle NaNs natively (XGBoost, LightGBM) but benefit from clean categoricals.\\n- Before this expert -> use the **Scikit-learn Modeling Expert** if a quick linear baseline is needed for comparison.\\n- After this expert -> use the **Visualization Expert** to plot SHAP summaries, learning curves, or metric comparisons.\\n- After this expert -> use the **Machine Learning Export Expert** to convert the model to ONNX or package it for serving.\\n- Related -> the **Statistics Expert** for post-hoc significance tests when comparing model performance across folds.\"), score=0.6425158381462097, sparse_score=0.6425158381462097, dense_score=None)]" ] }, - "execution_count": 54, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -138,7 +238,7 @@ "id": "5c85573e", "metadata": {}, "source": [ - "## 3. Emit Context for LLM\n", + "## 4. Emit provider-ready context \n", "\n", "This step formats the instructions from the top-K retrieval results into a single context string\n", "that could be passed to an AI model (like Claude or Codex) for answering the query." @@ -146,7 +246,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 8, "id": "47c4c7a6", "metadata": {}, "outputs": [ @@ -204,7 +304,7 @@ "id": "f232afa3", "metadata": {}, "source": [ - "## 5 Mock Agent Loop\n", + "## 5. Demonstrate a simple mock agent loop\n", "\n", "- Simulates multiple queries in a loop. \n", "- Retrieves top-K expert cards for each query. 
\n", @@ -213,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 9, "id": "08b5202d", "metadata": {}, "outputs": [ From f8cad68c7ca91b4ad26d865fe1a3d8d2f187ba7b Mon Sep 17 00:00:00 2001 From: prajwal Date: Sun, 1 Mar 2026 19:07:02 +0530 Subject: [PATCH 4/6] Fix notebook headers to match intro numbering --- examples/notebooks/getting-started.ipynb | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/examples/notebooks/getting-started.ipynb b/examples/notebooks/getting-started.ipynb index 421ccd2..6f2aace 100644 --- a/examples/notebooks/getting-started.ipynb +++ b/examples/notebooks/getting-started.ipynb @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "id": "a02b3ef1", "metadata": {}, "outputs": [ @@ -127,8 +127,8 @@ "Building wheels for collected packages: skillmesh\n", " Building editable for skillmesh (pyproject.toml): started\n", " Building editable for skillmesh (pyproject.toml): finished with status 'done'\n", - " Created wheel for skillmesh: filename=skillmesh-0.1.0-0.editable-py3-none-any.whl size=5534 sha256=5c7a3f4fb3c8a58a9891fdc3c5f2bd3930d5bc38eb9d46b6a6d82bb26722dca0\n", - " Stored in directory: C:\\Users\\DELL\\AppData\\Local\\Temp\\pip-ephem-wheel-cache-dm_bov34\\wheels\\26\\fc\\af\\1aa2362d8ccbc2ed9db0df865eeffcbf5130dc61876fdaf17b\n", + " Created wheel for skillmesh: filename=skillmesh-0.1.0-0.editable-py3-none-any.whl size=5534 sha256=6dfabd1ecad61c06188c5aa448167818c7e5ee7490ef689be1230fcc4dfc2684\n", + " Stored in directory: C:\\Users\\DELL\\AppData\\Local\\Temp\\pip-ephem-wheel-cache-w74lwmao\\wheels\\26\\fc\\af\\1aa2362d8ccbc2ed9db0df865eeffcbf5130dc61876fdaf17b\n", "Successfully built skillmesh\n", "Installing collected packages: skillmesh\n", " Attempting uninstall: skillmesh\n", @@ -158,7 +158,7 @@ "id": "0f34646f", "metadata": {}, "source": [ - "## 2.Load the Registry\n", + "## 2. 
Load SkillMesh Registry\n", "\n", "This cell loads the SkillMesh registry, which contains all expert cards.\n", "\n", @@ -168,7 +168,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 2, "id": "d7480e85", "metadata": {}, "outputs": [ @@ -178,7 +178,7 @@ "92" ] }, - "execution_count": 6, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -205,7 +205,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 3, "id": "3caa3116", "metadata": {}, "outputs": [ @@ -217,7 +217,7 @@ " RetrievalHit(card=ToolCard(id='ml.gradient-boosting', title='Gradient Boosting with XGBoost and LightGBM', domain='machine_learning', instruction_file='instructions/gradient-boosting-xgb-lgbm.md', description='High-performance boosting, tuning, and SHAP-based attribution.', tags=['xgboost', 'lightgbm', 'catboost', 'shap', 'boosting'], tool_hints=['xgboost', 'lightgbm', 'catboost', 'shap'], examples=['Tune XGBoost with early stopping', 'Compare LightGBM vs CatBoost'], aliases=['gbm', 'tree-boosting'], dependencies=['xgboost', 'lightgbm', 'catboost', 'shap', 'scikit-learn'], output_artifacts=['booster_model', 'feature_importance', 'shap_summary'], quality_checks=['early_stopping_enabled', 'class_imbalance_strategy', 'feature_leakage_review'], constraints=['consistent_validation_metric_across_models'], input_contract={'required': 'tabular train/validation datasets', 'optional': 'class weights and monotonic constraints'}, risk_level='medium', maturity='stable', metadata={'provider_support': ['codex', 'claude'], 'install_extra': 'boosting'}, instruction_text=\"# Gradient Boosting Expert (XGBoost / LightGBM / CatBoost)\\n\\nUse this expert for high-performance tabular modeling with gradient boosting frameworks, including classification, regression, and ranking tasks.\\n\\n## When to use this expert\\n- The task involves structured/tabular data where tree-based models are expected to excel.\\n- The user needs state-of-the-art 
predictive performance with feature attribution.\\n- Early stopping, hyperparameter tuning, or framework comparison is required.\\n- SHAP-based model interpretation or class imbalance handling is requested.\\n\\n## Execution behavior\\n\\n1. Build a clean train/validation/test split with leakage controls. For temporal data, split by time. For classification, use stratified splits to preserve class ratios.\\n2. Configure early stopping with a patience of 20-50 rounds on the validation set. Pass `eval_set` (XGBoost/LightGBM) or `eval_set` (CatBoost) explicitly.\\n3. Start with sensible defaults: `learning_rate=0.05`, `max_depth=6`, `n_estimators=2000` (relying on early stopping to find the right count). Tune in this order: (a) `n_estimators` via early stopping, (b) `max_depth` and `min_child_weight`, (c) `subsample` and `colsample_bytree`, (d) regularization (`reg_alpha`, `reg_lambda`), (e) `learning_rate` reduction with proportional `n_estimators` increase.\\n4. For multi-framework comparison, hold folds constant (pass the same `cv` splitter) and use identical metric definitions. Report results in a comparison table.\\n5. Compute SHAP values using `shap.TreeExplainer` for global and local feature attribution. Generate summary plots, dependence plots for top features, and force plots for individual predictions when interpretability is requested.\\n6. Handle class imbalance with `scale_pos_weight` (XGBoost), `is_unbalance` (LightGBM), or `auto_class_weights` (CatBoost). Compare against SMOTE-in-pipeline only if simple weighting underperforms.\\n7. For categorical features, prefer LightGBM or CatBoost native categorical handling over one-hot encoding when cardinality > 10.\\n8. 
Save the final model with native `.save_model()` format and record hyperparameters, best iteration, and validation metric in metadata.\\n\\n## Decision tree\\n- If dataset has > 100k rows and many categorical features -> prefer LightGBM for speed; use CatBoost if categoricals have high cardinality and natural ordering is absent.\\n- If dataset is small (< 5k rows) -> reduce `max_depth` to 3-4 and increase regularization to prevent overfitting; consider whether a simpler sklearn model might suffice.\\n- If the task is ranking -> use `XGBRanker` or `LGBMRanker` with `lambdarank` objective.\\n- If feature interactions matter for explanation -> use SHAP interaction values, not just main-effect importance.\\n- If prediction latency is critical -> export to ONNX or use LightGBM's `predict_disable_shape_check` for faster inference.\\n- If reproducibility is mandatory -> pin `random_state` in the booster AND the data split, and record library version.\\n\\n## Anti-patterns\\n- NEVER set `n_estimators` to a fixed value without early stopping. This either underfits or overfits by construction.\\n- NEVER tune hyperparameters on the test set. Use a validation set or inner cross-validation; the test set is touched exactly once.\\n- NEVER compare frameworks with different preprocessing (e.g., one-hot for XGBoost but native categoricals for CatBoost) and call it a fair comparison.\\n- NEVER ignore the `best_iteration` attribute after early stopping. Predictions must use `best_iteration` to avoid including over-trained trees.\\n- NEVER rely solely on `feature_importances_` (gain-based) for feature selection. 
Gain importance is biased toward high-cardinality and correlated features.\\n\\n## Common mistakes\\n- Using `eval_metric` that does not match the business objective (e.g., `logloss` for early stopping but reporting `F1`).\\n- Forgetting to pass `categorical_feature` to LightGBM, causing it to treat integer-encoded categoricals as continuous.\\n- Setting `scale_pos_weight` AND applying SMOTE simultaneously, which double-corrects for imbalance.\\n- Running SHAP on the training set instead of the validation/test set, which inflates apparent feature relevance.\\n- Not setting `verbosity=0` or `verbose=-1` during hyperparameter search, flooding logs with thousands of training lines.\\n- Using `pickle` instead of the framework's native `.save_model()`, which breaks across library version upgrades.\\n\\n## Output contract\\n- Report best hyperparameters, best iteration number, and validation metric trajectory (or at minimum start/best/final values).\\n- Include the class-imbalance strategy used and its rationale.\\n- Provide SHAP summary plots or feature importance rankings with the method explicitly named.\\n- Never report train-only metrics as final performance. Always include validation or test metrics.\\n- Record the framework name and version (e.g., `xgboost==2.0.3`) in artifact metadata.\\n- If multiple frameworks were compared, include a side-by-side metric table with identical folds.\\n- Save the model in native format alongside a JSON metadata sidecar.\\n\\n## Composability hints\\n- Before this expert -> use the **Data Cleaning Expert** for null handling and type coercion. 
Gradient boosters handle NaNs natively (XGBoost, LightGBM) but benefit from clean categoricals.\\n- Before this expert -> use the **Scikit-learn Modeling Expert** if a quick linear baseline is needed for comparison.\\n- After this expert -> use the **Visualization Expert** to plot SHAP summaries, learning curves, or metric comparisons.\\n- After this expert -> use the **Machine Learning Export Expert** to convert the model to ONNX or package it for serving.\\n- Related -> the **Statistics Expert** for post-hoc significance tests when comparing model performance across folds.\"), score=0.6425158381462097, sparse_score=0.6425158381462097, dense_score=None)]" ] }, - "execution_count": 7, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -246,7 +246,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "id": "47c4c7a6", "metadata": {}, "outputs": [ @@ -304,7 +304,7 @@ "id": "f232afa3", "metadata": {}, "source": [ - "## 5. Demonstrate a simple mock agent loop\n", + "## 5. Mock Agent Loop\n", "\n", "- Simulates multiple queries in a loop. \n", "- Retrieves top-K expert cards for each query. 
\n", @@ -313,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "id": "08b5202d", "metadata": {}, "outputs": [ From d46bd825f0b0e04f3f230693af505a7858c38fb2 Mon Sep 17 00:00:00 2001 From: prajwal Date: Sun, 1 Mar 2026 19:41:40 +0530 Subject: [PATCH 5/6] Clear notebook outputs to remove environment-specific logs --- examples/notebooks/getting-started.ipynb | 259 +---------------------- 1 file changed, 10 insertions(+), 249 deletions(-) diff --git a/examples/notebooks/getting-started.ipynb b/examples/notebooks/getting-started.ipynb index 6f2aace..fc50540 100644 --- a/examples/notebooks/getting-started.ipynb +++ b/examples/notebooks/getting-started.ipynb @@ -28,126 +28,10 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "a02b3ef1", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Obtaining file:///E:/ForOpensource/SkillMesh\n", - " Installing build dependencies: started\n", - " Installing build dependencies: finished with status 'done'\n", - " Checking if build backend supports build_editable: started\n", - " Checking if build backend supports build_editable: finished with status 'done'\n", - " Getting requirements to build editable: started\n", - " Getting requirements to build editable: finished with status 'done'\n", - " Preparing editable metadata (pyproject.toml): started\n", - " Preparing editable metadata (pyproject.toml): finished with status 'done'\n", - "Requirement already satisfied: numpy>=1.24 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (2.4.2)\n", - "Requirement already satisfied: PyYAML>=6.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (6.0.3)\n", - "Requirement already satisfied: rank-bm25>=0.2.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (0.2.2)\n", - "Requirement already satisfied: jsonschema>=4.0 in 
e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (4.26.0)\n", - "Requirement already satisfied: chromadb>=0.5.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (1.5.2)\n", - "Requirement already satisfied: build>=1.0.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.4.0)\n", - "Requirement already satisfied: pydantic>=1.9 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (2.12.5)\n", - "Requirement already satisfied: pybase64>=1.4.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.4.3)\n", - "Requirement already satisfied: uvicorn[standard]>=0.18.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.41.0)\n", - "Requirement already satisfied: posthog<6.0.0,>=2.4.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (5.4.0)\n", - "Requirement already satisfied: typing-extensions>=4.5.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (4.15.0)\n", - "Requirement already satisfied: onnxruntime>=1.14.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.24.2)\n", - "Requirement already satisfied: opentelemetry-api>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", - "Requirement already satisfied: opentelemetry-exporter-otlp-proto-grpc>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", - "Requirement already satisfied: opentelemetry-sdk>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", - "Requirement already satisfied: tokenizers>=0.13.2 in 
e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.22.2)\n", - "Requirement already satisfied: pypika>=0.48.9 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.51.1)\n", - "Requirement already satisfied: tqdm>=4.65.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (4.67.3)\n", - "Requirement already satisfied: overrides>=7.3.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (7.7.0)\n", - "Requirement already satisfied: importlib-resources in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (6.5.2)\n", - "Requirement already satisfied: grpcio>=1.58.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.78.0)\n", - "Requirement already satisfied: bcrypt>=4.0.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (5.0.0)\n", - "Requirement already satisfied: typer>=0.9.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.24.1)\n", - "Requirement already satisfied: kubernetes>=28.1.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (35.0.0)\n", - "Requirement already satisfied: tenacity>=8.2.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (9.1.4)\n", - "Requirement already satisfied: mmh3>=4.0.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (5.2.0)\n", - "Requirement already satisfied: orjson>=3.9.12 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (3.11.7)\n", - "Requirement already satisfied: httpx>=0.27.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages 
(from chromadb>=0.5.0->skillmesh==0.1.0) (0.28.1)\n", - "Requirement already satisfied: rich>=10.11.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (14.3.3)\n", - "Requirement already satisfied: attrs>=22.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (25.4.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (2025.9.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (0.37.0)\n", - "Requirement already satisfied: rpds-py>=0.25.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (0.30.0)\n", - "Requirement already satisfied: packaging>=24.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from build>=1.0.3->chromadb>=0.5.0->skillmesh==0.1.0) (26.0)\n", - "Requirement already satisfied: pyproject_hooks in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from build>=1.0.3->chromadb>=0.5.0->skillmesh==0.1.0) (1.2.0)\n", - "Requirement already satisfied: colorama in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from build>=1.0.3->chromadb>=0.5.0->skillmesh==0.1.0) (0.4.6)\n", - "Requirement already satisfied: anyio in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (4.12.1)\n", - "Requirement already satisfied: certifi in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (2026.2.25)\n", - "Requirement already satisfied: httpcore==1.* in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.0.9)\n", - "Requirement already satisfied: idna in 
e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.11)\n", - "Requirement already satisfied: h11>=0.16 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpcore==1.*->httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.16.0)\n", - "Requirement already satisfied: six>=1.9.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.17.0)\n", - "Requirement already satisfied: python-dateutil>=2.5.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.9.0.post0)\n", - "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.9.0)\n", - "Requirement already satisfied: requests in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.32.5)\n", - "Requirement already satisfied: requests-oauthlib in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.0.0)\n", - "Requirement already satisfied: urllib3!=2.6.0,>=1.24.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.6.3)\n", - "Requirement already satisfied: durationpy>=0.7 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.10)\n", - "Requirement already satisfied: flatbuffers in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (25.12.19)\n", - "Requirement already satisfied: protobuf in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (6.33.5)\n", - 
"Requirement already satisfied: sympy in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (1.14.0)\n", - "Requirement already satisfied: importlib-metadata<8.8.0,>=6.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-api>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (8.7.1)\n", - "Requirement already satisfied: googleapis-common-protos~=1.57 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.72.0)\n", - "Requirement already satisfied: opentelemetry-exporter-otlp-proto-common==1.39.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", - "Requirement already satisfied: opentelemetry-proto==1.39.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", - "Requirement already satisfied: opentelemetry-semantic-conventions==0.60b1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-sdk>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.60b1)\n", - "Requirement already satisfied: backoff>=1.10.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from posthog<6.0.0,>=2.4.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.2.1)\n", - "Requirement already satisfied: distro>=1.5.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from posthog<6.0.0,>=2.4.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.9.0)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from pydantic>=1.9->chromadb>=0.5.0->skillmesh==0.1.0) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.41.5 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from 
pydantic>=1.9->chromadb>=0.5.0->skillmesh==0.1.0) (2.41.5)\n", - "Requirement already satisfied: typing-inspection>=0.4.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from pydantic>=1.9->chromadb>=0.5.0->skillmesh==0.1.0) (0.4.2)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from rich>=10.11.0->chromadb>=0.5.0->skillmesh==0.1.0) (4.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from rich>=10.11.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.19.2)\n", - "Requirement already satisfied: huggingface-hub<2.0,>=0.16.4 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (1.5.0)\n", - "Requirement already satisfied: click>=8.2.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from typer>=0.9.0->chromadb>=0.5.0->skillmesh==0.1.0) (8.3.1)\n", - "Requirement already satisfied: shellingham>=1.3.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from typer>=0.9.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.5.4)\n", - "Requirement already satisfied: annotated-doc>=0.0.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from typer>=0.9.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.0.4)\n", - "Requirement already satisfied: httptools>=0.6.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (0.7.1)\n", - "Requirement already satisfied: python-dotenv>=0.13 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (1.2.1)\n", - "Requirement already satisfied: watchfiles>=0.20 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (1.1.1)\n", - "Requirement already satisfied: websockets>=10.4 in 
e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (16.0)\n", - "Requirement already satisfied: filelock>=3.10.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (3.24.3)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (2026.2.0)\n", - "Requirement already satisfied: hf-xet<2.0.0,>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (1.3.2)\n", - "Requirement already satisfied: zipp>=3.20 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from importlib-metadata<8.8.0,>=6.0->opentelemetry-api>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.23.0)\n", - "Requirement already satisfied: mdurl~=0.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.1.2)\n", - "Requirement already satisfied: charset_normalizer<4,>=2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from requests->kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.4.4)\n", - "Requirement already satisfied: oauthlib>=3.0.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from requests-oauthlib->kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.3.1)\n", - "Requirement already satisfied: mpmath<1.4,>=1.1.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from sympy->onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (1.3.0)\n", - "Building wheels for collected packages: skillmesh\n", - " Building editable for skillmesh (pyproject.toml): started\n", - " Building editable for skillmesh (pyproject.toml): finished with status 'done'\n", - 
" Created wheel for skillmesh: filename=skillmesh-0.1.0-0.editable-py3-none-any.whl size=5534 sha256=6dfabd1ecad61c06188c5aa448167818c7e5ee7490ef689be1230fcc4dfc2684\n", - " Stored in directory: C:\\Users\\DELL\\AppData\\Local\\Temp\\pip-ephem-wheel-cache-w74lwmao\\wheels\\26\\fc\\af\\1aa2362d8ccbc2ed9db0df865eeffcbf5130dc61876fdaf17b\n", - "Successfully built skillmesh\n", - "Installing collected packages: skillmesh\n", - " Attempting uninstall: skillmesh\n", - " Found existing installation: skillmesh 0.1.0\n", - " Uninstalling skillmesh-0.1.0:\n", - " Successfully uninstalled skillmesh-0.1.0\n", - "Successfully installed skillmesh-0.1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "[notice] A new release of pip is available: 23.2.1 -> 26.0.1\n", - "[notice] To update, run: python.exe -m pip install --upgrade pip\n" - ] - } - ], + "outputs": [], "source": [ "# Install SkillMesh dependencies from root folder\n", "!pip install -e ../.." @@ -168,21 +52,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "d7480e85", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "92" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from skill_registry_rag.registry import load_registry\n", "\n", @@ -205,23 +78,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "3caa3116", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[RetrievalHit(card=ToolCard(id='role.data-analyst', title='Data Analyst Role Orchestrator', domain='role_orchestrator', instruction_file='roles/data-analyst.md', description='Role expert that orchestrates profiling, cleaning, EDA, baseline modeling, and visualization for tabular analytics tasks.', tags=['role', 'data-analyst', 'eda', 'pandas', 'visualization', 'baseline-modeling'], tool_hints=['data.pandas-advanced', 'data.sql-queries', 
'viz.matplotlib-seaborn', 'ml.sklearn-modeling', 'stats.scipy-statsmodels'], examples=['Profile messy dataset, clean it, and deliver insight dashboard', 'Run EDA plus baseline prediction with leakage-safe validation'], aliases=['role-data-analyst', 'analytics-orchestrator'], dependencies=['data.pandas-advanced', 'data.sql-queries', 'viz.matplotlib-seaborn', 'ml.sklearn-modeling', 'stats.scipy-statsmodels'], output_artifacts=['data_profile_summary', 'eda_findings', 'visual_report', 'baseline_model_report'], quality_checks=['missingness_and_dtype_audit_completed', 'insights_backed_by_numeric_evidence', 'charts_have_labels_units_and_titles'], constraints=['no_causal_claims_from_correlation_only', 'use_only_allowed_dependencies_unless_explicitly_authorized'], input_contract={'required': 'tabular dataset and analysis objective', 'optional': 'target metric, business context, and prediction requirement'}, risk_level='medium', maturity='beta', metadata={'provider_support': ['codex', 'claude'], 'owner': 'community', 'catalog_tier': 'roles', 'instruction_version': 'v1'}, instruction_text='# Data Analyst Role Expert\\n\\nUse this role when the request needs end-to-end tabular analysis: data profiling, cleaning, exploratory analysis, baseline modeling, and clear visual communication.\\n\\n## Allowed expert dependencies\\n\\n- `data.pandas-advanced`\\n- `data.sql-queries`\\n- `viz.matplotlib-seaborn`\\n- `ml.sklearn-modeling`\\n- `stats.scipy-statsmodels`\\n\\n## Execution behavior\\n\\n1. Start with a data quality audit:\\n nulls, dtypes, duplicates, outliers, key integrity, and temporal coverage.\\n2. Normalize and clean data using reproducible transformations.\\n3. Produce concise EDA:\\n distributions, trends, segmentation, and relationship charts.\\n4. If prediction is requested, build a leakage-safe baseline model with validation metrics.\\n5. Explain findings in business terms:\\n what changed, how much, and what action is implied.\\n6. 
End with caveats and next steps.\\n\\n## Output contract\\n\\n- `profile_summary`: row/column counts, missingness, type issues, and anomalies.\\n- `eda_insights`: ranked insights with numeric evidence.\\n- `visuals`: labeled plots with clear units and titles.\\n- `model_section` (optional): baseline model, metrics, and limitations.\\n- `repro_steps`: commands/notebook steps to reproduce.\\n\\n## Guardrails\\n\\n- Do not skip data validation before insights.\\n- Do not claim causality from correlation.\\n- Do not use tools outside allowed dependencies unless explicitly approved.'), score=1.0, sparse_score=1.0, dense_score=None),\n", - " RetrievalHit(card=ToolCard(id='docs.slides-pptx', title='Slide Deck Creation (PPTX)', domain='presentation_generation', instruction_file='instructions/slide-creation.md', description='Narrative deck creation with PPTX output and chart integration.', tags=['slides', 'pptx', 'python-pptx', 'presentation', 'storytelling'], tool_hints=['python-pptx', 'matplotlib', 'pillow'], examples=['Executive summary deck', 'Findings deck with visuals'], aliases=['deck-generation', 'slide-authoring'], dependencies=['python-pptx', 'pillow', 'matplotlib'], output_artifacts=['pptx_deck', 'slide_outline', 'speaker_notes_stub'], quality_checks=['title_and_recommendation_slides_present', 'visual_hierarchy_consistent'], constraints=['one_core_message_per_slide'], input_contract={'required': 'structured storyline or slide outline', 'optional': 'brand template and theme colors'}, risk_level='low', maturity='stable', metadata={'provider_support': ['codex', 'claude'], 'install_extra': 'slides'}, instruction_text='# Slide Creation Expert (PPTX)\\n\\nUse this expert for executive summaries, project updates, and narrative decks.\\n\\n## Execution behavior\\n\\n1. Derive a slide storyline first (problem, analysis, findings, actions).\\n2. Allocate one key message per slide and keep text concise.\\n3. Generate PPTX using `python-pptx` with consistent templates.\\n4. 
Embed charts/tables as visuals instead of dense paragraphs.\\n5. Export final deck and register slide artifact with version/date.\\n\\n## Output contract\\n\\n- Include title slide, evidence slides, and recommendation slide.\\n- Keep visual hierarchy consistent across slides.\\n- Add speaker-note placeholders for critical assumptions.\\n- Ensure deck can be opened in standard PowerPoint clients.'), score=0.667872965335846, sparse_score=0.667872965335846, dense_score=None),\n", - " RetrievalHit(card=ToolCard(id='ml.gradient-boosting', title='Gradient Boosting with XGBoost and LightGBM', domain='machine_learning', instruction_file='instructions/gradient-boosting-xgb-lgbm.md', description='High-performance boosting, tuning, and SHAP-based attribution.', tags=['xgboost', 'lightgbm', 'catboost', 'shap', 'boosting'], tool_hints=['xgboost', 'lightgbm', 'catboost', 'shap'], examples=['Tune XGBoost with early stopping', 'Compare LightGBM vs CatBoost'], aliases=['gbm', 'tree-boosting'], dependencies=['xgboost', 'lightgbm', 'catboost', 'shap', 'scikit-learn'], output_artifacts=['booster_model', 'feature_importance', 'shap_summary'], quality_checks=['early_stopping_enabled', 'class_imbalance_strategy', 'feature_leakage_review'], constraints=['consistent_validation_metric_across_models'], input_contract={'required': 'tabular train/validation datasets', 'optional': 'class weights and monotonic constraints'}, risk_level='medium', maturity='stable', metadata={'provider_support': ['codex', 'claude'], 'install_extra': 'boosting'}, instruction_text=\"# Gradient Boosting Expert (XGBoost / LightGBM / CatBoost)\\n\\nUse this expert for high-performance tabular modeling with gradient boosting frameworks, including classification, regression, and ranking tasks.\\n\\n## When to use this expert\\n- The task involves structured/tabular data where tree-based models are expected to excel.\\n- The user needs state-of-the-art predictive performance with feature attribution.\\n- Early stopping, 
hyperparameter tuning, or framework comparison is required.\\n- SHAP-based model interpretation or class imbalance handling is requested.\\n\\n## Execution behavior\\n\\n1. Build a clean train/validation/test split with leakage controls. For temporal data, split by time. For classification, use stratified splits to preserve class ratios.\\n2. Configure early stopping with a patience of 20-50 rounds on the validation set. Pass `eval_set` (XGBoost/LightGBM) or `eval_set` (CatBoost) explicitly.\\n3. Start with sensible defaults: `learning_rate=0.05`, `max_depth=6`, `n_estimators=2000` (relying on early stopping to find the right count). Tune in this order: (a) `n_estimators` via early stopping, (b) `max_depth` and `min_child_weight`, (c) `subsample` and `colsample_bytree`, (d) regularization (`reg_alpha`, `reg_lambda`), (e) `learning_rate` reduction with proportional `n_estimators` increase.\\n4. For multi-framework comparison, hold folds constant (pass the same `cv` splitter) and use identical metric definitions. Report results in a comparison table.\\n5. Compute SHAP values using `shap.TreeExplainer` for global and local feature attribution. Generate summary plots, dependence plots for top features, and force plots for individual predictions when interpretability is requested.\\n6. Handle class imbalance with `scale_pos_weight` (XGBoost), `is_unbalance` (LightGBM), or `auto_class_weights` (CatBoost). Compare against SMOTE-in-pipeline only if simple weighting underperforms.\\n7. For categorical features, prefer LightGBM or CatBoost native categorical handling over one-hot encoding when cardinality > 10.\\n8. 
Save the final model with native `.save_model()` format and record hyperparameters, best iteration, and validation metric in metadata.\\n\\n## Decision tree\\n- If dataset has > 100k rows and many categorical features -> prefer LightGBM for speed; use CatBoost if categoricals have high cardinality and natural ordering is absent.\\n- If dataset is small (< 5k rows) -> reduce `max_depth` to 3-4 and increase regularization to prevent overfitting; consider whether a simpler sklearn model might suffice.\\n- If the task is ranking -> use `XGBRanker` or `LGBMRanker` with `lambdarank` objective.\\n- If feature interactions matter for explanation -> use SHAP interaction values, not just main-effect importance.\\n- If prediction latency is critical -> export to ONNX or use LightGBM's `predict_disable_shape_check` for faster inference.\\n- If reproducibility is mandatory -> pin `random_state` in the booster AND the data split, and record library version.\\n\\n## Anti-patterns\\n- NEVER set `n_estimators` to a fixed value without early stopping. This either underfits or overfits by construction.\\n- NEVER tune hyperparameters on the test set. Use a validation set or inner cross-validation; the test set is touched exactly once.\\n- NEVER compare frameworks with different preprocessing (e.g., one-hot for XGBoost but native categoricals for CatBoost) and call it a fair comparison.\\n- NEVER ignore the `best_iteration` attribute after early stopping. Predictions must use `best_iteration` to avoid including over-trained trees.\\n- NEVER rely solely on `feature_importances_` (gain-based) for feature selection. 
Gain importance is biased toward high-cardinality and correlated features.\\n\\n## Common mistakes\\n- Using `eval_metric` that does not match the business objective (e.g., `logloss` for early stopping but reporting `F1`).\\n- Forgetting to pass `categorical_feature` to LightGBM, causing it to treat integer-encoded categoricals as continuous.\\n- Setting `scale_pos_weight` AND applying SMOTE simultaneously, which double-corrects for imbalance.\\n- Running SHAP on the training set instead of the validation/test set, which inflates apparent feature relevance.\\n- Not setting `verbosity=0` or `verbose=-1` during hyperparameter search, flooding logs with thousands of training lines.\\n- Using `pickle` instead of the framework's native `.save_model()`, which breaks across library version upgrades.\\n\\n## Output contract\\n- Report best hyperparameters, best iteration number, and validation metric trajectory (or at minimum start/best/final values).\\n- Include the class-imbalance strategy used and its rationale.\\n- Provide SHAP summary plots or feature importance rankings with the method explicitly named.\\n- Never report train-only metrics as final performance. Always include validation or test metrics.\\n- Record the framework name and version (e.g., `xgboost==2.0.3`) in artifact metadata.\\n- If multiple frameworks were compared, include a side-by-side metric table with identical folds.\\n- Save the model in native format alongside a JSON metadata sidecar.\\n\\n## Composability hints\\n- Before this expert -> use the **Data Cleaning Expert** for null handling and type coercion. 
Gradient boosters handle NaNs natively (XGBoost, LightGBM) but benefit from clean categoricals.\\n- Before this expert -> use the **Scikit-learn Modeling Expert** if a quick linear baseline is needed for comparison.\\n- After this expert -> use the **Visualization Expert** to plot SHAP summaries, learning curves, or metric comparisons.\\n- After this expert -> use the **Machine Learning Export Expert** to convert the model to ONNX or package it for serving.\\n- Related -> the **Statistics Expert** for post-hoc significance tests when comparing model performance across folds.\"), score=0.6425158381462097, sparse_score=0.6425158381462097, dense_score=None)]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from skill_registry_rag.retriever import SkillRetriever\n", "\n", @@ -246,49 +106,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "47c4c7a6", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "USER QUERY:\n", - "clean messy sales data and generate charts\n", - "\n", - "RETRIEVED CONTEXT:\n", - "# Data Analyst Role Expert\n", - "\n", - "Use this role when the request needs end-to-end tabular analysis: data profiling, cleaning, exploratory analysis, baseline modeling, and clear visual communication.\n", - "\n", - "## Allowed expert dependencies\n", - "\n", - "- `data.pandas-advanced`\n", - "- `data.sql-queries`\n", - "- `viz.matplotlib-seaborn`\n", - "- `ml.sklearn-modeling`\n", - "- `stats.scipy-statsmodels`\n", - "\n", - "## Execution behavior\n", - "\n", - "1. Start with a data quality audit:\n", - " nulls, dtypes, duplicates, outliers, key integrity, and temporal coverage.\n", - "2. Normalize and clean data using reproducible transformations.\n", - "3. Produce concise EDA:\n", - " distributions, trends, segmentation, and relationship charts.\n", - "4. 
If prediction is requested, build a leakage-safe baseline model with validation metrics.\n", - "5. Explain findings in business terms:\n", - " what changed, how much, and what action is implied.\n", - "6. End with caveats and next steps.\n", - "\n", - "## Output contract\n", - "\n", - "- `profile_summary`: row/column counts, missingness, type issues, and anomalies.\n", - "- `eda_insights`: ranked insights \n" - ] - } - ], + "outputs": [], "source": [ "context = \"\\n\\n\".join(hit.card.instruction_text for hit in hits)\n", "\n", @@ -313,70 +134,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "08b5202d", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Query: clean messy sales data\n", - "# Data Analyst Role Expert\n", - "\n", - "Use this role when the request needs end-to-end tabular analysis: data profiling, cleaning, exploratory analysis, baseline modeling, and clear visual communication.\n", - "\n", - "## Allowed expert dependencies\n", - "\n", - "- `data.pandas-advanced`\n", - "- `data.sql-queries`\n", - "- `viz.matplotlib-seaborn`\n", - "- `ml.sklearn-modeling`\n", - "- `stats.scipy-statsmodels`\n", - "\n", - "## Execution behavior\n", - "\n", - "1. Start with a data quality audit:\n", - " nulls, dtypes, duplicates, outliers, key integrity, and temporal coverage.\n", - "2. Normaliz\n", - "--------------------------------------------------\n", - "Query: generate charts for sales\n", - "# Slide Creation Expert (PPTX)\n", - "\n", - "Use this expert for executive summaries, project updates, and narrative decks.\n", - "\n", - "## Execution behavior\n", - "\n", - "1. Derive a slide storyline first (problem, analysis, findings, actions).\n", - "2. Allocate one key message per slide and keep text concise.\n", - "3. Generate PPTX using `python-pptx` with consistent templates.\n", - "4. Embed charts/tables as visuals instead of dense paragraphs.\n", - "5. 
Export final deck and register slide artifact with version/date.\n", - "\n", - "## Output contract\n", - "\n", - "- Include titl\n", - "--------------------------------------------------\n", - "Query: summarize findings\n", - "# Slide Creation Expert (PPTX)\n", - "\n", - "Use this expert for executive summaries, project updates, and narrative decks.\n", - "\n", - "## Execution behavior\n", - "\n", - "1. Derive a slide storyline first (problem, analysis, findings, actions).\n", - "2. Allocate one key message per slide and keep text concise.\n", - "3. Generate PPTX using `python-pptx` with consistent templates.\n", - "4. Embed charts/tables as visuals instead of dense paragraphs.\n", - "5. Export final deck and register slide artifact with version/date.\n", - "\n", - "## Output contract\n", - "\n", - "- Include titl\n", - "--------------------------------------------------\n" - ] - } - ], + "outputs": [], "source": [ "queries = [\n", " \"clean messy sales data\",\n", From ae6754e1ed85a7e2849a4f392d7d2d5a2298623f Mon Sep 17 00:00:00 2001 From: prajwal Date: Sun, 1 Mar 2026 22:09:48 +0530 Subject: [PATCH 6/6] Improve getting-started notebook by adding structured markdown sample outputs --- examples/notebooks/getting-started.ipynb | 402 ++++++++++++++++++++++- 1 file changed, 392 insertions(+), 10 deletions(-) diff --git a/examples/notebooks/getting-started.ipynb b/examples/notebooks/getting-started.ipynb index fc50540..4471aa1 100644 --- a/examples/notebooks/getting-started.ipynb +++ b/examples/notebooks/getting-started.ipynb @@ -28,15 +28,144 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "a02b3ef1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Obtaining file:///E:/ForOpensource/SkillMesh\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Checking if build backend supports build_editable: 
started\n", + " Checking if build backend supports build_editable: finished with status 'done'\n", + " Getting requirements to build editable: started\n", + " Getting requirements to build editable: finished with status 'done'\n", + " Preparing editable metadata (pyproject.toml): started\n", + " Preparing editable metadata (pyproject.toml): finished with status 'done'\n", + "Requirement already satisfied: numpy>=1.24 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (2.4.2)\n", + "Requirement already satisfied: PyYAML>=6.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (6.0.3)\n", + "Requirement already satisfied: rank-bm25>=0.2.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (0.2.2)\n", + "Requirement already satisfied: jsonschema>=4.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (4.26.0)\n", + "Requirement already satisfied: chromadb>=0.5.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from skillmesh==0.1.0) (1.5.2)\n", + "Requirement already satisfied: build>=1.0.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.4.0)\n", + "Requirement already satisfied: pydantic>=1.9 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (2.12.5)\n", + "Requirement already satisfied: pybase64>=1.4.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.4.3)\n", + "Requirement already satisfied: uvicorn[standard]>=0.18.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.41.0)\n", + "Requirement already satisfied: posthog<6.0.0,>=2.4.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (5.4.0)\n", + "Requirement already satisfied: typing-extensions>=4.5.0 in 
e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (4.15.0)\n", + "Requirement already satisfied: onnxruntime>=1.14.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.24.2)\n", + "Requirement already satisfied: opentelemetry-api>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", + "Requirement already satisfied: opentelemetry-exporter-otlp-proto-grpc>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", + "Requirement already satisfied: opentelemetry-sdk>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", + "Requirement already satisfied: tokenizers>=0.13.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.22.2)\n", + "Requirement already satisfied: pypika>=0.48.9 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.51.1)\n", + "Requirement already satisfied: tqdm>=4.65.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (4.67.3)\n", + "Requirement already satisfied: overrides>=7.3.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (7.7.0)\n", + "Requirement already satisfied: importlib-resources in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (6.5.2)\n", + "Requirement already satisfied: grpcio>=1.58.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (1.78.0)\n", + "Requirement already satisfied: bcrypt>=4.0.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (5.0.0)\n", + "Requirement already satisfied: typer>=0.9.0 
in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.24.1)\n", + "Requirement already satisfied: kubernetes>=28.1.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (35.0.0)\n", + "Requirement already satisfied: tenacity>=8.2.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (9.1.4)\n", + "Requirement already satisfied: mmh3>=4.0.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (5.2.0)\n", + "Requirement already satisfied: orjson>=3.9.12 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (3.11.7)\n", + "Requirement already satisfied: httpx>=0.27.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (0.28.1)\n", + "Requirement already satisfied: rich>=10.11.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from chromadb>=0.5.0->skillmesh==0.1.0) (14.3.3)\n", + "Requirement already satisfied: attrs>=22.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (25.4.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (2025.9.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (0.37.0)\n", + "Requirement already satisfied: rpds-py>=0.25.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from jsonschema>=4.0->skillmesh==0.1.0) (0.30.0)\n", + "Requirement already satisfied: packaging>=24.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from build>=1.0.3->chromadb>=0.5.0->skillmesh==0.1.0) (26.0)\n", + "Requirement already satisfied: pyproject_hooks in 
e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from build>=1.0.3->chromadb>=0.5.0->skillmesh==0.1.0) (1.2.0)\n", + "Requirement already satisfied: colorama in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from build>=1.0.3->chromadb>=0.5.0->skillmesh==0.1.0) (0.4.6)\n", + "Requirement already satisfied: anyio in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (4.12.1)\n", + "Requirement already satisfied: certifi in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (2026.2.25)\n", + "Requirement already satisfied: httpcore==1.* in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.0.9)\n", + "Requirement already satisfied: idna in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.11)\n", + "Requirement already satisfied: h11>=0.16 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from httpcore==1.*->httpx>=0.27.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.16.0)\n", + "Requirement already satisfied: six>=1.9.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.17.0)\n", + "Requirement already satisfied: python-dateutil>=2.5.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.9.0.post0)\n", + "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.9.0)\n", + "Requirement already satisfied: requests in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.32.5)\n", + "Requirement already satisfied: requests-oauthlib in 
e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.0.0)\n", + "Requirement already satisfied: urllib3!=2.6.0,>=1.24.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.6.3)\n", + "Requirement already satisfied: durationpy>=0.7 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.10)\n", + "Requirement already satisfied: flatbuffers in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (25.12.19)\n", + "Requirement already satisfied: protobuf in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (6.33.5)\n", + "Requirement already satisfied: sympy in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (1.14.0)\n", + "Requirement already satisfied: importlib-metadata<8.8.0,>=6.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-api>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (8.7.1)\n", + "Requirement already satisfied: googleapis-common-protos~=1.57 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.72.0)\n", + "Requirement already satisfied: opentelemetry-exporter-otlp-proto-common==1.39.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", + "Requirement already satisfied: opentelemetry-proto==1.39.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.39.1)\n", + "Requirement already satisfied: 
opentelemetry-semantic-conventions==0.60b1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from opentelemetry-sdk>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.60b1)\n", + "Requirement already satisfied: backoff>=1.10.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from posthog<6.0.0,>=2.4.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.2.1)\n", + "Requirement already satisfied: distro>=1.5.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from posthog<6.0.0,>=2.4.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.9.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from pydantic>=1.9->chromadb>=0.5.0->skillmesh==0.1.0) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.41.5 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from pydantic>=1.9->chromadb>=0.5.0->skillmesh==0.1.0) (2.41.5)\n", + "Requirement already satisfied: typing-inspection>=0.4.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from pydantic>=1.9->chromadb>=0.5.0->skillmesh==0.1.0) (0.4.2)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from rich>=10.11.0->chromadb>=0.5.0->skillmesh==0.1.0) (4.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from rich>=10.11.0->chromadb>=0.5.0->skillmesh==0.1.0) (2.19.2)\n", + "Requirement already satisfied: huggingface-hub<2.0,>=0.16.4 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (1.5.0)\n", + "Requirement already satisfied: click>=8.2.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from typer>=0.9.0->chromadb>=0.5.0->skillmesh==0.1.0) (8.3.1)\n", + "Requirement already satisfied: shellingham>=1.3.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from 
typer>=0.9.0->chromadb>=0.5.0->skillmesh==0.1.0) (1.5.4)\n", + "Requirement already satisfied: annotated-doc>=0.0.2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from typer>=0.9.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.0.4)\n", + "Requirement already satisfied: httptools>=0.6.3 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (0.7.1)\n", + "Requirement already satisfied: python-dotenv>=0.13 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (1.2.1)\n", + "Requirement already satisfied: watchfiles>=0.20 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (1.1.1)\n", + "Requirement already satisfied: websockets>=10.4 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from uvicorn[standard]>=0.18.3->chromadb>=0.5.0->skillmesh==0.1.0) (16.0)\n", + "Requirement already satisfied: filelock>=3.10.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (3.24.3)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (2026.2.0)\n", + "Requirement already satisfied: hf-xet<2.0.0,>=1.2.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb>=0.5.0->skillmesh==0.1.0) (1.3.2)\n", + "Requirement already satisfied: zipp>=3.20 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from importlib-metadata<8.8.0,>=6.0->opentelemetry-api>=1.2.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.23.0)\n", + "Requirement already satisfied: mdurl~=0.1 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from 
markdown-it-py>=2.2.0->rich>=10.11.0->chromadb>=0.5.0->skillmesh==0.1.0) (0.1.2)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from requests->kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.4.4)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from requests-oauthlib->kubernetes>=28.1.0->chromadb>=0.5.0->skillmesh==0.1.0) (3.3.1)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in e:\\foropensource\\skillmesh\\.venv\\lib\\site-packages (from sympy->onnxruntime>=1.14.1->chromadb>=0.5.0->skillmesh==0.1.0) (1.3.0)\n", + "Building wheels for collected packages: skillmesh\n", + " Building editable for skillmesh (pyproject.toml): started\n", + " Building editable for skillmesh (pyproject.toml): finished with status 'done'\n", + " Created wheel for skillmesh: filename=skillmesh-0.1.0-0.editable-py3-none-any.whl size=5534 sha256=bab34fbd5a9a5befaac3d36a4f19317fe9418a366d0debf96c3916181cd7cabe\n", + " Stored in directory: C:\\Users\\DELL\\AppData\\Local\\Temp\\pip-ephem-wheel-cache-ld87_yh7\\wheels\\26\\fc\\af\\1aa2362d8ccbc2ed9db0df865eeffcbf5130dc61876fdaf17b\n", + "Successfully built skillmesh\n", + "Installing collected packages: skillmesh\n", + " Attempting uninstall: skillmesh\n", + " Found existing installation: skillmesh 0.1.0\n", + " Uninstalling skillmesh-0.1.0:\n", + " Successfully uninstalled skillmesh-0.1.0\n", + "Successfully installed skillmesh-0.1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "[notice] A new release of pip is available: 23.2.1 -> 26.0.1\n", + "[notice] To update, run: python.exe -m pip install --upgrade pip\n" + ] + } + ], "source": [ "# Install SkillMesh dependencies from root folder\n", "!pip install -e ../.." 
] }, + { + "cell_type": "markdown", + "id": "f8624728", + "metadata": {}, + "source": [ + "### Sample Output\n", + "\n", + "```text\n", + "Obtaining skillmesh\n", + "Successfully installed skillmesh-0.1.0\n", + "```" + ] + }, { "cell_type": "markdown", "id": "0f34646f", @@ -52,10 +181,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "d7480e85", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "92" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from skill_registry_rag.registry import load_registry\n", "\n", @@ -64,6 +204,18 @@ "len(registry)" ] }, + { + "cell_type": "markdown", + "id": "f742e66d", + "metadata": {}, + "source": [ + "### Sample output:\n", + "\n", + "```text\n", + "92\n", + "```" + ] + }, { "cell_type": "markdown", "id": "4129236f", @@ -78,10 +230,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "3caa3116", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[RetrievalHit(card=ToolCard(id='role.data-analyst', title='Data Analyst Role Orchestrator', domain='role_orchestrator', instruction_file='roles/data-analyst.md', description='Role expert that orchestrates profiling, cleaning, EDA, baseline modeling, and visualization for tabular analytics tasks.', tags=['role', 'data-analyst', 'eda', 'pandas', 'visualization', 'baseline-modeling'], tool_hints=['data.pandas-advanced', 'data.sql-queries', 'viz.matplotlib-seaborn', 'ml.sklearn-modeling', 'stats.scipy-statsmodels'], examples=['Profile messy dataset, clean it, and deliver insight dashboard', 'Run EDA plus baseline prediction with leakage-safe validation'], aliases=['role-data-analyst', 'analytics-orchestrator'], dependencies=['data.pandas-advanced', 'data.sql-queries', 'viz.matplotlib-seaborn', 'ml.sklearn-modeling', 'stats.scipy-statsmodels'], output_artifacts=['data_profile_summary', 
'eda_findings', 'visual_report', 'baseline_model_report'], quality_checks=['missingness_and_dtype_audit_completed', 'insights_backed_by_numeric_evidence', 'charts_have_labels_units_and_titles'], constraints=['no_causal_claims_from_correlation_only', 'use_only_allowed_dependencies_unless_explicitly_authorized'], input_contract={'required': 'tabular dataset and analysis objective', 'optional': 'target metric, business context, and prediction requirement'}, risk_level='medium', maturity='beta', metadata={'provider_support': ['codex', 'claude'], 'owner': 'community', 'catalog_tier': 'roles', 'instruction_version': 'v1'}, instruction_text='# Data Analyst Role Expert\\n\\nUse this role when the request needs end-to-end tabular analysis: data profiling, cleaning, exploratory analysis, baseline modeling, and clear visual communication.\\n\\n## Allowed expert dependencies\\n\\n- `data.pandas-advanced`\\n- `data.sql-queries`\\n- `viz.matplotlib-seaborn`\\n- `ml.sklearn-modeling`\\n- `stats.scipy-statsmodels`\\n\\n## Execution behavior\\n\\n1. Start with a data quality audit:\\n nulls, dtypes, duplicates, outliers, key integrity, and temporal coverage.\\n2. Normalize and clean data using reproducible transformations.\\n3. Produce concise EDA:\\n distributions, trends, segmentation, and relationship charts.\\n4. If prediction is requested, build a leakage-safe baseline model with validation metrics.\\n5. Explain findings in business terms:\\n what changed, how much, and what action is implied.\\n6. 
End with caveats and next steps.\\n\\n## Output contract\\n\\n- `profile_summary`: row/column counts, missingness, type issues, and anomalies.\\n- `eda_insights`: ranked insights with numeric evidence.\\n- `visuals`: labeled plots with clear units and titles.\\n- `model_section` (optional): baseline model, metrics, and limitations.\\n- `repro_steps`: commands/notebook steps to reproduce.\\n\\n## Guardrails\\n\\n- Do not skip data validation before insights.\\n- Do not claim causality from correlation.\\n- Do not use tools outside allowed dependencies unless explicitly approved.'), score=1.0, sparse_score=1.0, dense_score=None),\n", + " RetrievalHit(card=ToolCard(id='docs.slides-pptx', title='Slide Deck Creation (PPTX)', domain='presentation_generation', instruction_file='instructions/slide-creation.md', description='Narrative deck creation with PPTX output and chart integration.', tags=['slides', 'pptx', 'python-pptx', 'presentation', 'storytelling'], tool_hints=['python-pptx', 'matplotlib', 'pillow'], examples=['Executive summary deck', 'Findings deck with visuals'], aliases=['deck-generation', 'slide-authoring'], dependencies=['python-pptx', 'pillow', 'matplotlib'], output_artifacts=['pptx_deck', 'slide_outline', 'speaker_notes_stub'], quality_checks=['title_and_recommendation_slides_present', 'visual_hierarchy_consistent'], constraints=['one_core_message_per_slide'], input_contract={'required': 'structured storyline or slide outline', 'optional': 'brand template and theme colors'}, risk_level='low', maturity='stable', metadata={'provider_support': ['codex', 'claude'], 'install_extra': 'slides'}, instruction_text='# Slide Creation Expert (PPTX)\\n\\nUse this expert for executive summaries, project updates, and narrative decks.\\n\\n## Execution behavior\\n\\n1. Derive a slide storyline first (problem, analysis, findings, actions).\\n2. Allocate one key message per slide and keep text concise.\\n3. Generate PPTX using `python-pptx` with consistent templates.\\n4. 
Embed charts/tables as visuals instead of dense paragraphs.\\n5. Export final deck and register slide artifact with version/date.\\n\\n## Output contract\\n\\n- Include title slide, evidence slides, and recommendation slide.\\n- Keep visual hierarchy consistent across slides.\\n- Add speaker-note placeholders for critical assumptions.\\n- Ensure deck can be opened in standard PowerPoint clients.'), score=0.667872965335846, sparse_score=0.667872965335846, dense_score=None),\n", + " RetrievalHit(card=ToolCard(id='ml.gradient-boosting', title='Gradient Boosting with XGBoost and LightGBM', domain='machine_learning', instruction_file='instructions/gradient-boosting-xgb-lgbm.md', description='High-performance boosting, tuning, and SHAP-based attribution.', tags=['xgboost', 'lightgbm', 'catboost', 'shap', 'boosting'], tool_hints=['xgboost', 'lightgbm', 'catboost', 'shap'], examples=['Tune XGBoost with early stopping', 'Compare LightGBM vs CatBoost'], aliases=['gbm', 'tree-boosting'], dependencies=['xgboost', 'lightgbm', 'catboost', 'shap', 'scikit-learn'], output_artifacts=['booster_model', 'feature_importance', 'shap_summary'], quality_checks=['early_stopping_enabled', 'class_imbalance_strategy', 'feature_leakage_review'], constraints=['consistent_validation_metric_across_models'], input_contract={'required': 'tabular train/validation datasets', 'optional': 'class weights and monotonic constraints'}, risk_level='medium', maturity='stable', metadata={'provider_support': ['codex', 'claude'], 'install_extra': 'boosting'}, instruction_text=\"# Gradient Boosting Expert (XGBoost / LightGBM / CatBoost)\\n\\nUse this expert for high-performance tabular modeling with gradient boosting frameworks, including classification, regression, and ranking tasks.\\n\\n## When to use this expert\\n- The task involves structured/tabular data where tree-based models are expected to excel.\\n- The user needs state-of-the-art predictive performance with feature attribution.\\n- Early stopping, 
hyperparameter tuning, or framework comparison is required.\\n- SHAP-based model interpretation or class imbalance handling is requested.\\n\\n## Execution behavior\\n\\n1. Build a clean train/validation/test split with leakage controls. For temporal data, split by time. For classification, use stratified splits to preserve class ratios.\\n2. Configure early stopping with a patience of 20-50 rounds on the validation set. Pass `eval_set` (XGBoost/LightGBM) or `eval_set` (CatBoost) explicitly.\\n3. Start with sensible defaults: `learning_rate=0.05`, `max_depth=6`, `n_estimators=2000` (relying on early stopping to find the right count). Tune in this order: (a) `n_estimators` via early stopping, (b) `max_depth` and `min_child_weight`, (c) `subsample` and `colsample_bytree`, (d) regularization (`reg_alpha`, `reg_lambda`), (e) `learning_rate` reduction with proportional `n_estimators` increase.\\n4. For multi-framework comparison, hold folds constant (pass the same `cv` splitter) and use identical metric definitions. Report results in a comparison table.\\n5. Compute SHAP values using `shap.TreeExplainer` for global and local feature attribution. Generate summary plots, dependence plots for top features, and force plots for individual predictions when interpretability is requested.\\n6. Handle class imbalance with `scale_pos_weight` (XGBoost), `is_unbalance` (LightGBM), or `auto_class_weights` (CatBoost). Compare against SMOTE-in-pipeline only if simple weighting underperforms.\\n7. For categorical features, prefer LightGBM or CatBoost native categorical handling over one-hot encoding when cardinality > 10.\\n8. 
Save the final model with native `.save_model()` format and record hyperparameters, best iteration, and validation metric in metadata.\\n\\n## Decision tree\\n- If dataset has > 100k rows and many categorical features -> prefer LightGBM for speed; use CatBoost if categoricals have high cardinality and natural ordering is absent.\\n- If dataset is small (< 5k rows) -> reduce `max_depth` to 3-4 and increase regularization to prevent overfitting; consider whether a simpler sklearn model might suffice.\\n- If the task is ranking -> use `XGBRanker` or `LGBMRanker` with `lambdarank` objective.\\n- If feature interactions matter for explanation -> use SHAP interaction values, not just main-effect importance.\\n- If prediction latency is critical -> export to ONNX or use LightGBM's `predict_disable_shape_check` for faster inference.\\n- If reproducibility is mandatory -> pin `random_state` in the booster AND the data split, and record library version.\\n\\n## Anti-patterns\\n- NEVER set `n_estimators` to a fixed value without early stopping. This either underfits or overfits by construction.\\n- NEVER tune hyperparameters on the test set. Use a validation set or inner cross-validation; the test set is touched exactly once.\\n- NEVER compare frameworks with different preprocessing (e.g., one-hot for XGBoost but native categoricals for CatBoost) and call it a fair comparison.\\n- NEVER ignore the `best_iteration` attribute after early stopping. Predictions must use `best_iteration` to avoid including over-trained trees.\\n- NEVER rely solely on `feature_importances_` (gain-based) for feature selection. 
Gain importance is biased toward high-cardinality and correlated features.\\n\\n## Common mistakes\\n- Using `eval_metric` that does not match the business objective (e.g., `logloss` for early stopping but reporting `F1`).\\n- Forgetting to pass `categorical_feature` to LightGBM, causing it to treat integer-encoded categoricals as continuous.\\n- Setting `scale_pos_weight` AND applying SMOTE simultaneously, which double-corrects for imbalance.\\n- Running SHAP on the training set instead of the validation/test set, which inflates apparent feature relevance.\\n- Not setting `verbosity=0` or `verbose=-1` during hyperparameter search, flooding logs with thousands of training lines.\\n- Using `pickle` instead of the framework's native `.save_model()`, which breaks across library version upgrades.\\n\\n## Output contract\\n- Report best hyperparameters, best iteration number, and validation metric trajectory (or at minimum start/best/final values).\\n- Include the class-imbalance strategy used and its rationale.\\n- Provide SHAP summary plots or feature importance rankings with the method explicitly named.\\n- Never report train-only metrics as final performance. Always include validation or test metrics.\\n- Record the framework name and version (e.g., `xgboost==2.0.3`) in artifact metadata.\\n- If multiple frameworks were compared, include a side-by-side metric table with identical folds.\\n- Save the model in native format alongside a JSON metadata sidecar.\\n\\n## Composability hints\\n- Before this expert -> use the **Data Cleaning Expert** for null handling and type coercion. 
Gradient boosters handle NaNs natively (XGBoost, LightGBM) but benefit from clean categoricals.\\n- Before this expert -> use the **Scikit-learn Modeling Expert** if a quick linear baseline is needed for comparison.\\n- After this expert -> use the **Visualization Expert** to plot SHAP summaries, learning curves, or metric comparisons.\\n- After this expert -> use the **Machine Learning Export Expert** to convert the model to ONNX or package it for serving.\\n- Related -> the **Statistics Expert** for post-hoc significance tests when comparing model performance across folds.\"), score=0.6425158381462097, sparse_score=0.6425158381462097, dense_score=None)]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from skill_registry_rag.retriever import SkillRetriever\n",
    "\n",
@@ -93,6 +258,31 @@
    "hits # shows top 3 relevant expert cards"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "de519191",
+   "metadata": {},
+   "source": [
+    "### Sample Output\n",
+    "\n",
+    "```text\n",
+    "Top-3 Retrieval Results:\n",
+    "\n",
+    "1. role.data-analyst\n",
+    "   Title: Data Analyst Role Orchestrator\n",
+    "   Score: 1.00\n",
+    "   Provider Support: codex, claude\n",
+    "\n",
+    "2. docs.slides-pptx\n",
+    "   Title: Slide Deck Creation (PPTX)\n",
+    "   Score: 0.67\n",
+    "\n",
+    "3. 
ml.gradient-boosting\n",
+    "   Title: Gradient Boosting with XGBoost and LightGBM\n",
+    "   Score: 0.64\n",
+    "```"
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "5c85573e",
@@ -106,10 +296,49 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
   "id": "47c4c7a6",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "USER QUERY:\n",
+      "clean messy sales data and generate charts\n",
+      "\n",
+      "RETRIEVED CONTEXT:\n",
+      "# Data Analyst Role Expert\n",
+      "\n",
+      "Use this role when the request needs end-to-end tabular analysis: data profiling, cleaning, exploratory analysis, baseline modeling, and clear visual communication.\n",
+      "\n",
+      "## Allowed expert dependencies\n",
+      "\n",
+      "- `data.pandas-advanced`\n",
+      "- `data.sql-queries`\n",
+      "- `viz.matplotlib-seaborn`\n",
+      "- `ml.sklearn-modeling`\n",
+      "- `stats.scipy-statsmodels`\n",
+      "\n",
+      "## Execution behavior\n",
+      "\n",
+      "1. Start with a data quality audit:\n",
+      "   nulls, dtypes, duplicates, outliers, key integrity, and temporal coverage.\n",
+      "2. Normalize and clean data using reproducible transformations.\n",
+      "3. Produce concise EDA:\n",
+      "   distributions, trends, segmentation, and relationship charts.\n",
+      "4. If prediction is requested, build a leakage-safe baseline model with validation metrics.\n",
+      "5. Explain findings in business terms:\n",
+      "   what changed, how much, and what action is implied.\n",
+      "6. 
End with caveats and next steps.\n", + "\n", + "## Output contract\n", + "\n", + "- `profile_summary`: row/column counts, missingness, type issues, and anomalies.\n", + "- `eda_insights`: ranked insights \n" + ] + } + ], "source": [ "context = \"\\n\\n\".join(hit.card.instruction_text for hit in hits)\n", "\n", @@ -120,6 +349,40 @@ "print(context[:1000]) # prints first 1000 characters of combined instructions" ] }, + { + "cell_type": "markdown", + "id": "9f832f4c", + "metadata": {}, + "source": [ + "### Sample Output\n", + "\n", + "```text\n", + "USER QUERY:\n", + "clean messy sales data and generate charts\n", + "\n", + "RETRIEVED CONTEXT:\n", + "# Data Analyst Role Expert\n", + "\n", + "Use this role when the request needs end-to-end tabular analysis:\n", + "data profiling, cleaning, exploratory analysis, baseline modeling,\n", + "and clear visual communication.\n", + "\n", + "Allowed expert dependencies:\n", + "- data.pandas-advanced\n", + "- data.sql-queries\n", + "- viz.matplotlib-seaborn\n", + "- ml.sklearn-modeling\n", + "- stats.scipy-statsmodels\n", + "\n", + "Execution behavior:\n", + "1. Perform data quality audit\n", + "2. Normalize and clean data\n", + "3. Produce EDA with charts\n", + "4. Build baseline model if required\n", + "5. 
Explain findings in business terms\n",
+    "```"
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "f232afa3",
@@ -134,10 +397,70 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
   "id": "08b5202d",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Query: clean messy sales data\n",
+      "# Data Analyst Role Expert\n",
+      "\n",
+      "Use this role when the request needs end-to-end tabular analysis: data profiling, cleaning, exploratory analysis, baseline modeling, and clear visual communication.\n",
+      "\n",
+      "## Allowed expert dependencies\n",
+      "\n",
+      "- `data.pandas-advanced`\n",
+      "- `data.sql-queries`\n",
+      "- `viz.matplotlib-seaborn`\n",
+      "- `ml.sklearn-modeling`\n",
+      "- `stats.scipy-statsmodels`\n",
+      "\n",
+      "## Execution behavior\n",
+      "\n",
+      "1. Start with a data quality audit:\n",
+      "   nulls, dtypes, duplicates, outliers, key integrity, and temporal coverage.\n",
+      "2. Normaliz\n",
+      "--------------------------------------------------\n",
+      "Query: generate charts for sales\n",
+      "# Slide Creation Expert (PPTX)\n",
+      "\n",
+      "Use this expert for executive summaries, project updates, and narrative decks.\n",
+      "\n",
+      "## Execution behavior\n",
+      "\n",
+      "1. Derive a slide storyline first (problem, analysis, findings, actions).\n",
+      "2. Allocate one key message per slide and keep text concise.\n",
+      "3. Generate PPTX using `python-pptx` with consistent templates.\n",
+      "4. Embed charts/tables as visuals instead of dense paragraphs.\n",
+      "5. Export final deck and register slide artifact with version/date.\n",
+      "\n",
+      "## Output contract\n",
+      "\n",
+      "- Include titl\n",
+      "--------------------------------------------------\n",
+      "Query: summarize findings\n",
+      "# Slide Creation Expert (PPTX)\n",
+      "\n",
+      "Use this expert for executive summaries, project updates, and narrative decks.\n",
+      "\n",
+      "## Execution behavior\n",
+      "\n",
+      "1. 
Derive a slide storyline first (problem, analysis, findings, actions).\n", + "2. Allocate one key message per slide and keep text concise.\n", + "3. Generate PPTX using `python-pptx` with consistent templates.\n", + "4. Embed charts/tables as visuals instead of dense paragraphs.\n", + "5. Export final deck and register slide artifact with version/date.\n", + "\n", + "## Output contract\n", + "\n", + "- Include titl\n", + "--------------------------------------------------\n" + ] + } + ], "source": [ "queries = [\n", " \"clean messy sales data\",\n", @@ -152,6 +475,65 @@ " print(context[:500])\n", " print(\"-\"*50)" ] + }, + { + "cell_type": "markdown", + "id": "bbabd1f3", + "metadata": {}, + "source": [ + "### Sample Output\n", + "\n", + "---\n", + "\n", + "## Query: clean messy sales data\n", + "\n", + "### Retrieved Expert: Data Analyst Role Expert\n", + "\n", + "Use this role when the request needs end-to-end tabular analysis:\n", + "data profiling, cleaning, exploratory analysis, baseline modeling,\n", + "and clear visual communication.\n", + "\n", + "#### Allowed Expert Dependencies\n", + "- data.pandas-advanced\n", + "- data.sql-queries\n", + "- viz.matplotlib-seaborn\n", + "- ml.sklearn-modeling\n", + "- stats.scipy-statsmodels\n", + "\n", + "#### Execution Behavior\n", + "1. Start with a data quality audit:\n", + " - Null values\n", + " - Data types\n", + " - Duplicates\n", + " - Outliers\n", + " - Key integrity\n", + " - Temporal coverage\n", + "2. Normalize and clean data\n", + "3. Produce exploratory data analysis (EDA)\n", + "4. Build baseline model if required\n", + "5. 
Explain findings in business terms\n", + "\n", + "---\n", + "\n", + "## Query: generate charts for sales\n", + "\n", + "### Retrieved Expert: Slide Creation Expert (PPTX)\n", + "\n", + "Use this expert for executive summaries, project updates,\n", + "and narrative decks.\n", + "\n", + "#### Execution Behavior\n", + "- Build structured slide decks\n", + "- Embed charts and visuals\n", + "- Export PPTX file\n", + "\n", + "#### Output Contract\n", + "- Title slide\n", + "- Evidence slides with charts\n", + "- Recommendation slide\n", + "\n", + "---" + ] } ], "metadata": {