13 changes: 7 additions & 6 deletions samples/ml/ml_jobs/llm_finetune/README.md
@@ -195,6 +195,13 @@ python scripts/run_eval.py \
   --schema PUBLIC
 ```
 
+The evaluation script:
+- Retrieves the trained model checkpoint from the training job's stage
+- Generates SOAP notes for each test example using the fine-tuned model
+- Uses an LLM-as-judge approach (Qwen3-8B) to compare predictions against ground truth
+- Reports pass/fail accuracy for each SOAP section
+
+
 ### Step 4: Log, deploy & test model
 
 After training and evaluation, log the model and deploy it as a service; the deployed service can also be called through a REST API.
@@ -212,12 +219,6 @@ python scripts/run_log_n_deploy_model.py \

 Replace `<TRAIN_JOB_ID>` with the job ID from the training step (e.g., `LLM_DEMO.PUBLIC.ARCTIC_TRAINING_XXXXXXX`).
 
-The evaluation script:
-- Retrieves the trained model checkpoint from the training job's stage
-- Generates SOAP notes for each test example using the fine-tuned model
-- Uses an LLM-as-judge approach (Qwen3-8B) to compare predictions against ground truth
-- Reports pass/fail accuracy for each SOAP section
-
 ## Evaluation Notes
 
 The evaluation script uses an LLM-as-judge approach to assess the quality of generated SOAP notes. For each test example, the fine-tuned model generates predictions which are then compared against ground truth by a larger judge model (Qwen3-8B). This provides more nuanced evaluation than simple text matching, accounting for valid paraphrasing and semantic equivalence.
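For readers who want to see the shape of that judge step, here is a minimal sketch of an LLM-as-judge check, assuming a one-word PASS/FAIL prompt and the `Qwen/Qwen3-8B` checkpoint; the real prompts and answer parsing live in `scripts/run_eval.py` and may differ:

```python
from transformers import pipeline

# Hypothetical judge setup; run_eval.py may configure this differently.
judge = pipeline("text-generation", model="Qwen/Qwen3-8B")

def judge_section(section: str, prediction: str, reference: str) -> bool:
    """Ask the judge model whether a predicted SOAP section matches the reference."""
    prompt = (
        f"You are grading the {section} section of a generated SOAP note.\n"
        f"Reference:\n{reference}\n\n"
        f"Prediction:\n{prediction}\n\n"
        "Does the prediction convey the same clinical content as the reference, "
        "allowing for paraphrasing? Answer with exactly one word: PASS or FAIL."
    )
    # Greedy decoding keeps the verdict deterministic; return only new tokens.
    out = judge(prompt, max_new_tokens=8, do_sample=False, return_full_text=False)
    return "PASS" in out[0]["generated_text"].upper()

# Usage: aggregate per-section pass/fail over the test set, e.g.
# accuracy = sum(judge_section("Plan", p, r) for p, r in pairs) / len(pairs)
```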
3 changes: 2 additions & 1 deletion samples/ml/ml_jobs/llm_finetune/requirements.txt
@@ -1,2 +1,3 @@
 snowflake-ml-python>=1.21.0
-datasets
+datasets
+peft
@@ -52,7 +52,7 @@
     schema=args.schema,
     external_access_integrations=args.external_access_integrations,
     session=session,
-    pip_requirements=["transformers", "snowflake-ml-python"],
+    pip_requirements=["transformers", "snowflake-ml-python>=1.26", "peft"],
 )
 
 print(f"Job submitted with ID: {job.id}")
45 changes: 34 additions & 11 deletions samples/ml/ml_jobs/llm_finetune/src/log_n_deploy_model.py
@@ -1,7 +1,8 @@
 import argparse
 import logging
 from pathlib import Path
-import transformers
+from peft import PeftModel
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from typing import Optional
 import pandas as pd

@@ -114,23 +115,45 @@ def main():


     # --------- 1) Log model ----------
-    if args.lora_path:
-        raise ValueError("LoRA path is not supported yet")
 
+    version_name = "lora" if args.lora_path else "base"
     model_registry = Registry(session=session)
     try:
-        mv = model_registry.get_model(args.model_name).version("v1")
+        mv = model_registry.get_model(args.model_name).version(version_name)
     except Exception as e:
-        # Resolve checkpoint paths to latest global_step if applicable
-        model_path = resolve_path_global_step(resolve_stage_path(session, args.model_name_or_path))
-        logger.info(f"Model {args.model_name} not found, logging to model registry")
-        logger.info(f"Hydrating model from {model_path}")
-        pipe = transformers.pipeline("text-generation", model=model_path)
-        logger.info(f"Hydrated model {args.model_name} from {model_path}, logging to model registry")
+        if args.lora_path:
+            # 1. Load the base model
+            logger.info(f"Loading base model from {args.model_name_or_path}")
+            base_model = AutoModelForCausalLM.from_pretrained(
+                args.model_name_or_path,
+                torch_dtype="auto",
+                device_map="auto"
+            )
+            # 2. Load the LoRA adapter on top of it
+            lora_path = resolve_path_global_step(resolve_stage_path(session, args.lora_path))
+            logger.info(f"Loading LoRA adapter from {lora_path}")
+            model = PeftModel.from_pretrained(base_model, lora_path)
+            # 3. Merge LoRA weights into base model and unload adapter
+            fused_model = model.merge_and_unload()
+            logger.info(f"Merged LoRA weights into base model")
+
+            # Create pipeline with merged model
+            tokenizer = AutoTokenizer.from_pretrained(lora_path)
+            logger.info(f"Creating transformer pipeline with merged model")
+            pipe = pipeline("text-generation", model=fused_model, tokenizer=tokenizer)
+            logger.info(f"Created transformer pipeline with merged model")
+        else:
+            model_path = resolve_path_global_step(resolve_stage_path(session, args.model_name_or_path))
+            logger.info(f"Model {args.model_name} not found, logging to model registry")
+            logger.info(f"Hydrating model from {model_path}")
+            pipe = pipeline("text-generation", model=model_path)
+            logger.info(f"Created transformer pipeline with model")
+
+        logger.info(f"Logging hydrated model {args.model_name} version {version_name} to model registry")
         mv = model_registry.log_model(
             pipe,
             model_name=args.model_name,
-            version_name="v1",
+            version_name=version_name,
             signatures=openai_signatures.OPENAI_CHAT_SIGNATURE)
         logger.info(f"Model {args.model_name} version {mv.version_name} logged successfully")

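Once a version is logged under `"lora"` or `"base"`, `run_log_n_deploy_model.py` deploys it as a service. A minimal sketch of that step using the Model Registry's `create_service`, with placeholder service, compute pool, and image-repo names; exact parameters vary across snowflake-ml-python versions. With ingress enabled, the service exposes an HTTPS endpoint, which is how the REST access mentioned in the README works:

```python
from snowflake.ml.registry import Registry
from snowflake.snowpark import Session

session = Session.builder.getOrCreate()
registry = Registry(session=session)

# Fetch the version logged above ("lora" for the merged adapter, "base" otherwise).
mv = registry.get_model("MY_FINETUNED_MODEL").version("lora")

# Placeholder service, compute pool, and image repo names.
mv.create_service(
    service_name="SOAP_NOTES_SERVICE",
    service_compute_pool="DEMO_GPU_POOL",
    image_repo="DEMO_IMAGE_REPO",
    gpu_requests="1",          # request a GPU for LLM inference
    ingress_enabled=True,      # exposes an HTTPS endpoint for REST calls
)
```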