24 changes: 24 additions & 0 deletions README.md
@@ -22,7 +22,31 @@
## LLM models

- We recommend the 4-bit quantized Gemma 2b model, which can be downloaded from [HuggingFace](https://huggingface.co/lmstudio-ai/gemma-2b-it-GGUF/blob/main/gemma-2b-it-q4_k_m.gguf).
- In practice, the Gemma 2b model above is too weak to generate accurate responses, so we use the Mistral-7B model from [ollama](https://github.com/ollama/ollama).

## Embedding model

- We recommend the lightweight BERT-like model all-MiniLM-L6-v2 for sentence embeddings, which can be obtained directly from [HuggingFace](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2).
- The embedding functions available when constructing the VectorStore database include GPT4AllEmbeddings and HuggingFaceEmbeddings; a minimal construction sketch follows.
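
For reference, here is a minimal sketch of building the vectorstore with GPT4AllEmbeddings, mirroring the pipeline code in this PR; the persist directory below is a placeholder path, not a committed artifact:

```python
# Minimal sketch mirroring the pipeline code in this PR.
from langchain_chroma import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings

embeddings = GPT4AllEmbeddings(
    model_name="all-MiniLM-L6-v2.gguf2.f16.gguf",
    gpt4all_kwargs={"allow_download": "True"},
)
vectorstore = Chroma(
    persist_directory="path/to/chroma_wiki",  # placeholder path
    collection_name="rag-chroma",
    embedding_function=embeddings,
)
retriever = vectorstore.as_retriever()
```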

## Retrieval-Augmented Generation (RAG)
- RAG is used to refine the generation process, yielding more accurate predictions and fewer hallucinated results.
- The implementation follows the paper [Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection](http://arxiv.org/abs/2310.11511).
- Self-RAG is a RAG strategy that incorporates self-reflection (grading) on retrieved documents and generations. In the paper, a few decisions are made (a minimal grading sketch follows this list):
  - Should I retrieve documents?
    - Input: `x (question)`, `y (generation)`
    - Decides when to retrieve `D` chunks with retriever `R`
    - Output: `{yes, no, continue}`
  - Are the retrieved passages `D` relevant to the question `x`?
    - Input: `x (question)`, `d (chunk)` for `d` in `D`
    - `d` provides useful information to solve `x`
    - Output: `{relevant, irrelevant}`
  - Is the LLM generation from each chunk in `D` relevant to the chunk (hallucinations, etc.)?
    - Input: `x (question)`, `d (chunk)`, `y (generation)` for `d` in `D`
    - All of the verification-worthy statements in `y (generation)` are supported by `d`
    - Output: `{fully supported, partially supported, no support}`
  - Is the LLM generation from each chunk in `D` a useful response to `x (question)`?
    - Input: `x (question)`, `y (generation)` for `d` in `D`
    - `y (generation)` is a useful response to `x (question)`.
    - Output: `{yes, no}`
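
As referenced above, here is a minimal, hypothetical sketch of the retrieval-relevance grading step. It assumes an `llm` callable that maps a prompt string to a text completion (for example a Mistral-7B wrapper); it illustrates the grading idea and is not the paper's trained critic model:

```python
# Hypothetical sketch of the retrieval-relevance grading step.
# `llm` is assumed to be any callable mapping a prompt string to text
# (e.g. a Mistral-7B wrapper); it is NOT the paper's trained critic.
GRADE_PROMPT = (
    "You are a grader assessing whether a retrieved document is relevant "
    "to a user question.\n"
    "Document:\n{document}\n\n"
    "Question: {question}\n"
    "Answer with exactly one word: relevant or irrelevant."
)

def grade_relevance(llm, question, documents):
    """Keep only the chunks the critic judges relevant to the question."""
    relevant = []
    for d in documents:
        verdict = llm(GRADE_PROMPT.format(document=d, question=question))
        if "irrelevant" not in verdict.lower():
            relevant.append(d)
    return relevant
```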

@@ -0,0 +1,119 @@
# -*- coding: ascii -*-

# RAG-enhanced LLM pipeline for the classification task on rel-movielens1M
# Paper: Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection https://arxiv.org/abs/2310.11511
# Title only: macro_f1: 0.251, micro_f1: 0.387
# Full info: macro_f1: 0.892, micro_f1: 0.884
# Runtime: Title only: 2990s; Full info: 6757s (on a single 6G GPU)
# Cost: Title only: $0.2722; Full info: $0.5996
# Description: Given the movie name and limited genre information, relevant documents are retrieved from a Wikipedia database to assist the LLM in predicting movie genres. We introduce Self-RAG to critique retrieval and generation with critique tokens.
# Usage: python rag-movielens1m_clf.py --prompt title/all

# Append rllm to search path
import sys
sys.path.append("../../../../")
import time
import argparse

import pandas as pd

from tqdm import tqdm
from sklearn.preprocessing import MultiLabelBinarizer

from itertools import islice

from langchain.schema import BaseOutputParser
from langchain_chroma import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings

from rllm.utils import macro_f1_score, micro_f1_score, get_llm_chat_cost
from rllm.selfrag_func import self_rag
# Needed for the --prompt all branch below; missing in the original file.
from rllm.selfrag_func_all import self_rag_all

##### Parse argument
parser = argparse.ArgumentParser()
parser.add_argument('--prompt', choices=['title', 'all'],
                    default='title', help='Choose prompt type.')
args = parser.parse_args()

##### Start time
time_start = time.time()

##### Global variables
total_cost = 0
test_path = "/home/qinghua_mao/work/rllm/rllm/datasets/rel-movielens1m/classification/movies/test.csv"

def parse(text: str):
    """Parse the output of an LLM call into a genre list."""
    genres = text.split('::')[-1]
    genre_list = [genre.strip() for genre in genres.split(',')]
    return genre_list

class GenreOutputParser(BaseOutputParser):
    """Parse the output of the LLM into a genre list."""

    def parse(self, text: str):
        """Parse the output of an LLM call."""
        genres = text.split('::')[-1]
        genre_list = [genre.strip() for genre in genres.split(',')]
        return genre_list

output_parser = GenreOutputParser()

##### 1. Load vectorstore and retriever
# Load documents from the persist directory of the vectorstore.
model_name = "all-MiniLM-L6-v2.gguf2.f16.gguf"
gpt4all_kwargs = {'allow_download': 'True'}
embeddings = GPT4AllEmbeddings(
    model_name=model_name,
    gpt4all_kwargs=gpt4all_kwargs
)
vectorstore = Chroma(
    persist_directory="/home/qinghua_mao/work/rllm/chroma_wiki",
    collection_name="rag-chroma",
    embedding_function=embeddings
)

# Retrieve relevant snippets from the Wikipedia vectorstore.
retriever = vectorstore.as_retriever()

##### 2. LLM prediction
movie_df = pd.read_csv(test_path)

pred_genre_list = []
if args.prompt == 'title':
    for index, row in tqdm(islice(movie_df.iterrows(), 300), total=min(len(movie_df), 300), desc="Processing Movies"):
        pred, prompt_cost = self_rag(movie_name=row['Title'], prompt="title", retriever=retriever)
        total_cost = total_cost + prompt_cost
        genres = parse(pred)
        pred_genre_list.append(genres)

        total_cost = total_cost + get_llm_chat_cost(','.join(genres), 'output')
else:
    for index, row in tqdm(islice(movie_df.iterrows(), 300), total=min(len(movie_df), 300), desc="Processing Movies"):
        pred, prompt_cost = self_rag_all(prompt="all", retriever=retriever, Title=row['Title'], Director=row['Director'], Year=row['Year'],
                                         Genre=row['Genre'], Cast=row['Cast'], Runtime=row['Runtime'],
                                         Languages=row['Languages'], Certificate=row['Certificate'],
                                         Plot=row['Plot'])
        total_cost = total_cost + prompt_cost
        genres = parse(pred)
        pred_genre_list.append(genres)

        total_cost = total_cost + get_llm_chat_cost(','.join(genres), 'output')

##### 3. Calculate F1 scores
# Get all genres
movie_genres = movie_df["Genre"].iloc[:300].str.split("|")
all_genres = list(set([genre for genres in movie_genres for genre in genres]))

mlb = MultiLabelBinarizer(classes=all_genres)
real_genres_matrix = mlb.fit_transform(movie_genres)
# mlb is fitted with a fixed class list, so transform (not fit_transform) suffices here.
pred_genres_matrix = mlb.transform(pred_genre_list)
macro_f1 = macro_f1_score(real_genres_matrix, pred_genres_matrix)
micro_f1 = micro_f1_score(real_genres_matrix, pred_genres_matrix)

##### End time
time_end = time.time()

print(f"macro_f1: {macro_f1}")
print(f"micro_f1: {micro_f1}")
print(f"Total time: {time_end - time_start}s")
print(f"Total USD$: {total_cost}")
@@ -0,0 +1,120 @@
# -*- coding: ascii -*-

# RAG-enhanced LLM pipeline for the classification task on rel-movielens1M
# Paper: Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection https://arxiv.org/abs/2310.11511
# Title only: macro_f1: 0.251, micro_f1: 0.387
# Full info: macro_f1: 0.892, micro_f1: 0.884
# Runtime: Title only: 2990s; Full info: 6757s (on a single 6G GPU)
# Cost: Title only: $0.2722; Full info: $0.5996
# Description: Given the movie name and limited genre information, relevant documents are retrieved from a Wikipedia database to assist the LLM in predicting movie genres. We introduce Self-RAG to critique retrieval and generation with critique tokens.
# Usage: python rag-movielens1m_clf.py --prompt title/all

# Append rllm to search path
import sys
sys.path.append("../../../../")
import time
import argparse

import pandas as pd

from tqdm import tqdm
from sklearn.preprocessing import MultiLabelBinarizer

from itertools import islice

from langchain.schema import BaseOutputParser
from langchain_chroma import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings

from rllm.utils import macro_f1_score, micro_f1_score, get_llm_chat_cost
from rllm.selfrag_func import self_rag
from rllm.selfrag_func_all import self_rag_all

##### Parse argument
parser = argparse.ArgumentParser()
parser.add_argument('--prompt', choices=['title', 'all'],
                    default='title', help='Choose prompt type.')
args = parser.parse_args()

##### Start time
time_start = time.time()

##### Global variables
total_cost = 0
test_path = "/home/qinghua_mao/work/rllm/rllm/datasets/rel-movielens1m/classification/movies/test.csv"

def parse(text: str):
    """Parse the output of an LLM call into a genre list."""
    genres = text.split('::')[-1]
    genre_list = [genre.strip() for genre in genres.split(',')]
    return genre_list

class GenreOutputParser(BaseOutputParser):
    """Parse the output of the LLM into a genre list."""

    def parse(self, text: str):
        """Parse the output of an LLM call."""
        genres = text.split('::')[-1]
        genre_list = [genre.strip() for genre in genres.split(',')]
        return genre_list

output_parser = GenreOutputParser()

##### 1. Load vectorstore and retriever
# Load documents from the persist directory of the vectorstore.
model_name = "all-MiniLM-L6-v2.gguf2.f16.gguf"
gpt4all_kwargs = {'allow_download': 'True'}
embeddings = GPT4AllEmbeddings(
    model_name=model_name,
    gpt4all_kwargs=gpt4all_kwargs
)
vectorstore = Chroma(
    persist_directory="/home/qinghua_mao/work/rllm/chroma_wiki",
    collection_name="rag-chroma",
    embedding_function=embeddings
)

# Retrieve relevant snippets from the Wikipedia vectorstore.
retriever = vectorstore.as_retriever()

##### 2. LLM prediction
movie_df = pd.read_csv(test_path)

pred_genre_list = []
if args.prompt == 'title':
    for index, row in tqdm(islice(movie_df.iterrows(), 5), total=min(len(movie_df), 5), desc="Processing Movies"):
        pred, prompt_cost = self_rag(movie_name=row['Title'], prompt="title", retriever=retriever)
        total_cost = total_cost + prompt_cost
        genres = parse(pred)
        pred_genre_list.append(genres)

        total_cost = total_cost + get_llm_chat_cost(','.join(genres), 'output')
else:
    for index, row in tqdm(islice(movie_df.iterrows(), 5), total=min(len(movie_df), 5), desc="Processing Movies"):
        pred, prompt_cost = self_rag_all(prompt="all", retriever=retriever, Title=row['Title'], Director=row['Director'], Year=row['Year'],
                                         Genre=row['Genre'], Cast=row['Cast'], Runtime=row['Runtime'],
                                         Languages=row['Languages'], Certificate=row['Certificate'],
                                         Plot=row['Plot'])
        total_cost = total_cost + prompt_cost
        genres = parse(pred)
        pred_genre_list.append(genres)

        total_cost = total_cost + get_llm_chat_cost(','.join(genres), 'output')

##### 3. Calculate F1 scores
# Get all genres
movie_genres = movie_df["Genre"].iloc[:5].str.split("|")
all_genres = list(set([genre for genres in movie_genres for genre in genres]))

mlb = MultiLabelBinarizer(classes=all_genres)
real_genres_matrix = mlb.fit_transform(movie_genres)
# mlb is fitted with a fixed class list, so transform (not fit_transform) suffices here.
pred_genres_matrix = mlb.transform(pred_genre_list)
macro_f1 = macro_f1_score(real_genres_matrix, pred_genres_matrix)
micro_f1 = micro_f1_score(real_genres_matrix, pred_genres_matrix)

##### End time
time_end = time.time()

print(f"macro_f1: {macro_f1}")
print(f"micro_f1: {micro_f1}")
print(f"Total time: {time_end - time_start}s")
print(f"Total USD$: {total_cost}")
@@ -24,6 +24,14 @@
from langchain.prompts import PromptTemplate
from langchain.schema import BaseOutputParser

import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from rllm.utils import macro_f1_score, micro_f1_score, get_llm_chat_cost

##### Parse argument
@@ -37,8 +45,23 @@

##### Global variables
total_cost = 0
test_path = "your/test_file/path"
llm_model_path = "your/llm/path"
test_path = "/home/qinghua_mao/work/rllm/rllm/datasets/rel-movielens1m/classification/movies/test.csv"
llm_model_path = "/home/qinghua_mao/work/rllm/gemma-2b-it-GGUF/gemma-2b-it-q4_k_m.gguf"
embed_path = "/home/qinghua_mao/work/rllm/all-MiniLM-L6-v2"

from langchain.embeddings.base import Embeddings
from sentence_transformers import SentenceTransformer
from typing import List

class CustomEmbeddings(Embeddings):
    """Wrap a local SentenceTransformer checkpoint as a LangChain Embeddings object."""

    def __init__(self, model_name: str):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, documents: List[str]) -> List[List[float]]:
        return [self.model.encode(d).tolist() for d in documents]

    def embed_query(self, query: str) -> List[float]:
        return self.model.encode([query])[0].tolist()
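
# Example usage (hypothetical, not part of the original file):
#   embeddings = CustomEmbeddings(model_name=embed_path)
#   query_vector = embeddings.embed_query("Which genres fit this plot?")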

##### 1. Construct LLM chain
# Load model
@@ -104,6 +127,7 @@ def parse(self, text: str):
        total_cost = total_cost + get_llm_chat_cost(prompt_title_template.invoke({"movie_name": row['Title']}).text, 'input')

        pred = chain.invoke({"movie_name": row['Title']})
        print(pred)
        pred_genre_list.append(pred)

        total_cost = total_cost + get_llm_chat_cost(','.join(pred), 'output')
@@ -27,11 +27,25 @@

##### Global variables
total_cost = 0
train_path = "your/train_file/path"
movie_path = "your/movie_file/path"
test_path = "your/test_file/path"
llm_model_path = "your/llm/path"
train_path = "/home/qinghua_mao/work/rllm/rllm/datasets/rel-movielens1m/regression/ratings/train.csv"
movie_path = "/home/qinghua_mao/work/rllm/rllm/datasets/rel-movielens1m/regression/movies.csv"
test_path = "/home/qinghua_mao/work/rllm/rllm/datasets/rel-movielens1m/regression/ratings/test.csv"
llm_model_path = "/home/qinghua_mao/work/rllm/gemma-2b-it-GGUF/gemma-2b-it-q4_k_m.gguf"
embed_path = "/home/qinghua_mao/work/rllm/all-MiniLM-L6-v2"

from langchain.embeddings.base import Embeddings
from sentence_transformers import SentenceTransformer
from typing import List

class CustomEmbeddings(Embeddings):
    """Wrap a local SentenceTransformer checkpoint as a LangChain Embeddings object."""

    def __init__(self, model_name: str):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, documents: List[str]) -> List[List[float]]:
        return [self.model.encode(d).tolist() for d in documents]

    def embed_query(self, query: str) -> List[float]:
        return self.model.encode([query])[0].tolist()
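
# Example usage (hypothetical, not part of the original file):
#   embeddings = CustomEmbeddings(model_name=embed_path)
#   query_vector = embeddings.embed_query("Which genres fit this plot?")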

##### 1. Construct LLM chain
# Load model