From 18570ba29641537ec80f3b0e2300ae4c9c919bd3 Mon Sep 17 00:00:00 2001 From: Vee Date: Thu, 1 May 2025 18:05:39 -0400 Subject: [PATCH 1/2] reorganized files and implemented VADER and BilSTM, both of which need more work --- test_data.csv => data/test_data.csv | 0 train_data.csv => data/train_data.csv | 0 train_test.py => data/train_test.py | 0 .../twitter_sentiment_data.csv | 0 evaluate.py => models/BERT/evaluate.py | 4 +- model.py => models/BERT/model.py | 52 ---------------- .../BERT/run_training.py | 2 +- train.py => models/BERT/train.py | 2 +- models/BERT/utils.py | 53 ++++++++++++++++ models/BilSTM/dataset.py | 16 +++++ models/BilSTM/evaluate.py | 62 +++++++++++++++++++ models/BilSTM/main.py | 59 ++++++++++++++++++ models/BilSTM/model.py | 24 +++++++ models/BilSTM/train.py | 24 +++++++ models/BilSTM/utils.py | 20 ++++++ models/VADER/vader.py | 51 +++++++++++++++ 16 files changed, 313 insertions(+), 56 deletions(-) rename test_data.csv => data/test_data.csv (100%) rename train_data.csv => data/train_data.csv (100%) rename train_test.py => data/train_test.py (100%) rename twitter_sentiment_data.csv => data/twitter_sentiment_data.csv (100%) rename evaluate.py => models/BERT/evaluate.py (97%) rename model.py => models/BERT/model.py (51%) rename run_training.py => models/BERT/run_training.py (83%) rename train.py => models/BERT/train.py (96%) create mode 100644 models/BERT/utils.py create mode 100644 models/BilSTM/dataset.py create mode 100644 models/BilSTM/evaluate.py create mode 100644 models/BilSTM/main.py create mode 100644 models/BilSTM/model.py create mode 100644 models/BilSTM/train.py create mode 100644 models/BilSTM/utils.py create mode 100644 models/VADER/vader.py diff --git a/test_data.csv b/data/test_data.csv similarity index 100% rename from test_data.csv rename to data/test_data.csv diff --git a/train_data.csv b/data/train_data.csv similarity index 100% rename from train_data.csv rename to data/train_data.csv diff --git a/train_test.py 
b/data/train_test.py similarity index 100% rename from train_test.py rename to data/train_test.py diff --git a/twitter_sentiment_data.csv b/data/twitter_sentiment_data.csv similarity index 100% rename from twitter_sentiment_data.csv rename to data/twitter_sentiment_data.csv diff --git a/evaluate.py b/models/BERT/evaluate.py similarity index 97% rename from evaluate.py rename to models/BERT/evaluate.py index b2030ef..7c07f32 100644 --- a/evaluate.py +++ b/models/BERT/evaluate.py @@ -1,11 +1,11 @@ import pandas as pd -import torch +import torch import numpy as np from transformers import AutoTokenizer from sklearn.metrics import classification_report, confusion_matrix, accuracy_score import matplotlib.pyplot as plt import seaborn as sns -from model import ClimateModel +from NLP_climate_analysis.models.BERT.model import ClimateModel def load_model(model_path): """Load the saved model""" diff --git a/model.py b/models/BERT/model.py similarity index 51% rename from model.py rename to models/BERT/model.py index ba0ba00..abc207b 100644 --- a/model.py +++ b/models/BERT/model.py @@ -50,59 +50,7 @@ def __getitem__(self, idx): def __len__(self): return len(self.labels) -# Function to compute metrics for evaluation -def compute_metrics(eval_pred): - from sklearn.metrics import accuracy_score, precision_recall_fscore_support - - predictions, labels = eval_pred - predictions = predictions.argmax(axis=1) - - accuracy = accuracy_score(labels, predictions) - precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='weighted') - - return { - 'accuracy': accuracy, - 'precision': precision, - 'recall': recall, - 'f1': f1 - } -# Training arguments for the Trainer -def get_training_args(output_dir='./results'): - from transformers import TrainingArguments - - return TrainingArguments( - output_dir=output_dir, - eval_strategy='epoch', - save_strategy='epoch', - learning_rate=2e-5, - per_device_train_batch_size=16, - per_device_eval_batch_size=32, - 
num_train_epochs=3, - weight_decay=0.01, - load_best_model_at_end=True, - metric_for_best_model='accuracy', - logging_dir='./logs', - logging_steps=10 - ) -# Prediction function to use after training -def predict_sentiment(model, tokenizer, texts): - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - model.to(device) - model.eval() - encodings = tokenizer(texts, padding=True, truncation=True, max_length=128, return_tensors='pt') - encodings = {key: val.to(device) for key, val in encodings.items()} - - with torch.no_grad(): - # Pass only input_ids and attention_mask for prediction - outputs = model(input_ids=encodings['input_ids'], attention_mask=encodings['attention_mask']) - # The model should return only logits when labels are not provided - logits = outputs - - predictions = torch.argmax(logits, dim=1).cpu().numpy() - # Ensure the sentiment map covers all possible prediction indices (0, 1, 2) - sentiment_map = {0: 'anti', 1: 'neutral', 2: 'pro'} - return [sentiment_map[pred] for pred in predictions] diff --git a/run_training.py b/models/BERT/run_training.py similarity index 83% rename from run_training.py rename to models/BERT/run_training.py index 4744b6c..8ee4a05 100644 --- a/run_training.py +++ b/models/BERT/run_training.py @@ -6,7 +6,7 @@ # Import the training script and run it sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -from train import main +from NLP_climate_analysis.models.BERT.train import main # Run the main function if __name__ == "__main__": diff --git a/train.py b/models/BERT/train.py similarity index 96% rename from train.py rename to models/BERT/train.py index ef18b37..3399215 100644 --- a/train.py +++ b/models/BERT/train.py @@ -2,7 +2,7 @@ import torch from transformers import AutoTokenizer, Trainer from sklearn.metrics import classification_report -from model import ClimateModel, ClimateDataset, compute_metrics, get_training_args, predict_sentiment +from NLP_climate_analysis.models.BERT.model import 
ClimateModel, ClimateDataset, compute_metrics, get_training_args, predict_sentiment def main(): # Check for CUDA diff --git a/models/BERT/utils.py b/models/BERT/utils.py new file mode 100644 index 0000000..2fe869c --- /dev/null +++ b/models/BERT/utils.py @@ -0,0 +1,53 @@ +import torch +from sklearn.metrics import accuracy_score, precision_recall_fscore_support +from transformers import TrainingArguments + +# Function to compute metrics for evaluation +def compute_metrics(eval_pred): + predictions, labels = eval_pred + predictions = predictions.argmax(axis=1) + accuracy = accuracy_score(labels, predictions) + precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='weighted') + return { + 'accuracy': accuracy, + 'precision': precision, + 'recall': recall, + 'f1': f1 + } + +# Training arguments for the Trainer +def get_training_args(output_dir='./results'): + return TrainingArguments( + output_dir=output_dir, + evaluation_strategy='epoch', + save_strategy='epoch', + learning_rate=2e-5, + per_device_train_batch_size=16, + per_device_eval_batch_size=32, + num_train_epochs=3, + weight_decay=0.01, + load_best_model_at_end=True, + metric_for_best_model='accuracy', + logging_dir='./logs', + logging_steps=10 + ) + +# Prediction function to use after training +def predict_sentiment(model, tokenizer, texts): + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + model.to(device) + model.eval() + + encodings = tokenizer(texts, padding=True, truncation=True, max_length=128, return_tensors='pt') + encodings = {key: val.to(device) for key, val in encodings.items()} + + with torch.no_grad(): + # Pass only input_ids and attention_mask for prediction + outputs = model(input_ids=encodings['input_ids'], attention_mask=encodings['attention_mask']) + # The model should return only logits when labels are not provided + logits = outputs + + predictions = torch.argmax(logits, dim=1).cpu().numpy() + # Ensure the sentiment map covers all 
possible prediction indices (0, 1, 2) + sentiment_map = {0: 'anti', 1: 'neutral', 2: 'pro'} + return [sentiment_map[pred] for pred in predictions] \ No newline at end of file diff --git a/models/BilSTM/dataset.py b/models/BilSTM/dataset.py new file mode 100644 index 0000000..bbe2aa2 --- /dev/null +++ b/models/BilSTM/dataset.py @@ -0,0 +1,16 @@ +import torch +from torch.utils.data import Dataset + +class TweetDataset(Dataset): + """ + Custom PyTorch dataset for sentiment classification. + """ + def __init__(self, inputs, labels): + self.inputs = torch.tensor(inputs, dtype=torch.long) + self.labels = torch.tensor(labels, dtype=torch.long) + + def __len__(self): + return len(self.labels) + + def __getitem__(self, idx): + return self.inputs[idx], self.labels[idx] diff --git a/models/BilSTM/evaluate.py b/models/BilSTM/evaluate.py new file mode 100644 index 0000000..8915ad7 --- /dev/null +++ b/models/BilSTM/evaluate.py @@ -0,0 +1,62 @@ +import torch +from sklearn.metrics import classification_report, confusion_matrix, accuracy_score +import matplotlib.pyplot as plt +import seaborn as sns + +def predict(model, tokenizer, text_list, word_index, embedding_dim=100, max_len=100, device='cpu'): + """ + Predict sentiment for a list of input texts using the trained BiLSTM model. + Returns the predicted sentiment labels. 
+ """ + from keras.preprocessing.sequence import pad_sequences # type: ignore + from keras.preprocessing.text import Tokenizer # type: ignore + + model.eval() + model.to(device) + + # Use the same tokenizer that was fit on training data + sequences = tokenizer.texts_to_sequences(text_list) + padded = pad_sequences(sequences, maxlen=max_len) + inputs = torch.tensor(padded, dtype=torch.long).to(device) + + with torch.no_grad(): + outputs = model(inputs) + preds = torch.argmax(outputs, dim=1).cpu().numpy() + + label_map = {0: 'anti', 1: 'neutral', 2: 'pro'} + return [label_map[p] for p in preds] + +def evaluate_model(model, test_loader, device, label_names): + """ + Evaluate the model on the test set and print metrics. + """ + model.eval() + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for inputs, labels in test_loader: + inputs = inputs.to(device) + outputs = model(inputs) + preds = torch.argmax(outputs, dim=1).cpu().numpy() + all_preds.extend(preds) + all_labels.extend(labels.numpy()) + + print("\nClassification Report:") + print(classification_report(all_labels, all_preds, target_names=label_names)) + print(f"Accuracy: {accuracy_score(all_labels, all_preds):.4f}") + + cm = confusion_matrix(all_labels, all_preds) + plot_confusion_matrix(cm, label_names) + + +def plot_confusion_matrix(cm, class_names): + plt.figure(figsize=(8, 6)) + sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', + xticklabels=class_names, yticklabels=class_names) + plt.xlabel("Predicted") + plt.ylabel("True") + plt.title("Confusion Matrix") + plt.tight_layout() + plt.savefig("bilstm_confusion_matrix.png") + plt.close() diff --git a/models/BilSTM/main.py b/models/BilSTM/main.py new file mode 100644 index 0000000..f0fca36 --- /dev/null +++ b/models/BilSTM/main.py @@ -0,0 +1,59 @@ +import pandas as pd +import torch +import numpy as np +from tensorflow.keras.preprocessing.text import Tokenizer # type: ignore +from tensorflow.keras.preprocessing.sequence import pad_sequences # type: 
ignore +from torch.utils.data import DataLoader +import torch.nn as nn +import torch.optim as optim + +from BilSTM.dataset import TweetDataset +from BilSTM.model import BiLSTMModel +from BilSTM.train import train_model +from BilSTM.evaluate import evaluate_model +from BilSTM.utils import load_glove_embeddings + +def main(): + MAX_LEN = 100 + BATCH_SIZE = 64 + EMBEDDING_DIM = 100 + NUM_EPOCHS = 5 + GLOVE_PATH = "embeddings/glove.6B.100d.txt" + CLASS_NAMES = ['anti', 'neutral', 'pro'] + + # Load data + train_df = pd.read_csv("data/train_data.csv") + test_df = pd.read_csv("data/test_data.csv") + + tokenizer = Tokenizer() + tokenizer.fit_on_texts(train_df['message']) + word_index = tokenizer.word_index + + X_train = pad_sequences(tokenizer.texts_to_sequences(train_df['message']), maxlen=MAX_LEN) + X_test = pad_sequences(tokenizer.texts_to_sequences(test_df['message']), maxlen=MAX_LEN) + y_train = train_df['sentiment'].values + y_test = test_df['sentiment'].values + + # Load embeddings + embedding_matrix = load_glove_embeddings(GLOVE_PATH, word_index, EMBEDDING_DIM) + + # Model and training setup + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model = BiLSTMModel(embedding_matrix).to(device) + + train_loader = DataLoader(TweetDataset(X_train, y_train), batch_size=BATCH_SIZE, shuffle=True) + test_loader = DataLoader(TweetDataset(X_test, y_test), batch_size=BATCH_SIZE) + + criterion = nn.CrossEntropyLoss() + optimizer = optim.Adam(model.parameters(), lr=1e-3) + + # Train loop + for epoch in range(NUM_EPOCHS): + loss = train_model(model, train_loader, optimizer, device) + print(f"Epoch {epoch+1}/{NUM_EPOCHS}, Loss: {loss:.4f}") + + # Evaluate + evaluate_model(model, test_loader, device, CLASS_NAMES) + +if __name__ == "__main__": + main() diff --git a/models/BilSTM/model.py b/models/BilSTM/model.py new file mode 100644 index 0000000..13f8366 --- /dev/null +++ b/models/BilSTM/model.py @@ -0,0 +1,24 @@ +import torch +import torch.nn as nn
+ +class BiLSTMModel(nn.Module): + """ + BiLSTM model using pre-trained embeddings. + """ + def __init__(self, embedding_matrix, hidden_dim=128, output_dim=3, dropout=0.5): + super(BiLSTMModel, self).__init__() + vocab_size, embedding_dim = embedding_matrix.shape + self.embedding = nn.Embedding(vocab_size, embedding_dim) + self.embedding.weight = nn.Parameter(torch.tensor(embedding_matrix, dtype=torch.float32)) + self.embedding.weight.requires_grad = False # Freeze embeddings + + self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True, bidirectional=True) + self.dropout = nn.Dropout(dropout) + self.fc = nn.Linear(hidden_dim * 2, output_dim) + + def forward(self, x): + embedded = self.embedding(x) + lstm_out, _ = self.lstm(embedded) + pooled = torch.mean(lstm_out, dim=1) + output = self.fc(self.dropout(pooled)) + return output diff --git a/models/BilSTM/train.py b/models/BilSTM/train.py new file mode 100644 index 0000000..c0e7baf --- /dev/null +++ b/models/BilSTM/train.py @@ -0,0 +1,24 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +def train_model(model, train_loader, optimizer, device): + """ + Train the BiLSTM model on the training set using CrossEntropyLoss. + """ + model.train() + total_loss = 0 + criterion = nn.CrossEntropyLoss() + + for inputs, labels in train_loader: + inputs, labels = inputs.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(inputs) # Raw logits + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + total_loss += loss.item() + + return total_loss / len(train_loader) diff --git a/models/BilSTM/utils.py b/models/BilSTM/utils.py new file mode 100644 index 0000000..7fb2f22 --- /dev/null +++ b/models/BilSTM/utils.py @@ -0,0 +1,20 @@ +import numpy as np + +def load_glove_embeddings(glove_path, word_index, embedding_dim=100): + """ + Loads GloVe embeddings into an embedding matrix aligned with the tokenizer word index. 
+ """ + embeddings_index = {} + with open(glove_path, 'r', encoding='utf8') as f: + for line in f: + values = line.split() + word = values[0] + vector = np.asarray(values[1:], dtype='float32') + embeddings_index[word] = vector + + embedding_matrix = np.zeros((len(word_index) + 1, embedding_dim)) + for word, i in word_index.items(): + vec = embeddings_index.get(word) + if vec is not None: + embedding_matrix[i] = vec + return embedding_matrix diff --git a/models/VADER/vader.py b/models/VADER/vader.py new file mode 100644 index 0000000..665333a --- /dev/null +++ b/models/VADER/vader.py @@ -0,0 +1,51 @@ +import os +import nltk +import pandas as pd +from nltk.sentiment import SentimentIntensityAnalyzer +from sklearn.metrics import classification_report + +# Setup +nltk.download('vader_lexicon') +script_dir = os.path.dirname(os.path.abspath(__file__)) +file_path = os.path.join(script_dir, 'test_data.csv') +df = pd.read_csv(file_path) + +# Initialize VADER +sia = SentimentIntensityAnalyzer() + +# Label mapping +label_map = {-1: 'anti', 0: 'neutral', 1: 'pro'} +df['true_sentiment'] = df['sentiment'].map(label_map) + +def get_vader_sentiment(text): + score = sia.polarity_scores(text)['compound'] + if score > 0: + return 'pro' + elif score < 0: + return 'anti' + else: + return 'neutral' + +# Apply sentiment classification +df['vader_sentiment'] = df['message'].astype(str).apply(get_vader_sentiment) + +# Ensure clean string types +df['true_sentiment'] = df['true_sentiment'].astype(str) +df['vader_sentiment'] = df['vader_sentiment'].astype(str) + +# Evaluation +print("Classification Report") +print(classification_report( + df['true_sentiment'], + df['vader_sentiment'], + labels=['anti', 'neutral', 'pro'], + target_names=['anti', 'neutral', 'pro'] +)) + +print(df['true_sentiment'].value_counts()) + + +# Print common misclassifications +print("\nTop Misclassified Examples:") +misclassified = df[df['true_sentiment'] != df['vader_sentiment']] +print(misclassified[['message', 
'true_sentiment', 'vader_sentiment']].sample(10, random_state=42)) From 7acbfa6b2315f6f6c9999c9f4f01f7c031f14a39 Mon Sep 17 00:00:00 2001 From: nedcut Date: Thu, 1 May 2025 22:18:35 -0400 Subject: [PATCH 2/2] fixing merge --- models/BERT/evaluate.py | 86 ------------------------------------- models/BERT/run_training.py | 13 ------ requirements.txt | 4 +- 3 files changed, 3 insertions(+), 100 deletions(-) delete mode 100644 models/BERT/evaluate.py delete mode 100644 models/BERT/run_training.py diff --git a/models/BERT/evaluate.py b/models/BERT/evaluate.py deleted file mode 100644 index 7c07f32..0000000 --- a/models/BERT/evaluate.py +++ /dev/null @@ -1,86 +0,0 @@ -import pandas as pd -import torch -import numpy as np -from transformers import AutoTokenizer -from sklearn.metrics import classification_report, confusion_matrix, accuracy_score -import matplotlib.pyplot as plt -import seaborn as sns -from NLP_climate_analysis.models.BERT.model import ClimateModel - -def load_model(model_path): - """Load the saved model""" - model = ClimateModel() - model.load_state_dict(torch.load(f"{model_path}/pytorch_model.bin", map_location=torch.device('cpu'))) - return model - -def predict(model, tokenizer, texts, device='cpu'): - """Run prediction on a list of texts""" - model.to(device) - model.eval() - - encodings = tokenizer(texts, truncation=True, padding=True, max_length=128, return_tensors='pt') - encodings = {k: v.to(device) for k, v in encodings.items()} - - with torch.no_grad(): - outputs = model(**encodings) - - predictions = torch.argmax(outputs, dim=1).cpu().numpy() - return predictions - -def plot_confusion_matrix(cm, class_names): - """Plot confusion matrix""" - plt.figure(figsize=(10, 7)) - sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names) - plt.xlabel('Predicted') - plt.ylabel('True') - plt.title('Confusion Matrix') - plt.savefig('confusion_matrix.png') - plt.close() - -def main(): - model_path = './saved_model' - 
test_data_path = 'test_data.csv' - class_names = ['anti', 'neutral', 'pro'] - - # Load the model and tokenizer - print("Loading model and tokenizer...") - model = load_model(model_path) - tokenizer = AutoTokenizer.from_pretrained(model_path) - - # Load test data - print("Loading test data...") - test_df = pd.read_csv(test_data_path) - - # Make predictions - print("Making predictions...") - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - predictions = predict(model, tokenizer, test_df['message'].tolist(), device) - - # Evaluate predictions - print("Evaluating predictions...") - true_labels = test_df['sentiment'].values - accuracy = accuracy_score(true_labels, predictions) - print(f"Accuracy: {accuracy:.4f}") - - print("\nClassification Report:") - report = classification_report(true_labels, predictions, target_names=class_names) - print(report) - - # Create and save confusion matrix - cm = confusion_matrix(true_labels, predictions) - plot_confusion_matrix(cm, class_names) - print("Confusion matrix saved as 'confusion_matrix.png'") - - # Interactive prediction mode - print("\nEnter text to classify (or type 'exit' to quit):") - while True: - text = input("> ") - if text.lower() == 'exit': - break - - pred = predict(model, tokenizer, [text], device)[0] - sentiment = class_names[pred] - print(f"Predicted sentiment: {sentiment}") - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/models/BERT/run_training.py b/models/BERT/run_training.py deleted file mode 100644 index 8ee4a05..0000000 --- a/models/BERT/run_training.py +++ /dev/null @@ -1,13 +0,0 @@ -import os -import sys - -# Bypass transformers version check -os.environ['TRANSFORMERS_SKIP_DEPENDENCY_CHECK'] = '1' - -# Import the training script and run it -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -from NLP_climate_analysis.models.BERT.train import main - -# Run the main function -if __name__ == "__main__": - main() \ No newline at end of file 
diff --git a/requirements.txt b/requirements.txt index dabecae..337f9cf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,6 @@ torch>=1.7.0 transformers>=4.5.0 scikit-learn>=0.24.0 matplotlib>=3.3.0 -seaborn>=0.11.0 \ No newline at end of file +seaborn>=0.11.0 +accelerate>=0.26.0 +huggingface_hub[hf_xet] \ No newline at end of file