forked from vzhou842/profanity-check
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patheval.sync.py
More file actions
49 lines (38 loc) · 1.24 KB
/
eval.sync.py
File metadata and controls
49 lines (38 loc) · 1.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score
# %%
# Labelled evaluation set, read relative to the current working directory.
data = pd.read_csv('./test_data.csv')
# Cast every entry to str so non-string cells (e.g. NaN from empty fields)
# are still handed to the vectorizer as text.
texts = data['text'].astype(str)
# Ground-truth labels; presumably 1 = offensive, 0 = clean -- TODO confirm.
y = data['is_offensive']
# %%
# Serialized artifacts, presumably already fitted (they are used below
# without any fit call).
# NOTE: joblib.load unpickles its input and can execute arbitrary code, so
# only load files that come from a trusted source.
vectorizer = joblib.load('vectorizer.joblib')
model = joblib.load('model.joblib')
# %%
# Hand-written smoke-test inputs: neutral sentences, a spam-like line, and
# profanity in mixed casing.
eval_texts = [
    'Hello there, how are you',
    'Lorem Ipsum is simply dummy text of the printing and typesetting industry.',
    '!!!! Click this now!!! -> https://example.com',
    'fuck you',
    'fUcK u',
    'GO TO hElL, you dirty scum',
]
tokens = vectorizer.encode(eval_texts)
# Bare expression: the result is not stored. Presumably it is meant to be
# shown as the cell's output by an interactive runner.
model.predict(tokens)
# %%
# User-submitted inputs; only the "translated" column is scored in this file.
df = pd.read_csv("user_inputs.csv")  # comma is read_csv's default separator
test_texts = df["translated"].astype(str)

# Encode once, then take the model's hard class prediction per row.
tokens = vectorizer.encode(test_texts)
preds = model.predict(tokens)
def _get_profane_prob(prob):
    """Return the entry at index 1 of a single probability row.

    Args:
        prob: An indexable row of per-class scores, e.g. one row of a
            ``predict_proba`` result. Presumably index 1 is the
            "offensive" class -- confirm against the model's class order.

    Returns:
        ``prob[1]``, unchanged.

    Raises:
        IndexError: If ``prob`` has fewer than two entries.
    """
    return prob[1]
# Probability of class index 1 for every user input (presumably the
# "offensive" class -- confirm against the model's class order).
# `tokens` already holds the encoding of `test_texts`, so it is reused rather
# than encoding the same inputs a second time. Column slicing replaces the
# per-row Python callback and, unlike np.apply_along_axis, does not raise
# when there are zero rows.
preds_probs = np.asarray(model.predict_proba(tokens))[:, 1]

# NOTE: despite the name, these are the rows the model *predicted* as
# positive; no ground-truth labels are involved here.
true_pos = df["translated"][preds == 1]
print(true_pos)
print(preds_probs[preds == 1])
# %%
# Predict a label for every text in the labelled evaluation set.
encoded_texts = vectorizer.encode(texts)
y_pred = model.predict(encoded_texts)
# %%
# The confusion matrix is computed but not printed; it stays available for
# inspection in an interactive session.
conf_matrix = confusion_matrix(y, y_pred)

# Headline metrics against the ground-truth labels.
print(f"f1: {f1_score(y, y_pred)}")
print(f"acc: {accuracy_score(y, y_pred)}")