-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathHelper.py
More file actions
88 lines (65 loc) · 2.93 KB
/
Helper.py
File metadata and controls
88 lines (65 loc) · 2.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
from urlextract import URLExtract
from wordcloud import WordCloud
import pandas as pd
from collections import Counter
import emoji
import nltk
from nltk.corpus import stopwords
# Module-level setup shared by the helpers below.
#
# NOTE: nltk.download() runs on every import of this module; it is kept here
# so that stopwords.words() can find the corpus on a fresh environment.
nltk.download('stopwords')

# English stop words, consulted by create_word_cloud and most_c_w.
stop_words = stopwords.words('english')

# Single URL extractor instance reused by fetch_stat for link counting.
extractor = URLExtract()
def fetch_stat(selected_user, df):
    """Return headline counts for one user's messages or for the whole chat.

    Args:
        selected_user: ``"Overall"`` keeps every row. Any other value keeps
            only the rows whose ``user`` contains it (case-insensitive,
            literal substring match).
        df: DataFrame with string columns ``user`` and ``message``.

    Returns:
        Tuple ``(word_count, message_count, media_count, link_count)``.
    """
    if selected_user != "Overall":
        # regex=False: user names are plain text. Phone-number style names
        # such as "+91 ..." are invalid regular expressions and names with
        # ".", "(" or "*" would otherwise match the wrong rows.
        matches_user = df["user"].str.contains(
            selected_user, case=False, na=False, regex=False
        )
        df = df[matches_user]

    messages = df["message"]
    num_messages = df.shape[0]

    # Whitespace-separated tokens across all messages.
    word_count = sum(len(message.split()) for message in messages)

    # Media placeholders are matched exactly, trailing newline included.
    number_of_media = int((messages == "<Media omitted>\n").sum())

    link_count = sum(len(extractor.find_urls(message)) for message in messages)

    return word_count, num_messages, number_of_media, link_count
def fetch_most_busy(df):
    """Rank users by message count.

    Args:
        df: DataFrame with a ``user`` column, one row per message.

    Returns:
        Tuple ``(top_users, per)`` where ``top_users`` is the value-count
        Series of the five most frequent users and ``per`` is a DataFrame
        with columns ``name`` and ``percent`` (share of all rows, rounded to
        a whole number) for every user, indexed from 1.
    """
    counts = df["user"].value_counts()
    top_users = counts.head(5)

    share = (counts / df.shape[0] * 100).round()
    # Name the columns explicitly instead of renaming whatever reset_index()
    # produces: the default labels differ between pandas releases, so a
    # rename mapping silently mislabels the columns on some versions.
    per = share.rename_axis("name").reset_index(name="percent")
    per.index = range(1, len(per) + 1)

    return top_users, per
def create_word_cloud(selected_user, df):
    """Build a word cloud from the chat text with stop words removed.

    Rows whose ``user`` contains ``group_notification`` or
    ``WhatsApp_Notification`` and rows whose ``message`` contains
    ``<Media omitted>`` are excluded before the text is assembled.

    Args:
        selected_user: ``"Overall"`` keeps every row. Any other value keeps
            only the rows whose ``user`` contains it (case-insensitive,
            literal substring match).
        df: DataFrame with string columns ``user`` and ``message``. It is
            not modified.

    Returns:
        The object returned by ``WordCloud.generate`` for the cleaned text.
    """
    if selected_user != "Overall":
        # regex=False: user names are plain text, not patterns (a name like
        # "+91 ..." is not even a valid regular expression).
        df = df[
            df["user"].str.contains(
                selected_user, case=False, na=False, regex=False
            )
        ]

    users = df["user"]
    is_system = users.str.contains(
        "group_notification", case=False, na=False
    ) | users.str.contains("WhatsApp_Notification", case=False, na=False)
    is_media = df["message"].str.contains(
        "<Media omitted>", case=False, na=False
    )

    # Work on a standalone Series rather than assigning a column back into a
    # filtered slice, which triggers pandas' SettingWithCopyWarning.
    messages = df.loc[~is_system & ~is_media, "message"]

    # Build the lookup set once; membership in the raw list is a linear scan
    # for every word.
    stop_set = set(stop_words)

    def remove_stop_w(message):
        return " ".join(
            word for word in message.lower().split() if word not in stop_set
        )

    cleaned = messages.apply(remove_stop_w)

    wc = WordCloud(
        width=500, height=500, min_font_size=10, background_color="white"
    )
    # NOTE(review): if no words survive the filtering, generate() receives an
    # empty string; confirm how callers expect that case to be handled.
    return wc.generate(cleaned.str.cat(sep=" "))
def most_c_w(selected_users, df):
    """Return the 20 most frequent non-stop-words in the chat.

    Rows whose ``user`` contains ``group_notification`` or
    ``WhatsApp_Notification`` and rows whose ``message`` contains
    ``<Media omitted>`` are excluded before counting.

    Args:
        selected_users: ``"Overall"`` keeps every row. Any other value keeps
            only the rows whose ``user`` contains it (case-insensitive,
            literal substring match).
        df: DataFrame with string columns ``user`` and ``message``.

    Returns:
        DataFrame built from ``Counter.most_common(20)``: one row per word,
        column 0 holding the lower-cased word and column 1 its count. The
        frame is empty when no words remain.
    """
    if selected_users != "Overall":
        # regex=False: user names are plain text, not patterns (a name like
        # "+91 ..." is not even a valid regular expression).
        df = df[
            df["user"].str.contains(
                selected_users, case=False, na=False, regex=False
            )
        ]

    users = df["user"]
    is_system = users.str.contains(
        "group_notification", case=False, na=False
    ) | users.str.contains("WhatsApp_Notification", case=False, na=False)
    is_media = df["message"].str.contains(
        "<Media omitted>", case=False, na=False
    )
    messages = df.loc[~is_system & ~is_media, "message"]

    word_counts = Counter(
        word
        for message in messages
        for word in message.lower().split()
        if word not in stop_words
    )
    return pd.DataFrame(word_counts.most_common(20))
def emoji_helper(selected_user, df):
    """Count the emoji characters used in the chat.

    Args:
        selected_user: ``"Overall"`` keeps every row. Any other value keeps
            only the rows whose ``user`` contains it (case-insensitive,
            literal substring match).
        df: DataFrame with string columns ``user`` and ``message``.

    Returns:
        DataFrame with one row per distinct emoji character, most frequent
        first: column 0 is the character and column 1 its count. The frame
        is empty when no emoji are found.
    """
    if selected_user != "Overall":
        # regex=False: user names are plain text, not patterns (a name like
        # "+91 ..." is not even a valid regular expression).
        df = df[
            df["user"].str.contains(
                selected_user, case=False, na=False, regex=False
            )
        ]

    # Messages are scanned one character at a time against EMOJI_DATA.
    # NOTE(review): emoji made of several code points are therefore not
    # matched as a single unit -- confirm whether that is intended.
    emoji_counts = Counter(
        char
        for message in df["message"]
        for char in message
        if char in emoji.EMOJI_DATA
    )
    # most_common() with no argument already returns every entry, ordered by
    # descending count.
    return pd.DataFrame(emoji_counts.most_common())