Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,7 @@ venv.bak/

# mypy
.mypy_cache/

Data/
Data/anonymized
anonymized/
375 changes: 375 additions & 0 deletions __init__.ipynb

Large diffs are not rendered by default.

44 changes: 30 additions & 14 deletions notebooks/parse_slack_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -33,17 +33,32 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 25,
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "[Errno 2] No such file or directory: 'Data/anonymized\\\\channels.json'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32mc:\\Users\\ezra2\\OneDrive\\Documents\\10Acadmy\\week0_starter_network_analysis\\notebooks\\parse_slack_data.ipynb Cell 3\u001b[0m line \u001b[0;36m6\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/ezra2/OneDrive/Documents/10Acadmy/week0_starter_network_analysis/notebooks/parse_slack_data.ipynb#W2sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m \u001b[39m# if rpath not in sys.path:\u001b[39;00m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/ezra2/OneDrive/Documents/10Acadmy/week0_starter_network_analysis/notebooks/parse_slack_data.ipynb#W2sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m sys\u001b[39m.\u001b[39mpath\u001b[39m.\u001b[39minsert(\u001b[39m0\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mC:\u001b[39m\u001b[39m\\\\\u001b[39;00m\u001b[39mUsers\u001b[39m\u001b[39m\\\\\u001b[39;00m\u001b[39mezra2\u001b[39m\u001b[39m\\\\\u001b[39;00m\u001b[39mOneDrive\u001b[39m\u001b[39m\\\\\u001b[39;00m\u001b[39mDocuments\u001b[39m\u001b[39m\\\\\u001b[39;00m\u001b[39m10Acadmy\u001b[39m\u001b[39m\\\\\u001b[39;00m\u001b[39mweek0_starter_network_analysis\u001b[39m\u001b[39m\\\\\u001b[39;00m\u001b[39msrc\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/Users/ezra2/OneDrive/Documents/10Acadmy/week0_starter_network_analysis/notebooks/parse_slack_data.ipynb#W2sZmlsZQ%3D%3D?line=5'>6</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39msrc\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mloader\u001b[39;00m \u001b[39mimport\u001b[39;00m SlackDataLoader\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/ezra2/OneDrive/Documents/10Acadmy/week0_starter_network_analysis/notebooks/parse_slack_data.ipynb#W2sZmlsZQ%3D%3D?line=6'>7</a>\u001b[0m \u001b[39m# import src.utils as utils\u001b[39;00m\n",
"File \u001b[1;32mc:\\Users\\ezra2\\OneDrive\\Documents\\10Acadmy\\week0_starter_network_analysis\\src\\__init__.py:3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mloader\u001b[39;00m \u001b[39mimport\u001b[39;00m SlackDataLoader \n\u001b[0;32m 2\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mjson\u001b[39;00m\n\u001b[1;32m----> 3\u001b[0m data_loader \u001b[39m=\u001b[39m SlackDataLoader(\u001b[39m'\u001b[39;49m\u001b[39mData/anonymized\u001b[39;49m\u001b[39m'\u001b[39;49m)\n",
"File \u001b[1;32m~\\OneDrive\\Documents\\10Acadmy\\week0_starter_network_analysis\\src\\loader.py:38\u001b[0m, in \u001b[0;36mSlackDataLoader.__init__\u001b[1;34m(self, path)\u001b[0m\n\u001b[0;32m 33\u001b[0m \u001b[39m\u001b[39m\u001b[39m'''\u001b[39;00m\n\u001b[0;32m 34\u001b[0m \u001b[39mpath: path to the slack exported data folder\u001b[39;00m\n\u001b[0;32m 35\u001b[0m \u001b[39m'''\u001b[39;00m\n\u001b[0;32m 37\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mpath \u001b[39m=\u001b[39m path\n\u001b[1;32m---> 38\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mchannels \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mget_channels()\n\u001b[0;32m 39\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39musers \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_users()\n",
"File \u001b[1;32m~\\OneDrive\\Documents\\10Acadmy\\week0_starter_network_analysis\\src\\loader.py:67\u001b[0m, in \u001b[0;36mSlackDataLoader.get_channels\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 61\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mget_channels\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[0;32m 62\u001b[0m \u001b[39m \u001b[39m\u001b[39m'''\u001b[39;00m\n\u001b[0;32m 63\u001b[0m \u001b[39m write a function to get all the channels from the json file\u001b[39;00m\n\u001b[0;32m 64\u001b[0m \u001b[39m '''\u001b[39;00m\n\u001b[1;32m---> 67\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39;49m(os\u001b[39m.\u001b[39;49mpath\u001b[39m.\u001b[39;49mjoin(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mpath, \u001b[39m'\u001b[39;49m\u001b[39mchannels.json\u001b[39;49m\u001b[39m'\u001b[39;49m), \u001b[39m'\u001b[39;49m\u001b[39mr\u001b[39;49m\u001b[39m'\u001b[39;49m) \u001b[39mas\u001b[39;00m f:\n\u001b[0;32m 68\u001b[0m channels \u001b[39m=\u001b[39m json\u001b[39m.\u001b[39mload(f)\n\u001b[0;32m 70\u001b[0m \u001b[39mreturn\u001b[39;00m channels\n",
"\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'Data/anonymized\\\\channels.json'"
]
}
],
"source": [
"# Add parent directory to path to import modules from src\n",
"rpath = os.path.abspath('..')\n",
"if rpath not in sys.path:\n",
" sys.path.insert(0, rpath)\n",
"# if rpath not in sys.path:\n",
"sys.path.insert(0, 'C:\\\\Users\\\\ezra2\\\\OneDrive\\\\Documents\\\\10Acadmy\\\\week0_starter_network_analysis\\\\src')\n",
"\n",
"from src.loader import SlackDataLoader\n",
"import src.utils as utils"
"# import src.utils as utils"
]
},
{
Expand Down Expand Up @@ -95,7 +110,8 @@
" \"\"\"\n",
"\n",
" # specify path to get json files\n",
" combined = []\n",
" combined = []#sa\n",
" \n",
" for json_file in glob.glob(f\"{path_channel}*.json\"):\n",
" with open(json_file, 'r', encoding=\"utf8\") as slack_data:\n",
" combined.append(slack_data)\n",
Expand Down Expand Up @@ -359,7 +375,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -368,7 +384,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -377,7 +393,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -386,7 +402,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -395,7 +411,7 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -444,7 +460,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
"version": "3.11.4"
}
},
"nbformat": 4,
Expand Down
Empty file removed src/__init__.py
Empty file.
58 changes: 58 additions & 0 deletions src/__init__1.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "[Errno 2] No such file or directory: 'Data/anonymized\\\\channels.json'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32mc:\\Users\\ezra2\\OneDrive\\Documents\\10Acadmy\\week0_starter_network_analysis\\src\\__init__.ipynb Cell 1\u001b[0m line \u001b[0;36m9\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/ezra2/OneDrive/Documents/10Acadmy/week0_starter_network_analysis/src/__init__.ipynb#W4sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m sys\u001b[39m.\u001b[39mpath\u001b[39m.\u001b[39minsert(\u001b[39m1\u001b[39m,\u001b[39m\"\u001b[39m\u001b[39mC:/Users/ezra2/OneDrive/Documents/10Acadmy/week0_starter_network_analysis/src\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/ezra2/OneDrive/Documents/10Acadmy/week0_starter_network_analysis/src/__init__.ipynb#W4sZmlsZQ%3D%3D?line=6'>7</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39msrc\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mloader\u001b[39;00m \u001b[39mimport\u001b[39;00m SlackDataLoader\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/Users/ezra2/OneDrive/Documents/10Acadmy/week0_starter_network_analysis/src/__init__.ipynb#W4sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m n\u001b[39m=\u001b[39mSlackDataLoader(\u001b[39m\"\u001b[39;49m\u001b[39mData/anonymized\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n",
"File \u001b[1;32mC:\\Users/ezra2/OneDrive/Documents/10Acadmy/week0_starter_network_analysis\\src\\loader.py:38\u001b[0m, in \u001b[0;36mSlackDataLoader.__init__\u001b[1;34m(self, path)\u001b[0m\n\u001b[0;32m 33\u001b[0m \u001b[39m\u001b[39m\u001b[39m'''\u001b[39;00m\n\u001b[0;32m 34\u001b[0m \u001b[39mpath: path to the slack exported data folder\u001b[39;00m\n\u001b[0;32m 35\u001b[0m \u001b[39m'''\u001b[39;00m\n\u001b[0;32m 37\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mpath \u001b[39m=\u001b[39m path\n\u001b[1;32m---> 38\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mchannels \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mget_channels()\n\u001b[0;32m 39\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39musers \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_users()\n",
"File \u001b[1;32mC:\\Users/ezra2/OneDrive/Documents/10Acadmy/week0_starter_network_analysis\\src\\loader.py:67\u001b[0m, in \u001b[0;36mSlackDataLoader.get_channels\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 61\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mget_channels\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[0;32m 62\u001b[0m \u001b[39m \u001b[39m\u001b[39m'''\u001b[39;00m\n\u001b[0;32m 63\u001b[0m \u001b[39m write a function to get all the channels from the json file\u001b[39;00m\n\u001b[0;32m 64\u001b[0m \u001b[39m '''\u001b[39;00m\n\u001b[1;32m---> 67\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39;49m(os\u001b[39m.\u001b[39;49mpath\u001b[39m.\u001b[39;49mjoin(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mpath, \u001b[39m'\u001b[39;49m\u001b[39mchannels.json\u001b[39;49m\u001b[39m'\u001b[39;49m), \u001b[39m'\u001b[39;49m\u001b[39mr\u001b[39;49m\u001b[39m'\u001b[39;49m) \u001b[39mas\u001b[39;00m f:\n\u001b[0;32m 68\u001b[0m channels \u001b[39m=\u001b[39m json\u001b[39m.\u001b[39mload(f)\n\u001b[0;32m 70\u001b[0m \u001b[39mreturn\u001b[39;00m channels\n",
"\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'Data/anonymized\\\\channels.json'"
]
}
],
"source": [
"import sys\n",
"sys.path.insert(1,\"C:/Users/ezra2/OneDrive/Documents/10Acadmy/week0_starter_network_analysis\")\n",
"\n",
"\n",
"\n",
"from src.loader import SlackDataLoader\n",
"\n",
"n=SlackDataLoader(\"Data/anonymized\")\n",
"\n",
" \n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
41 changes: 37 additions & 4 deletions src/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# Create wrapper classes for using slack_sdk in place of slacker
class SlackDataLoader:
'''
n=SlackDataLoader('Data/anonymized')
Slack exported data IO class.

When you open slack exported ZIP file, each channel or direct message
Expand All @@ -32,10 +33,21 @@ def __init__(self, path):
'''
path: path to the slack exported data folder
'''

self.path = path
self.channels = self.get_channels()
self.users = self.get_ussers()

self.users = self.get_users()
def get_channel_sub_files(self):
    '''
    Map every channel folder under self.path to the files it contains.

    Walks the export folder and records each leaf directory (a directory
    with no sub-directories) under its own folder name.

    Returns a JSON string encoding {channel_name: [file_name, ...]}.
    '''
    export_root = os.fspath(self.path)
    channels_files = {}
    for current_folder, sub_folders, file_names in os.walk(export_root):
        # Only leaf folders are channels; the export root itself is never
        # one, even when it happens to have no sub-folders.
        if sub_folders or current_folder == export_root:
            continue
        # basename works with the running platform's separator, unlike
        # splitting on a hard-coded backslash (which breaks on POSIX and
        # whenever self.path itself contains a backslash).
        channels_files[os.path.basename(current_folder)] = file_names
    return json.dumps(channels_files)




def get_users(self):
'''
Expand All @@ -50,18 +62,39 @@ def get_channels(self):
'''
write a function to get all the channels from the json file
'''


with open(os.path.join(self.path, 'channels.json'), 'r') as f:
channels = json.load(f)

return channels

def get_channel_sub_file(self, channel_name):
    '''
    Return the list of file names recorded for one channel.

    Decodes the JSON mapping produced by get_channel_sub_files() and looks
    up channel_name in it; an unknown channel raises KeyError.
    '''
    encoded_listing = self.get_channel_sub_files()
    files_by_channel = json.loads(encoded_listing)
    return files_by_channel[channel_name]
def get_channel_messages_by_day(self, current_day_path):
    '''
    Load and return the parsed JSON content of one channel file.

    current_day_path: path to the JSON file to read
    '''
    # Read as UTF-8 explicitly: the platform default (e.g. cp1252 on
    # Windows) would mis-decode or reject non-ASCII message text.
    with open(current_day_path, 'r', encoding='utf-8') as f:
        return json.load(f)
def get_channel_messages(self, channel_name):
    '''
    Get all the messages from a channel.

    channel_name: name of the channel folder inside self.path

    Reads every file that get_channel_sub_file() lists for the channel via
    get_channel_messages_by_day() and returns a JSON string encoding
    {file_name: parsed_file_content}.
    '''
    channel_path = os.path.join(self.path, channel_name)
    # Keys are the file names as listed (presumably one file per day --
    # confirm against the export layout).
    messages = {
        day_file: self.get_channel_messages_by_day(
            os.path.join(channel_path, day_file)
        )
        for day_file in self.get_channel_sub_file(channel_name)
    }
    return json.dumps(messages)




#

def get_user_map(self):
'''
write a function to get a map between user id and user name
Expand Down
39 changes: 39 additions & 0 deletions src/new.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from loader import SlackDataLoader
import json
# Build a loader for the anonymized Slack export. 'Data/anonymized' is a
# relative path, so it is resolved against the current working directory.
data_loader = SlackDataLoader('Data/anonymized')

# print(data_loader.get_channel_sub_files())
# Fetch the messages of one channel; the return value is not used here.
(data_loader.get_channel_messages("week-11-group4"))







# # slack_data = data_loader.get_users()
# cha=data_loader.get_channels()[0]['name']
# data_loader.get_channel_messages(cha)
# import os
# import json
# folder_path="Data/anonymized"
# # print(folder_path)

# channels_files={}
# for (current_folder,folders_in_current_folder,files_in_current_folder) in os.walk(folder_path):
# if(len(folders_in_current_folder) == 0):
# BS=r"\""[0]
# # print("=======================================")
# # print(f'*****current_folder {str(current_folder).split(BS)[1]}')
# # print(f'*****folders_in_current_folder {str(folders_in_current_folder)}')
# # print(f'*****files_in_current_folder {str(files_in_current_folder)}')
# # print("=======================================")
# current_channel=str(current_folder).split(BS)[1]
# # channels_files[current_channel]={}
# channels_files[current_channel]=files_in_current_folder
# # print(current_channel)


# # print(json.loads(channels_files))
# obj = json.dumps(channels_files)
# print(obj)