Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
Binary file added submissions/.DS_Store
Binary file not shown.
Binary file added submissions/Arthur Armaing/.DS_Store
Binary file not shown.
12 changes: 12 additions & 0 deletions submissions/Arthur Armaing/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
EmailToCalendar:

Takes in the last n emails, and scans each email for possible events to add to your calendar. If it detects events, it will connect to your google calendar and add them to it, giving time and location for each event as well as a short description in the title.
This is all provided that you have proper credentials to enter your google calendar and a Cloud project.

HENCE, for the sake of running the project, I will be leaving my calendar credentials to this project. THAT SAID, PLEASE DO NOT ABUSE THE CREDENTIALS in ANY WAY to modify, delete, or send calendar events or emails.

The project can be run by running main.py. The default is set to 2 emails, but can be changed to more if needed. In the example I will be providing, I will be using 1 email, containing a general body meeting on Tuesday at 6:30 PM.

Please install the dependencies such as the transformers library, Python, PyTorch, and the provided libraries imported in each file.

The project can be expanded to schedule the task of running the script, and to improve the accuracy of the LLM, both of which I intend to do in my free time.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
101 changes: 101 additions & 0 deletions submissions/Arthur Armaing/calendarTest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import datetime as dt
import os.path

from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import requests

SCOPES = ["https://www.googleapis.com/auth/calendar"]

def make_calendar_event(summary, location, start_time):
    """Insert a one-hour event into the user's primary Google Calendar.

    OAuth credentials are cached in token1.json; when absent or invalid,
    they are refreshed or obtained interactively via the installed-app
    flow against credentials.json. The event runs from *start_time* to
    one hour later in the America/New_York timezone.

    Args:
        summary: Event title.
        location: Event location string.
        start_time: datetime.datetime for the event start.
    """
    creds = None
    if os.path.exists("token1.json"):
        creds = Credentials.from_authorized_user_file("token1.json", SCOPES)

    if creds is None or not creds.valid:
        if creds is not None and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            # No usable token: run the interactive browser consent flow.
            flow = InstalledAppFlow.from_client_secrets_file(
                "credentials.json", SCOPES
            )
            creds = flow.run_local_server(port=0)

        # Persist the (possibly refreshed) credentials for the next run.
        with open("token1.json", "w") as token:
            token.write(creds.to_json())

    try:
        service = build("calendar", "v3", credentials=creds)
        end_time = start_time + dt.timedelta(hours=1)  # events are 1 hour long
        body = {
            "summary": summary,
            "location": location,
            "description": "",
            "colorId": 9,
            "start": {
                "dateTime": start_time.isoformat(),
                "timeZone": "America/New_York",
            },
            "end": {
                "dateTime": end_time.isoformat(),
                "timeZone": "America/New_York",
            },
        }
        created = service.events().insert(calendarId="primary", body=body).execute()
        print(f'Event created: {created.get("htmlLink")}')

    except HttpError as error:
        print(f"An error occurred: {error}")

'''
if __name__ == "__main__":
make_calendar_event( "Python-generated event", "online, duh", dt.datetime.now(dt.timezone(dt.timedelta(hours=-5))))
'''












'''
"recurrence": {
"frequency": "DAILY",
"interval": 1
},
"attendees": {
{"email": "arthur.armaing@gmail.com"}
}
'''


'''
# to list the last 5 events
now = dt.datetime.now(dt.timezone.utc).isoformat()

event_result = service.events().list(
calendarId="primary",
timeMin=now,
maxResults=5,
singleEvents=True,
orderBy="startTime"
).execute()
events = event_result.get("items", [])

if not events:
print("No upcoming events found")
return

for event in events:
start = event["start"].get("dateTime") # .get("dateTime", event["start"].get("date"))
print(start, " ", event["summary"])
print(type(start))
'''
26 changes: 26 additions & 0 deletions submissions/Arthur Armaing/classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from transformers import pipeline

def classify_text(text):
    """Classify an email body into one of a fixed set of categories.

    Runs zero-shot classification (facebook/bart-large-mnli) over the
    first 1000 characters of *text* and returns the best-scoring label.

    Args:
        text: The email/message body to classify.

    Returns:
        One of "Event", "Meeting", "Workshop", "Spam", "Advertisement",
        "Update" — whichever the model scored highest.
    """
    classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    candidate_labels = ["Event", "Meeting", "Workshop", "Spam", "Advertisement", "Update"]
    # Truncate long bodies to keep inference fast.
    res = classifier(
        text[:1000],
        candidate_labels=candidate_labels
    )
    print(res['scores'])

    # BUG FIX: the zero-shot pipeline returns res['labels'] / res['scores']
    # sorted by score (descending) — NOT in candidate_labels order.  The old
    # code looked up the max score's index in candidate_labels, which is
    # always index 0 ("Event") since the max sits first in the sorted list.
    # The winning label is simply the first entry of res['labels'].
    return res['labels'][0]


'''
message = ""
with open("passage.txt", "r") as file:
message = file.read()

print(classify_text(message))
'''

1 change: 1 addition & 0 deletions submissions/Arthur Armaing/credentials.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"installed":{"client_id":"719470134677-b8e2ujkgbfi8qsd500cmh6fno005vsqp.apps.googleusercontent.com","project_id":"mailtocalendar-471019","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://oauth2.googleapis.com/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_secret":"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","redirect_uris":["http://localhost"]}}
18 changes: 18 additions & 0 deletions submissions/Arthur Armaing/eviomni_prompt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
You are a highly skilled knowledge reasoner and extractor.
Your task is to carefully read the given question and passages to reason how the passages lead to the answer and extract relevant information that may be used to answer the question.

**Follow these steps:**

**In the <reason></reason> tag, perform the following steps:**
*Question Analysis: Analyze the question to understand the specific information the user is seeking. Identify the key concepts, entities, and relationships involved.
*Passage Analysis: For each passage, carefully read and identify sentences or phrases that are useful for answering the given question.

**In the <extract></extract> tag, synthesize useful information from the passages into a coherent narrative. Organize the information logically and concisely.**

**In <answer></answer> tags, give a short answer to the given question, based on the passages, reasoning information, and extracted knowledge. If none of them work, please answer the question based on your knowledge.**

**Question:**
{question}

**Passages:**
{passages}
74 changes: 74 additions & 0 deletions submissions/Arthur Armaing/inferenceModel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import StoppingCriteria, StoppingCriteriaList
import re

class MultiTokenStoppingCriteria(StoppingCriteria):
    """Halt generation once the sequence ends with a fixed token pattern.

    Compares the last len(stop_ids) generated token ids against *stop_ids*
    after each step.  The *device* argument is accepted for interface
    compatibility but is not used.
    """

    def __init__(self, stop_ids, device):
        self.stop_ids = stop_ids
        self.stop_len = len(stop_ids)

    def __call__(self, input_ids, scores, **kwargs):
        sequence = input_ids[0]
        if len(sequence) < self.stop_len:
            # Not enough tokens generated yet to contain the stop pattern.
            return False
        return sequence[-self.stop_len:].tolist() == self.stop_ids


def get_model_inference(event_message):
    """Extract event details from an email body using the EviOmni model.

    Builds the EviOmni prompt (template from ``eviomni_prompt``, question
    from ``prompt.txt``), generates until the model emits the closing
    "</extract>\n\n" marker (or 512 new tokens), and returns the text
    between <extract>...</extract>.

    Args:
        event_message: Raw email body, substituted as the prompt's passages.

    Returns:
        The extracted evidence string.  If no complete <extract> block was
        produced (e.g. the token budget ran out first), the full decoded
        response is returned as a best-effort fallback.
    """
    model_name = "HIT-TMG/EviOmni-nq_train-1.5B"
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        dtype="auto",
        device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Fill in the prompt template.
    # FIX: use `with` so the file handles are always closed (the original
    # leaked the handle via open(...).read()).
    with open("eviomni_prompt", "r") as f:
        prompt = f.read()
    with open("prompt.txt", "r", encoding="utf-8") as f:
        question = f.read()

    instruction = prompt.format(question=question, passages=event_message)

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": instruction}
    ]

    # Stop generating as soon as the closing tag has been emitted.
    stop_token = "</extract>\n\n"
    stop_ids = tokenizer.encode(stop_token, add_special_tokens=False)
    stopping_criteria = StoppingCriteriaList([
        MultiTokenStoppingCriteria(stop_ids, model.device)
    ])

    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=512,
        stopping_criteria=stopping_criteria
    )
    # Strip the prompt tokens, keeping only the newly generated ones.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()

    # FIX: the original crashed with AttributeError when the regex did not
    # match (match is None); fall back to the whole response instead.
    match = re.search(r"<extract>(.*?)</extract>", response, re.DOTALL)
    return match.group(1).strip() if match else response


if __name__ == "__main__":
    # Quick manual check: run extraction on a saved sample email body.
    with open("passage.txt") as file:
        sample = file.read()
    print(get_model_inference(sample))

88 changes: 88 additions & 0 deletions submissions/Arthur Armaing/mailReader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@

import os.path
import datetime as dt
import base64

from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# If modifying these scopes, delete the file token.json.
SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"]


def get_last_emails(num_emails):
    """Fetch plain-text bodies of the newest inbox emails via the Gmail API.

    OAuth credentials are cached in token.json; when absent or invalid they
    are refreshed or obtained interactively via the installed-app flow
    against credentials.json.

    Args:
        num_emails: Maximum number of inbox messages to fetch.

    Returns:
        A list of email body strings, each truncated to 700 characters.
        Messages without a plain-text body are skipped.  Always a list
        (possibly empty) — never None.
    """
    creds = None

    if os.path.exists("token.json"):
        creds = Credentials.from_authorized_user_file("token.json", SCOPES)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                "credentials.json", SCOPES
            )
            creds = flow.run_local_server(port=0)

        # Save the credentials for the next run.
        with open("token.json", "w") as token:
            token.write(creds.to_json())

    message_output = []
    try:
        service = build("gmail", "v1", credentials=creds)
        results = service.users().messages().list(
            userId="me",
            labelIds=["INBOX"],
            maxResults=num_emails
        ).execute()
        messages = results.get("messages", [])

        if not messages:
            # FIX: the original did a bare `return` here, handing None to
            # callers that iterate the result.  Return the empty list.
            print("No messages found.")
            return message_output

        for msg in messages:
            msg_data = service.users().messages().get(userId="me", id=msg["id"]).execute()

            # Locate the base64url-encoded text body.
            body = ""
            payload = msg_data["payload"]
            if "data" in payload["body"]:  # simple (non-multipart) email
                body = payload["body"]["data"]
            elif "parts" in payload:  # multipart: take the first text/plain part
                for part in payload["parts"]:
                    if part["mimeType"] == "text/plain" and "data" in part["body"]:
                        body = part["body"]["data"]
                        break
            if body:
                body = base64.urlsafe_b64decode(body).decode("utf-8", errors="ignore")
                message_output.append(body[:700])

    except HttpError as error:
        print(f"An error occurred: {error}")

    return message_output


'''
if __name__ == "__main__":
out = get_last_5_emails()
print(out)
'''
43 changes: 43 additions & 0 deletions submissions/Arthur Armaing/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from mailReader import get_last_emails
from calendarTest import make_calendar_event
from classifier import classify_text
from inferenceModel import get_model_inference, MultiTokenStoppingCriteria

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import StoppingCriteria, StoppingCriteriaList
import re
import datetime as dt

def main():
    """Scan recent inbox emails and add any detected events to Google Calendar.

    Pipeline: fetch email bodies -> zero-shot classify -> extract event
    fields with the LLM -> create a one-hour calendar event.
    """
    emails = get_last_emails(1)

    for email in emails:
        # Only act on categories that plausibly describe calendar events.
        if classify_text(email) in ["Meeting", "Event", "Workshop"]:
            print("Event detected")
            # NOTE(review): the extraction output appears to be three
            # ";"-separated "key=value" fields — [0] summary, [1] date/time,
            # [2] location — with "N/A" marking a missing date/time.
            # TODO confirm this format against the prompt files.
            event_params = get_model_inference(email)
            event_params = event_params.split(";")
            # print(event_params)
            if event_params[1] != "N/A":
                # Pre-processing of inputs
                # Strip the "key=" prefix from the date/time field.
                equalsSign = event_params[1].find("=")
                event_params[1] = event_params[1][equalsSign + 1:].strip()
                # Split the date/time into tokens on "/" and spaces
                # (presumably month/day and hour minute — verify).
                event_params[1] = event_params[1].replace("/", " ").split(" ")
                #print(event_params[1])
                # Year is hard-coded to 2025; tokens read as month, day, hour, minute.
                event_params[1] = dt.datetime(2025, int(event_params[1][0]), int(event_params[1][1]), int(event_params[1][2]), int(event_params[1][3]))

                # Strip the "key=" prefix from the summary field.
                equalsSign = event_params[0].find("=")
                event_params[0] = event_params[0][equalsSign + 1:].strip()

                # Strip the "key=" prefix from the location field.
                equalsSign = event_params[2].find("=")
                event_params[2] = event_params[2][equalsSign + 1:].strip()

                #print(event_params[0])
                #print(event_params[1])
                #print(event_params[2])

                make_calendar_event(event_params[0], event_params[2], event_params[1])
                print("Calendar event created")


if __name__ == "__main__":
    main()
24 changes: 24 additions & 0 deletions submissions/Arthur Armaing/ml_files/added_tokens.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"</tool_call>": 151658,
"<tool_call>": 151657,
"<|box_end|>": 151649,
"<|box_start|>": 151648,
"<|endoftext|>": 151643,
"<|file_sep|>": 151664,
"<|fim_middle|>": 151660,
"<|fim_pad|>": 151662,
"<|fim_prefix|>": 151659,
"<|fim_suffix|>": 151661,
"<|im_end|>": 151645,
"<|im_start|>": 151644,
"<|image_pad|>": 151655,
"<|object_ref_end|>": 151647,
"<|object_ref_start|>": 151646,
"<|quad_end|>": 151651,
"<|quad_start|>": 151650,
"<|repo_name|>": 151663,
"<|video_pad|>": 151656,
"<|vision_end|>": 151653,
"<|vision_pad|>": 151654,
"<|vision_start|>": 151652
}
Loading