Conversation
import random
import urllib.request

# Define a list of common English stop words (AI generated).
# NOTE(review): the literal contents of this set were lost when the page was
# scraped (newlines collapsed) — restore the original word list before running.
stop_words = {
    ...
}

"""East of Eden"""
"""The Great Gatsby"""


# Summary by counting the number of times a particular word appears in the text.
def count_word_frequencies(text, remove_stop):
    # NOTE(review): function body lost in the paste — restore before running.
    ...


# How many times do the most popular words show up?
# NOTE(review): `text1` is defined elsewhere in the original notebook — not visible here.
eden_freq = count_word_frequencies(text1, remove_stop=True)


def frequency(item):
    # NOTE(review): function body lost in the paste — restore before running.
    ...


def top_words(word_freq, title, n=20):
    # NOTE(review): function body lost in the paste — restore before running.
    ...


top_words(eden_freq, "East of Eden")

"""Movie Reviews"""

from imdb import Cinemagoer

# create an instance of the Cinemagoer class
ia = Cinemagoer()

# search movie
movie1 = ia.search_movie("East of Eden")[0]
print(movie1.movieID)

# Get reviews
movie1 = ia.get_movie('0048028', info=['reviews'])  # Make sure to add the second argument
reviews1 = movie1.get('reviews', [])
for review in reviews1:
    print(review['content'])
    print()

# Get actor
James_Dean = ia.get_person('0000015')

# Get actor's movies
filmography = James_Dean.get('filmography', {})
films_as_actor = filmography.get('actor', [])
print(films_as_actor)

"""The Great Gatsby"""

# search movie
movie2 = ia.search_movie("The Great Gatsby")[0]
print(movie2.movieID)

# Get reviews
movie2 = ia.get_movie('1343092', info=['reviews'])  # Make sure to add the second argument
reviews2 = movie2.get('reviews', [])
for review in reviews2:
    print(review['content'])
    print()

# Get actor
Leo_DiCap = ia.get_person('0000138')

# Get actor's movies
filmography = Leo_DiCap.get('filmography', {})
films_as_actor = filmography.get('actor', [])
print(films_as_actor)

from nltk.sentiment.vader import SentimentIntensityAnalyzer

sentence = 'Software Design is my favorite class because learning Python is so cool!'
# NOTE(review): the SentimentIntensityAnalyzer construction and the
# polarity_scores(sentence) call that produced the output shown next
# were lost in the paste — restore before running.
# Output: {'neg': 0.0, 'neu': 0.614, 'pos': 0.386, 'compound': 0.7417}

from thefuzz import fuzz

print(fuzz.ratio("this is a test", "this is a test!"))  # 97

import numpy as np

# these are the similarities computed from the previous section
# NOTE(review): only the first row of the similarity matrix survived the
# paste — restore the remaining rows before running.
S = np.asarray([
    [1., 0.90850572, 0.96451312, 0.97905034, 0.78340575],
])

# dissimilarity is 1 minus similarity
dissimilarities = 1 - S

# compute the embedding
# NOTE(review): MDS (sklearn.manifold) and plt (matplotlib.pyplot) are
# imported elsewhere in the original notebook — not visible here.
coord = MDS(dissimilarity='precomputed').fit_transform(dissimilarities)

plt.scatter(coord[:, 0], coord[:, 1])

# Label the points
for i in range(coord.shape[0]):
    # NOTE(review): loop body (the point-annotation call) lost in the paste.
    ...

plt.show()

"""Request and Open API"""
# Why is my API wrong?

"""Request and Open API"""
from openai import OpenAI
from dotenv import load_dotenv
import os

load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# Remove this duplicate line:
# client = OpenAI()

# The correct method is completions.create or chat.completions.create
# There is no "responses.create" method in the OpenAI API
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "user", "content": "Write a one-sentence bedtime story about a unicorn."}
    ],
)

# The response structure is different
print(response.choices[0].message.content)

"""Correlation with the two books"""

# NOTE(review): chunk_text, text1/text2, and chunks2 are defined elsewhere
# in the original notebook — not visible here.
chunks1 = chunk_text(text1)
chunks = chunks1 + chunks2  # Total 10 chunks
n = len(chunks)
for i in range(n):
    # NOTE(review): the pairwise fuzzy-similarity loop body lost in the paste.
    ...

dissimilarities = 1 - S

# MDS Embedding
coord = MDS(dissimilarity='precomputed', random_state=42).fit_transform(dissimilarities)

# Plot
plt.figure(figsize=(10, 6))
for i in range(coord.shape[0]):
    # NOTE(review): the point-label call lost in the paste.
    ...
plt.title("MDS Plot of East of Eden vs The Great Gatsby using Fuzzy Similarity")


# Putting them on a list to pickle
def main():
    book_files = [
        "Parts/East of Eden.txt",
        "Parts/The Great Gatsby.txt",
    ]
    histograms = []
    for book in book_files:
        hist = process_file(book, skip_header=True)
        histograms.append(hist)
        print(f"Processed '{book}':")
        print(f" Total words: {total_words(hist)}")
        print(f" Unique words: {different_words(hist)}\n")
    # Example: most common words in each book
    for i, hist in enumerate(histograms):
        print(f"Most common words in Book {i+1}:")
        top_words = most_common(hist, excluding_stopwords=True)
        for freq, word in top_words[:10]:
            print(f"{word}\t{freq}")
        print("\n")


# Pickle
import pickle

# Assuming you already read the texts from files
with open('Parts/East of Eden', 'r', encoding='utf-8') as f1:
    east_of_eden_text = f1.read()
with open('Parts/The Great Gatsby', 'r', encoding='utf-8') as f2:
    great_gatsby_text = f2.read()

# Combine both into a dictionary
books = {
    "East of Eden": east_of_eden_text,
    "The Great Gatsby": great_gatsby_text,
}

# Save data to a pickle file
with open('books_texts.pkl', 'wb') as f:
    pickle.dump(books, f)

# Load data from the pickle file later
# NOTE(review): pickle.load is safe here only because the file was written
# by this same script — never unpickle untrusted data.
with open('books_texts.pkl', 'rb') as f:
    reloaded_books = pickle.load(f)
No description provided.