diff --git a/libs/community/langchain_community/document_loaders/__init__.py b/libs/community/langchain_community/document_loaders/__init__.py index c91345da..08fd01a0 100644 --- a/libs/community/langchain_community/document_loaders/__init__.py +++ b/libs/community/langchain_community/document_loaders/__init__.py @@ -205,6 +205,9 @@ from langchain_community.document_loaders.gcs_file import ( GCSFileLoader, ) + from langchain_community.document_loaders.genius import ( + GeniusLoader, + ) from langchain_community.document_loaders.geodataframe import ( GeoDataFrameLoader, ) @@ -814,6 +817,7 @@ def __getattr__(name: str) -> Any: "GlueCatalogLoader", "GCSFileLoader", "GeoDataFrameLoader", + "GeniusLoader", "GitHubIssuesLoader", "GitLoader", "GitbookLoader", diff --git a/libs/community/langchain_community/document_loaders/genius.py b/libs/community/langchain_community/document_loaders/genius.py new file mode 100644 index 00000000..45f90c36 --- /dev/null +++ b/libs/community/langchain_community/document_loaders/genius.py @@ -0,0 +1,55 @@ +from typing import Iterator + +from langchain_core.document_loaders import BaseLoader +from langchain_core.documents import Document + + +class GeniusLoader(BaseLoader): + """Load lyrics using the Genius API. + + This loader utilizes the `lyricsgenius` Python package to fetch song lyrics + and metadata. You need a Genius API token, which can be generated at + https://genius.com/api-clients. + """ + + def __init__(self, search_query: str, api_token: str = None): + """Initialize with search query and API token. + + Args: + search_query: The search query (e.g., "Imagine Dragons - Radioactive"). + api_token: Genius API Token. If not provided, looks for GENIUS_ACCESS_TOKEN env var. + """ + self.search_query = search_query + self.api_token = api_token + + def lazy_load(self) -> Iterator[Document]: + """Load lyrics and metadata.""" + try: + import lyricsgenius + except ImportError: + raise ImportError( + "lyricsgenius package not found, please install it with " + "`pip install lyricsgenius`" + ) + + # Initialize Genius client + genius = lyricsgenius.Genius(self.api_token) + + # Search for the song (we use the first best match) + song = genius.search_song(self.search_query) + + # If no song is found, yield nothing (empty iterator) + if not song: + return + + # Create the LangChain Document + metadata = { + "source": "Genius", + "title": song.title, + "artist": song.artist, + "album": song.album, + "url": song.url, + "id": song.id, + } + + yield Document(page_content=song.lyrics, metadata=metadata) diff --git a/test_run.py b/test_run.py new file mode 100644 index 00000000..ab673cc1 --- /dev/null +++ b/test_run.py @@ -0,0 +1,36 @@ +# We try to import it from the top-level package +# If this works, it means your __init__.py edit was successful +try: + from langchain_community.document_loaders import GeniusLoader + + print("✅ STEP 1 PASSED: GeniusLoader was imported successfully!") +except ImportError as e: + print("❌ STEP 1 FAILED: Could not import GeniusLoader.") + print(e) + exit() + +# Now we try to use it +# Note: If you don't have a real token, we expect an error, but NOT a crash. +try: + print("attempting to initialize loader...") + # Replace 'fake_token' with a real one if you want actual lyrics + loader = GeniusLoader("Taylor Swift", api_token="fake_token") + print("✅ STEP 2 PASSED: Loader initialized!") + + print("Attempting to load data...") + docs = list(loader.lazy_load()) + + if docs: + print(f"✅ STEP 3 PASSED: Found song: {docs[0].metadata['title']}") + else: + print("⚠️ STEP 3: No docs found (Expected if token is fake).") + +except Exception as e: + # If it fails because of the API token, that is actually GOOD. + # It means your code ran and tried to hit the API. + if "401" in str(e) or "403" in str(e) or "Token" in str(e): + print( + "✅ STEP 3 PASSED: Code ran! (API rejected the fake token, which is expected)." + ) + else: + print(f"❌ STEP 3 FAILED with unexpected error: {e}")