diff --git a/ricecooker/utils/downloader.py b/ricecooker/utils/downloader.py index e524e991..3ccf4365 100644 --- a/ricecooker/utils/downloader.py +++ b/ricecooker/utils/downloader.py @@ -389,8 +389,12 @@ def download_assets( # if we're really stuck, just default to HTML as that is most likely if this is a redirect. if not ext: ext = ".html" - subpath = os.path.dirname(filename) - filename = "index{}".format(ext) + + subpath = filename + # Add the existing filename in front of index.xxx; this can contain slashes, and those will result + # in subdirectories being created in the downloaded version. This ensures multiple instances of extensionless + # resources referenced from a page won't clobber each other. + filename = filename + "/index{}".format(ext) os.makedirs(os.path.join(destination, subpath), exist_ok=True) @@ -428,7 +432,7 @@ def js_content_middleware(content, url, **kwargs): return content def css_node_filter(node): - if "rel" in node: + if "rel" in node.attrs: return "stylesheet" in node["rel"] return node["href"].split("?")[0].strip().endswith(".css") diff --git a/tests/test_downloader.py b/tests/test_downloader.py index 911e08a6..af506322 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -1,10 +1,51 @@ import os import unittest +from datetime import datetime -from ricecooker.utils import downloader - +import pytest +from pathlib import Path +from ricecooker.utils import downloader +import http.server +from threading import Thread +import shutil + +PORT = 8181 + +# if any changes are needed in the files served out of the "samples" folder, you need to delete the .webcache folder +# that is generated at the project root for the tests to be rerun +@pytest.fixture(scope="module") +def http_local_server(): + # Get the directory containing the current file + current_file_directory = Path(__file__).resolve().parent + print(f"Directory of the current file: {current_file_directory}") + + test_content_path = str(current_file_directory / 
"testcontent") + + class Handler(http.server.SimpleHTTPRequestHandler): + def __init__(self, *args, **kwargs): + super().__init__(*args, directory=test_content_path, **kwargs) + + def spawn_http_server(arg): + with http.server.HTTPServer(("", PORT), Handler) as httpd: + # this behavior of treating extensionless files as CSS is used by + # the test test_pretextbook_css_fetch below; see the comments on that test + # method for more info + Handler.extensions_map = {'': 'text/css'} + print("serving at port", PORT) + try: + httpd.serve_forever() + except: + httpd.server_close() + + server_spawning_thread = Thread(target=spawn_http_server, args=(10,)) + server_spawning_thread.daemon = True + server_spawning_thread.start() + return server_spawning_thread + +@pytest.mark.usefixtures("http_local_server") class TestArchiver(unittest.TestCase): + def test_get_archive_filename_absolute(self): link = "https://learningequality.org/kolibri.png" @@ -70,3 +111,36 @@ def test_archive_path_as_relative_url(self): link_filename, page_filename ) assert rel_path == "../kolibri_1.2.3.png" + + # If any changes are needed in the files served out of the "samples" folder, you need to delete the .webcache folder + # that is generated at the project root for the tests to be rerun + # + # This test relies on behavior in the embedded http server declared above as a module-scoped fixture (applied to this class) + # to treat any extensionless file as having a mime type of text/css. + # Handler.extensions_map = {'': 'text/css'} + # If another test needs different behavior, this may need to be customized or have the fixture be scoped to this test, + # but for now it seems useful to share the resource between this and other future tests. 
+ def test_pretextbook_css_fetch(self): + sushi_url = "http://localhost:" + str(PORT) + "/samples/PreTeXt_book_test/activecalculus.org/single2e/sec-5-2-FTC2.html" + dest_dir = "active_calc_2e_again_" + datetime.now().strftime("%Y-%m-%d_%H:%M:%S") + current_file_dir = Path(__file__).resolve().parent + downloads_dir = current_file_dir.parent / "downloads" + try: + archive = downloader.ArchiveDownloader("downloads/" + dest_dir) + archive.get_page(sushi_url) + + book_dest_dir = downloads_dir / dest_dir / "localhost:8181" / "samples" / "PreTeXt_book_test" + with open(book_dest_dir / "activecalculus.org" / "single2e" / "sec-5-2-FTC2.html", 'r') as file: + page_html = file.read() + assert "link href=\"../../fonts.googleapis.com/css2_family_Material+Symbols+Outlined:opsz,wght,FILL,GRAD@24,400,0,0/index.css" in page_html + + with open(book_dest_dir / "fonts.googleapis.com" / "css2_family_Material+Symbols+Outlined:opsz,wght,FILL,GRAD@24,400,0,0" / "index.css", 'r') as file: + css_file_contents = file.read() + # this has an extra '..' 
compared to what is in the original extensionless css file, because in the course of generating an index.css + # file to have clear extensions in the archived version the file ends up nested down another level + assert "src: url(\"../../fonts.gstatic.com/s/materialsymbolsoutlined" in css_file_contents + + font_size = os.path.getsize(book_dest_dir / "fonts.gstatic.com" / "s" / "materialsymbolsoutlined" / "v290" / "material_symbols.woff") + assert font_size > 0 + finally: + shutil.rmtree(downloads_dir / dest_dir) diff --git a/tests/testcontent/samples/PreTeXt_book_test/activecalculus.org/single2e/sec-5-2-FTC2.html b/tests/testcontent/samples/PreTeXt_book_test/activecalculus.org/single2e/sec-5-2-FTC2.html new file mode 100644 index 00000000..0e2d90d5 --- /dev/null +++ b/tests/testcontent/samples/PreTeXt_book_test/activecalculus.org/single2e/sec-5-2-FTC2.html @@ -0,0 +1,22 @@ + + + + + + + + + + + AC The Second Fundamental Theorem of Calculus + + + + + + + + + diff --git a/tests/testcontent/samples/PreTeXt_book_test/fonts.googleapis.com/css2_family_Material+Symbols+Outlined:opsz,wght,FILL,GRAD@24,400,0,0 b/tests/testcontent/samples/PreTeXt_book_test/fonts.googleapis.com/css2_family_Material+Symbols+Outlined:opsz,wght,FILL,GRAD@24,400,0,0 new file mode 100644 index 00000000..00d15183 --- /dev/null +++ b/tests/testcontent/samples/PreTeXt_book_test/fonts.googleapis.com/css2_family_Material+Symbols+Outlined:opsz,wght,FILL,GRAD@24,400,0,0 @@ -0,0 +1,21 @@ +@font-face { + font-family: 'Material Symbols Outlined'; + font-style: normal; + font-weight: 400; + src: url("../fonts.gstatic.com/s/materialsymbolsoutlined/v290/material_symbols.woff") format('woff'); +} + +.material-symbols-outlined { + font-family: 'Material Symbols Outlined'; + font-weight: normal; + font-style: normal; + font-size: 24px; + line-height: 1; + letter-spacing: normal; + text-transform: none; + display: inline-block; + white-space: nowrap; + word-wrap: normal; + direction: ltr; + 
-moz-font-feature-settings: 'liga'; +} diff --git a/tests/testcontent/samples/PreTeXt_book_test/fonts.gstatic.com/s/materialsymbolsoutlined/v290/material_symbols.woff b/tests/testcontent/samples/PreTeXt_book_test/fonts.gstatic.com/s/materialsymbolsoutlined/v290/material_symbols.woff new file mode 100644 index 00000000..1a1179f8 Binary files /dev/null and b/tests/testcontent/samples/PreTeXt_book_test/fonts.gstatic.com/s/materialsymbolsoutlined/v290/material_symbols.woff differ