|
20 | 20 | from typing import List |
21 | 21 | from typing import NamedTuple |
22 | 22 | from urllib.parse import quote_plus |
23 | | -from urllib.parse import urljoin |
| 23 | +from urllib.parse import urlparse |
| 24 | +from urllib.parse import urlunparse |
24 | 25 |
|
25 | 26 | import attr |
26 | 27 | import packageurl |
@@ -1595,29 +1596,34 @@ def fetch_links( |
1595 | 1596 | url, _, _sha256 = anchor_tag["href"].partition("#sha256=") |
1596 | 1597 | if "data-requires-python" in anchor_tag.attrs: |
1597 | 1598 | python_requires = anchor_tag.attrs["data-requires-python"] |
1598 | | - url = resolve_relative_url(package_url, url) # Resolve relative URL |
| 1599 | + # Resolve relative URL |
| 1600 | + url = resolve_relative_url(package_url, url) |
1599 | 1601 | links.append(Link(url=url, python_requires=python_requires)) |
1600 | 1602 | # TODO: keep sha256 |
1601 | 1603 | return links |
1602 | 1604 |
|
1603 | 1605 |
|
1604 | 1606 | def resolve_relative_url(package_url, url): |
1605 | 1607 | """ |
1606 | | - Resolve a relative URL using the package URL. |
| 1608 | + Return the resolved `url` URL string given a `package_url` base URL string |
| 1609 | + of a package. |
1607 | 1610 |
|
1608 | | - Args: |
1609 | | - package_url (str): The base URL of the package. |
1610 | | - url (str): The URL to be resolved. |
1611 | | -
|
1612 | | - Returns: |
1613 | | - str: The resolved URL. |
1614 | | - Examples: |
1615 | | - >>> resolve_relative_url("https://example.com/package", "../path/file.txt") |
1616 | | - 'https://example.com/path/file.txt' |
| 1611 | + For example: |
| 1612 | + >>> resolve_relative_url("https://example.com/package", "../path/file.txt") |
| 1613 | + 'https://example.com/path/file.txt' |
1617 | 1614 | """ |
1618 | 1615 | if not url.startswith(("http://", "https://")): |
1619 | | - base_url = "/".join(package_url.split("/")[:-1]) # Extract base URL |
1620 | | - url = urljoin(base_url, url) # Resolve relative URL |
| 1616 | + base_url_parts = urlparse(package_url) |
| 1617 | + url_parts = urlparse(url) |
| 1618 | + # If the relative URL starts with '..', remove the last directory from the base URL |
| 1619 | + if url_parts.path.startswith(".."): |
| 1620 | + path = base_url_parts.path.rstrip("/").rsplit("/", 1)[0] + url_parts.path[2:] |
| 1621 | + else: |
| 1622 | + path = urlunparse( |
| 1623 | + ("", "", url_parts.path, url_parts.params, url_parts.query, url_parts.fragment) |
| 1624 | + ) |
| 1625 | + resolved_url_parts = base_url_parts._replace(path=path) |
| 1626 | + url = urlunparse(resolved_url_parts) |
1621 | 1627 | return url |
1622 | 1628 |
|
1623 | 1629 |
|
|
0 commit comments