Skip to content

Commit 6b3b3a5

Browse files
author
mathioud
committed
change logic to urlparse & update doctests
Signed-off-by: mathioud <georgios.mathioudakis@here.com>
1 parent 7d065b0 commit 6b3b3a5

File tree

1 file changed

+20
-14
lines changed

1 file changed

+20
-14
lines changed

src/python_inspector/utils_pypi.py

Lines changed: 20 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,8 @@
2020
from typing import List
2121
from typing import NamedTuple
2222
from urllib.parse import quote_plus
23-
from urllib.parse import urljoin
23+
from urllib.parse import urlparse
24+
from urllib.parse import urlunparse
2425

2526
import attr
2627
import packageurl
@@ -1595,29 +1596,34 @@ def fetch_links(
15951596
url, _, _sha256 = anchor_tag["href"].partition("#sha256=")
15961597
if "data-requires-python" in anchor_tag.attrs:
15971598
python_requires = anchor_tag.attrs["data-requires-python"]
1598-
url = resolve_relative_url(package_url, url) # Resolve relative URL
1599+
# Resolve relative URL
1600+
url = resolve_relative_url(package_url, url)
15991601
links.append(Link(url=url, python_requires=python_requires))
16001602
# TODO: keep sha256
16011603
return links
16021604

16031605

16041606
def resolve_relative_url(package_url, url):
16051607
"""
1606-
Resolve a relative URL using the package URL.
1608+
Return the resolved `url` URL string given a `package_url` base URL string
1609+
of a package.
16071610
1608-
Args:
1609-
package_url (str): The base URL of the package.
1610-
url (str): The URL to be resolved.
1611-
1612-
Returns:
1613-
str: The resolved URL.
1614-
Examples:
1615-
>>> resolve_relative_url("https://example.com/package", "../path/file.txt")
1616-
'https://example.com/path/file.txt'
1611+
For example:
1612+
>>> resolve_relative_url("https://example.com/package", "../path/file.txt")
1613+
'https://example.com/path/file.txt'
16171614
"""
16181615
if not url.startswith(("http://", "https://")):
1619-
base_url = "/".join(package_url.split("/")[:-1]) # Extract base URL
1620-
url = urljoin(base_url, url) # Resolve relative URL
1616+
base_url_parts = urlparse(package_url)
1617+
url_parts = urlparse(url)
1618+
# If the relative URL starts with '..', remove the last directory from the base URL
1619+
if url_parts.path.startswith(".."):
1620+
path = base_url_parts.path.rstrip("/").rsplit("/", 1)[0] + url_parts.path[2:]
1621+
else:
1622+
path = urlunparse(
1623+
("", "", url_parts.path, url_parts.params, url_parts.query, url_parts.fragment)
1624+
)
1625+
resolved_url_parts = base_url_parts._replace(path=path)
1626+
url = urlunparse(resolved_url_parts)
16211627
return url
16221628

16231629

0 commit comments

Comments
 (0)