Skip to content

Commit ad212c0

Browse files
authored
Merge pull request #67 from redapple/canonicalize-url-docs
canonicalize_url: add to w3lib.url.__all__ + doctest
2 parents 3c56191 + 52960a4 commit ad212c0

File tree

1 file changed

+23
-5
lines changed

1 file changed

+23
-5
lines changed

w3lib/url.py

Lines changed: 23 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -274,9 +274,16 @@ def any_to_uri(uri_or_path):
274274
return uri_or_path if u.scheme else path_to_file_uri(uri_or_path)
275275

276276

277-
__all__ = ["add_or_replace_parameter", "any_to_uri", "file_uri_to_path",
278-
"is_url", "path_to_file_uri", "safe_download_url",
279-
"safe_url_string", "url_query_cleaner", "url_query_parameter",
277+
__all__ = ["add_or_replace_parameter",
278+
"any_to_uri",
279+
"canonicalize_url",
280+
"file_uri_to_path",
281+
"is_url",
282+
"path_to_file_uri",
283+
"safe_download_url",
284+
"safe_url_string",
285+
"url_query_cleaner",
286+
"url_query_parameter",
280287

281288
# this last one is deprecated ; include it to be on the safe side
282289
"urljoin_rfc"]
@@ -307,7 +314,7 @@ def _safe_ParseResult(parts, encoding='utf8', path_encoding='utf8'):
307314

308315
def canonicalize_url(url, keep_blank_values=True, keep_fragments=False,
309316
encoding=None):
310-
"""Canonicalize the given url by applying the following procedures:
317+
r"""Canonicalize the given url by applying the following procedures:
311318
312319
- sort query arguments, first by key, then by value
313320
- percent encode paths ; non-ASCII characters are percent-encoded
@@ -322,7 +329,18 @@ def canonicalize_url(url, keep_blank_values=True, keep_fragments=False,
322329
The url passed can be bytes or unicode, while the url returned is
323330
always a native str (bytes in Python 2, unicode in Python 3).
324331
325-
For examples see the tests in tests/test_utils_url.py
332+
>>> import w3lib.url
333+
>>>
334+
>>> # sorting query arguments
335+
>>> w3lib.url.canonicalize_url('http://www.example.com/do?c=3&b=5&b=2&a=50')
336+
'http://www.example.com/do?a=50&b=2&b=5&c=3'
337+
>>>
338+
>>> # UTF-8 conversion + percent-encoding of non-ASCII characters
339+
>>> w3lib.url.canonicalize_url(u'http://www.example.com/r\u00e9sum\u00e9')
340+
'http://www.example.com/r%C3%A9sum%C3%A9'
341+
>>>
342+
343+
For more examples, see the tests in `tests/test_url.py`.
326344
"""
327345
# If supplied `encoding` is not compatible with all characters in `url`,
328346
# fallback to UTF-8 as safety net.

0 commit comments

Comments
 (0)