@@ -274,9 +274,16 @@ def any_to_uri(uri_or_path):
274274 return uri_or_path if u .scheme else path_to_file_uri (uri_or_path )
275275
276276
277- __all__ = ["add_or_replace_parameter" , "any_to_uri" , "file_uri_to_path" ,
278- "is_url" , "path_to_file_uri" , "safe_download_url" ,
279- "safe_url_string" , "url_query_cleaner" , "url_query_parameter" ,
277+ __all__ = ["add_or_replace_parameter" ,
278+ "any_to_uri" ,
279+ "canonicalize_url" ,
280+ "file_uri_to_path" ,
281+ "is_url" ,
282+ "path_to_file_uri" ,
283+ "safe_download_url" ,
284+ "safe_url_string" ,
285+ "url_query_cleaner" ,
286+ "url_query_parameter" ,
280287
281288 # this last one is deprecated; include it to be on the safe side
282289 "urljoin_rfc" ]
@@ -307,7 +314,7 @@ def _safe_ParseResult(parts, encoding='utf8', path_encoding='utf8'):
307314
308315def canonicalize_url (url , keep_blank_values = True , keep_fragments = False ,
309316 encoding = None ):
310- """Canonicalize the given url by applying the following procedures:
317+ r """Canonicalize the given url by applying the following procedures:
311318
312319 - sort query arguments, first by key, then by value
313320 - percent-encode paths; non-ASCII characters are percent-encoded
@@ -322,7 +329,18 @@ def canonicalize_url(url, keep_blank_values=True, keep_fragments=False,
322329 The url passed can be bytes or unicode, while the url returned is
323330 always a native str (bytes in Python 2, unicode in Python 3).
324331
325- For examples see the tests in tests/test_utils_url.py
332+ >>> import w3lib.url
333+ >>>
334+ >>> # sorting query arguments
335+ >>> w3lib.url.canonicalize_url('http://www.example.com/do?c=3&b=5&b=2&a=50')
336+ 'http://www.example.com/do?a=50&b=2&b=5&c=3'
337+ >>>
338+ >>> # UTF-8 conversion + percent-encoding of non-ASCII characters
339+ >>> w3lib.url.canonicalize_url(u'http://www.example.com/r\u00e9sum\u00e9')
340+ 'http://www.example.com/r%C3%A9sum%C3%A9'
341+ >>>
342+
343+ For more examples, see the tests in `tests/test_url.py`.
326344 """
327345 # If supplied `encoding` is not compatible with all characters in `url`,
328346 # fallback to UTF-8 as safety net.
0 commit comments