From 518025bf8b8a4cfa05ad3170c632377c82e47279 Mon Sep 17 00:00:00 2001 From: lsn1028 Date: Mon, 28 Nov 2022 09:56:29 +0800 Subject: [PATCH] CVE-2022-0391: Mention urllib.parse changes in What's New section. Description: python-3.10.2 fix bug CVE-2022-0391 issue: https://gitee.com/openharmony/third_party_python/issues/I628CS?from=project-issue Signed-off-by: lsn1028 --- Doc/whatsnew/3.10.rst | 7 +++++++ Lib/urllib/parse.py | 8 +++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index dd4da45..e63cc74 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -1490,6 +1490,13 @@ URL by the parser in :mod:`urllib.parse` preventing such attacks. The removal characters are controlled by a new module level variable ``urllib.parse._UNSAFE_URL_BYTES_TO_REMOVE``. (See :issue:`43882`) +The presence of newline or tab characters in parts of a URL allows for some +forms of attacks. Following the WHATWG specification that updates :rfc:`3986`, +ASCII newline ``\n``, ``\r`` and tab ``\t`` characters are stripped from the +URL by the parser in :mod:`urllib.parse` preventing such attacks. The removal +characters are controlled by a new module level variable +``urllib.parse._UNSAFE_URL_BYTES_TO_REMOVE``. 
(See :issue:`43882`) + xml --- diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index b35997b..4249163 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -456,11 +456,6 @@ def urlsplit(url, scheme='', allow_fragments=True): """ url, scheme, _coerce_result = _coerce_args(url, scheme) - - for b in _UNSAFE_URL_BYTES_TO_REMOVE: - url = url.replace(b, "") - scheme = scheme.replace(b, "") - allow_fragments = bool(allow_fragments) key = url, scheme, allow_fragments, type(url), type(scheme) cached = _parse_cache.get(key, None) @@ -477,6 +472,9 @@ def urlsplit(url, scheme='', allow_fragments=True): else: scheme, url = url[:i].lower(), url[i+1:] + for b in _UNSAFE_URL_BYTES_TO_REMOVE: + url = url.replace(b, "") + if url[:2] == '//': netloc, url = _splitnetloc(url, 2) if (('[' in netloc and ']' not in netloc) or -- Gitee