92 lines
2.7 KiB
Python
92 lines
2.7 KiB
Python
import codecs
|
|
import http
|
|
import typing
|
|
from urllib.parse import quote
|
|
|
|
from .exceptions import InvalidURL
|
|
|
|
# Characters that RFC 3986 classifies as "unreserved": ASCII letters, ASCII
# digits, and the four marks "-", ".", "_" and "~".
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "-._~"
)
|
|
|
|
|
|
def unquote_unreserved(uri: str) -> str:
    """
    Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.

    :param uri: The URI text to process.
    :return: ``uri`` with each ``%XX`` escape that decodes to a member of
        ``UNRESERVED_SET`` replaced by that character. Every other ``%`` and
        the text following it is kept exactly as given.
    :raises InvalidURL: If a ``%`` is followed by two alphanumeric characters
        that are not both ASCII hexadecimal digits (for example ``%zz``).
    """
    ascii_hex_digits = "0123456789abcdefABCDEF"

    # The first piece is the text before any "%"; every later piece begins
    # with whatever characters followed a "%" in the input.
    parts = uri.split("%")
    for i in range(1, len(parts)):
        part = parts[i]
        h = part[:2]
        if len(h) == 2 and h.isalnum():
            # Both ``str.isalnum`` and ``int`` accept non-ASCII decimal
            # digits (e.g. full-width "４１" would parse as 0x41), which are
            # not valid in a percent-escape. Check for ASCII hex explicitly
            # so such input is rejected instead of being silently decoded.
            if not all(ch in ascii_hex_digits for ch in h):
                raise InvalidURL("Invalid percent-escape sequence: '%s'" % h)

            c = chr(int(h, 16))
            if c in UNRESERVED_SET:
                parts[i] = c + part[2:]
                continue

        # Not an escape of an unreserved character: put back the "%" that
        # ``split`` removed and leave the rest untouched.
        parts[i] = "%" + part
    return "".join(parts)
|
|
|
|
|
|
def requote_uri(uri: str) -> str:
    """
    Return the given URI in a fully and consistently quoted form.

    The URI is run through an unquote/quote cycle: escapes of unreserved
    characters are decoded first, then anything illegal is quoted. If the
    unquote step rejects the URI, the original text is quoted directly,
    with "%" itself treated as a character that needs quoting.
    """
    keep_including_percent = "!#$%&'()*+,/:;=?@[]~"
    keep_excluding_percent = "!#$&'()*+,/:;=?@[]~"
    try:
        # Decode only the escapes that stand for unreserved characters.
        partially_unquoted = unquote_unreserved(uri)
    except InvalidURL:
        # The URI could not be unquoted, so it may contain stray "%" signs.
        # Quote those as well so they cannot cause trouble further on.
        return quote(uri, safe=keep_excluding_percent)
    else:
        # Quote only illegal characters; reserved characters, unreserved
        # characters and existing "%" escapes are left alone.
        return quote(partially_unquoted, safe=keep_including_percent)
|
|
|
|
|
|
def normalize_header_key(
    value: typing.Union[str, bytes], encoding: typing.Optional[str] = None
) -> bytes:
    """
    Coerce str/bytes into a strictly byte-wise HTTP header key.

    :param value: The header name. ``bytes`` input is used as-is; any other
        input is encoded with its ``encode`` method.
    :param encoding: Codec name used to encode non-bytes input. When it is
        ``None`` (or empty), ``"ascii"`` is used.
    :return: The header name as lower-cased bytes.
    """
    if isinstance(value, bytes):
        return value.lower()
    return value.encode(encoding or "ascii").lower()
|
|
|
|
|
|
def normalize_header_value(
    value: typing.Union[str, bytes], encoding: typing.Optional[str] = None
) -> bytes:
    """
    Coerce str/bytes into a strictly byte-wise HTTP header value.

    :param value: The header value. ``bytes`` input is returned unchanged;
        any other input is encoded with its ``encode`` method.
    :param encoding: Codec name used to encode non-bytes input. When it is
        ``None`` (or empty), ``"ascii"`` is used.
    :return: The header value as bytes, with its case preserved.
    """
    if isinstance(value, bytes):
        return value
    return value.encode(encoding or "ascii")
|
|
|
|
|
|
def get_reason_phrase(status_code: int) -> str:
    """
    Return an HTTP reason phrase, eg. "OK" for 200, or "Not Found" for 404.

    :param status_code: The numeric HTTP status code to look up.
    :return: The phrase that ``http.HTTPStatus`` associates with the code, or
        an empty string when ``http.HTTPStatus`` does not recognise it.
    """
    try:
        return http.HTTPStatus(status_code).phrase
    except ValueError:
        # ``HTTPStatus(...)`` raises ValueError for values it does not define;
        # callers get an empty phrase rather than an exception.
        return ""
|
|
|
|
|
|
def is_known_encoding(encoding: str) -> bool:
    """
    Return True if `codecs.lookup` can find a codec for the given encoding
    name, and False if the lookup raises `LookupError`.
    """
    try:
        codecs.lookup(encoding)
    except LookupError:
        found = False
    else:
        found = True
    return found
|