httpx/httpcore/utils.py
2019-04-30 16:35:25 +01:00

92 lines
2.7 KiB
Python

import codecs
import http
import typing
from urllib.parse import quote
from .exceptions import InvalidURL
# Characters that RFC 3986 classes as "unreserved": ASCII letters, digits,
# and the four marks "-", ".", "_" and "~".
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "-._~"
)
def unquote_unreserved(uri: str) -> str:
    """
    Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.

    A "%" that is not followed by two alphanumeric characters is passed
    through untouched.

    Raises `InvalidURL` when a "%" is followed by two alphanumeric characters
    that are not both ASCII hexadecimal digits.
    """
    # `int(..., 16)` also accepts non-ASCII decimal digits (for example
    # fullwidth "４１"), which are not valid in a percent-escape, so the
    # digits are validated explicitly instead of relying on `int` to fail.
    hex_digits = "0123456789ABCDEFabcdef"

    # Everything before the first "%" can never contain an escape.
    head, *escapes = uri.split("%")
    pieces = [head]
    for segment in escapes:
        code = segment[:2]
        if len(code) == 2 and code.isalnum():
            if not all(digit in hex_digits for digit in code):
                raise InvalidURL("Invalid percent-escape sequence: '%s'" % code)
            char = chr(int(code, 16))
            if char in UNRESERVED_SET:
                # Replace the escape with the literal unreserved character.
                pieces.append(char + segment[2:])
                continue
        # Not an escape for an unreserved character: restore the "%" that
        # `split` removed and keep the segment as it was.
        pieces.append("%" + segment)
    return "".join(pieces)
def requote_uri(uri: str) -> str:
    """
    Re-quote the given URI.

    The URI is run through an unquote/quote cycle so that the result is
    fully and consistently quoted.
    """
    safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
    safe_without_percent = "!#$&'()*+,/:;=?@[]~"

    try:
        # First pass: un-escape only the unreserved characters.
        partially_unquoted = unquote_unreserved(uri)
    except InvalidURL:
        # The URI could not be unquoted, so it may contain bare '%' signs.
        # Quote it as-is with '%' treated as unsafe so those get escaped
        # and do not cause issues elsewhere.
        return quote(uri, safe=safe_without_percent)

    # Second pass: quote only illegal characters, leaving reserved
    # characters, unreserved characters and '%' alone.
    return quote(partially_unquoted, safe=safe_with_percent)
def normalize_header_key(
    value: typing.Union[str, bytes], encoding: typing.Optional[str] = None
) -> bytes:
    """
    Coerce str/bytes into a strictly byte-wise HTTP header key.

    Bytes input is returned lower-cased. String input is encoded using
    `encoding` (falling back to "ascii" when it is not given) and then
    lower-cased, so only ASCII letters in the encoded bytes are affected.
    """
    if isinstance(value, bytes):
        return value.lower()
    return value.encode(encoding or "ascii").lower()
def normalize_header_value(
    value: typing.Union[str, bytes], encoding: typing.Optional[str] = None
) -> bytes:
    """
    Coerce str/bytes into a strictly byte-wise HTTP header value.

    Bytes input is returned unchanged. String input is encoded using
    `encoding`, falling back to "ascii" when it is not given.
    """
    if isinstance(value, bytes):
        return value
    return value.encode(encoding or "ascii")
def get_reason_phrase(status_code: int) -> str:
    """
    Return an HTTP reason phrase, eg. "OK" for 200, or "Not Found" for 404.

    An empty string is returned for any status code that `http.HTTPStatus`
    does not recognise.
    """
    try:
        return http.HTTPStatus(status_code).phrase
    except ValueError:
        # `HTTPStatus(...)` raises ValueError for values it has no member for.
        return ""
def is_known_encoding(encoding: str) -> bool:
    """
    Return True if `encoding` names a codec that `codecs.lookup` can resolve,
    and False otherwise.
    """
    try:
        codecs.lookup(encoding)
    except (LookupError, ValueError):
        # LookupError: no codec is registered under that name.
        # ValueError: the name itself is rejected before any lookup happens
        # (e.g. an embedded NUL, or a lone surrogate that cannot be encoded).
        return False
    return True