Carefulness with encoding everywhere

This commit is contained in:
Tom Christie 2019-05-01 11:32:03 +01:00
parent b948798414
commit e1ecb360d9
5 changed files with 56 additions and 21 deletions

View File

@ -54,8 +54,8 @@ class HTTP11Connection(Adapter):
assert timeout is None or isinstance(timeout, TimeoutConfig)
#  Start sending the request.
method = request.method.encode()
target = request.url.full_path
method = request.method.encode('ascii')
target = request.url.full_path.encode('ascii')
headers = request.headers.raw
event = h11.Request(method=method, target=target, headers=headers)
await self._send_event(event, timeout)
@ -75,7 +75,7 @@ class HTTP11Connection(Adapter):
event = await self._receive_event(timeout)
assert isinstance(event, h11.Response)
reason_phrase = event.reason.decode("latin1")
reason_phrase = event.reason.decode("ascii", errors="ignore")
status_code = event.status_code
headers = event.headers
content = self._body_iter(timeout)

View File

@ -62,7 +62,7 @@ class HTTP2Connection(Adapter):
headers = []
for k, v in event.headers:
if k == b":status":
status_code = int(v.decode())
status_code = int(v.decode('ascii', errors='ignore'))
elif not k.startswith(b":"):
headers.append((k, v))
@ -98,10 +98,10 @@ class HTTP2Connection(Adapter):
async def send_headers(self, request: Request, timeout: OptionalTimeout) -> int:
stream_id = self.h2_state.get_next_available_stream_id()
headers = [
(b":method", request.method.encode()),
(b":authority", request.url.host.encode()),
(b":scheme", request.url.scheme.encode()),
(b":path", request.url.full_path.encode()),
(b":method", request.method.encode('ascii')),
(b":authority", request.url.authority.encode('ascii')),
(b":scheme", request.url.scheme.encode('ascii')),
(b":path", request.url.full_path.encode('ascii')),
] + request.headers.raw
self.h2_state.send_headers(stream_id, headers)
data_to_send = self.h2_state.data_to_send()

View File

@ -89,18 +89,17 @@ class URL:
port = self.components.port
if port is None:
return {"https": 443, "http": 80}[self.scheme]
return port
return int(port)
@property
def full_path(self) -> str:
    """
    Return the path and, when one is present, the "?"-prefixed query string.

    An empty path falls back to "/" so the value is never an empty string,
    whether or not a query is present.
    """
    # One code path for both cases: previously the "/" fallback was only
    # applied when a query string was present.
    path = self.path or "/"
    if self.query:
        path += "?" + self.query
    return path
@property
def is_ssl(self) -> bool:
    """Return True when the URL scheme is exactly "https"."""
    return self.components.scheme == "https"

@property
def is_secure(self) -> bool:
    """Alias of `is_ssl`, kept so callers using the earlier name still work."""
    return self.is_ssl
@property
@ -155,7 +154,7 @@ class Origin:
def __init__(self, url: URLTypes) -> None:
    """
    Record whether *url* is https, plus its host and port.

    A value that is not already a `URL` instance is passed through `URL()`
    first.
    """
    if not isinstance(url, URL):
        url = URL(url)
    # Assigned once, from the URL's own property; the earlier inline
    # `url.scheme == "https"` assignment was immediately overwritten.
    self.is_ssl = url.is_ssl
    self.host = url.host
    self.port = url.port

View File

@ -16,21 +16,41 @@ def test_response():
def test_response_content_type_encoding():
    """
    Use the charset encoding in the Content-Type header if possible.
    """
    headers = {"Content-Type": "text-plain; charset=latin-1"}
    content = "Latin 1: ÿ".encode("latin-1")
    # Build the response once; a second, identical construction added nothing.
    response = httpcore.Response(200, content=content, headers=headers)
    assert response.text == "Latin 1: ÿ"
    assert response.encoding == "latin-1"
def test_response_autodetect_encoding():
    """
    Autodetect encoding if there is no charset info in a Content-Type header.
    """
    # UTF-8 body, no headers supplied.
    utf8_response = httpcore.Response(200, content="Snowmen: ☃☃☃".encode("utf-8"))
    assert utf8_response.text == "Snowmen: ☃☃☃"
    assert utf8_response.encoding == "utf-8"

    # Non-UTF-8 body, no headers supplied.
    content = "おはようございます。".encode("EUC-JP")
    response = httpcore.Response(200, content=content)
    assert response.text == "おはようございます。"
    assert response.encoding == "EUC-JP"
def test_response_fallback_to_autodetect():
    """
    A Content-Type charset naming an invalid codec must not prevent decoding:
    the EUC-JP body is still expected to decode, with EUC-JP reported.
    """
    expected_text = "おはようございます。"
    response = httpcore.Response(
        200,
        content=expected_text.encode("EUC-JP"),
        headers={"Content-Type": "text-plain; charset=invalid-codec-name"},
    )
    assert response.text == expected_text
    assert response.encoding == "EUC-JP"
def test_response_default_encoding():
    """
    With an empty body and no headers, the text is empty and the reported
    encoding is utf-8.
    """
    empty_body = b""
    empty_response = httpcore.Response(200, content=empty_body)
    assert empty_response.text == ""
    assert empty_response.encoding == "utf-8"

View File

@ -5,3 +5,19 @@ def test_idna_url():
url = URL("http://中国.icom.museum:80/")
assert url == URL("http://xn--fiqs8s.icom.museum:80/")
assert url.host == "xn--fiqs8s.icom.museum"
def test_url():
    """
    Check each component accessor of a fully-populated URL, its repr, and
    that `copy_with` swaps the scheme while leaving the rest untouched.
    """
    original = URL("https://example.org:123/path/to/somewhere?abc=123#anchor")

    # Individual components.
    assert original.scheme == "https"
    assert original.host == "example.org"
    assert original.port == 123
    assert original.authority == "example.org:123"
    assert original.path == "/path/to/somewhere"
    assert original.query == "abc=123"
    assert original.fragment == "anchor"
    assert (
        repr(original)
        == "URL('https://example.org:123/path/to/somewhere?abc=123#anchor')"
    )

    # Copy with a replaced scheme.
    downgraded = original.copy_with(scheme="http")
    assert downgraded == URL("http://example.org:123/path/to/somewhere?abc=123#anchor")
    assert downgraded.scheme == "http"