Media types starting with 'text/' should default to iso-8859-1 when no charset is given

This commit is contained in:
Tom Christie 2019-05-01 12:32:31 +01:00
parent 118d94b58c
commit d7cf8bbf36
2 changed files with 32 additions and 14 deletions

View File

@ -68,18 +68,6 @@ class URL:
def authority(self) -> str:
    """Return ``self.components.authority``, or ``""`` when that value is falsy."""
    value = self.components.authority
    if value:
        return value
    return ""
@property
def path(self) -> str:
    """Return ``self.components.path``, or ``"/"`` when that value is falsy."""
    value = self.components.path
    if value:
        return value
    return "/"
@property
def query(self) -> str:
    """Return ``self.components.query``, or ``""`` when that value is falsy."""
    value = self.components.query
    if value:
        return value
    return ""
@property
def fragment(self) -> str:
    """Return ``self.components.fragment``, or ``""`` when that value is falsy."""
    value = self.components.fragment
    if value:
        return value
    return ""
@property
def host(self) -> str:
    """Return ``self.components.host``, or ``""`` when that value is falsy."""
    value = self.components.host
    if value:
        return value
    return ""
@ -91,6 +79,14 @@ class URL:
return {"https": 443, "http": 80}[self.scheme]
return int(port)
@property
def path(self) -> str:
    """Return ``self.components.path``, or ``"/"`` when that value is falsy."""
    value = self.components.path
    if value:
        return value
    return "/"
@property
def query(self) -> str:
    """Return ``self.components.query``, or ``""`` when that value is falsy."""
    value = self.components.query
    if value:
        return value
    return ""
@property
def full_path(self) -> str:
path = self.path
@ -98,6 +94,10 @@ class URL:
path += "?" + self.query
return path
@property
def fragment(self) -> str:
    """Return ``self.components.fragment``, or ``""`` when that value is falsy."""
    value = self.components.fragment
    if value:
        return value
    return ""
@property
def is_ssl(self) -> bool:
    """Return ``True`` exactly when ``self.components.scheme`` equals ``"https"``."""
    scheme = self.components.scheme
    return scheme == "https"
@ -509,8 +509,13 @@ class Response:
if content_type is None:
return None
parsed = cgi.parse_header(content_type)[-1]
return parsed.get("charset")
# RFC 2616 specifies that 'iso-8859-1' should be used as the default
# for 'text/*' media types, if no charset is provided.
# See: https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1
parsed = cgi.parse_header(content_type)
media_type, info = parsed[0], parsed[-1]
default = "iso-8859-1" if media_type.startswith("text/") else None
return info.get("charset", default)
@property
def apparent_encoding(self) -> typing.Optional[str]:

View File

@ -47,6 +47,19 @@ def test_response_fallback_to_autodetect():
assert response.encoding == "EUC-JP"
def test_response():
    """
    When the Content-Type is 'text/*' and carries no charset, the response
    encoding is expected to be ISO-8859-1.
    See: https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1
    """
    response = httpcore.Response(
        200,
        content=b"Hello, world!",
        headers={"Content-Type": "text/plain"},
    )

    # Checked in the same order as before: status, then encoding, then text.
    assert response.status_code == 200
    assert response.encoding == "iso-8859-1"
    assert response.text == "Hello, world!"
def test_response_default_encoding():
"""
Default to utf-8 if all else fails.