Add support for zstd decoding (#3139)
This adds support for zstd decoding using the python package zstandard. This is similar to how it is implemented in urllib3. I also chose the optional installation option httpx[zstd] to mimic the same option in urllib3. zstd decoding is similar to brotli, but in benchmarks it is supposed to be even faster. The zstd compression is described in RFC 8878. See https://github.com/encode/httpx/discussions/1986 Co-authored-by: Kamil Monicz <kamil@monicz.dev>
This commit is contained in:
parent
7df47ce4d9
commit
392dbe45f0
@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
||||
|
||||
## Unreleased
|
||||
|
||||
### Added
|
||||
|
||||
* Support for `zstd` content decoding using the python `zstandard` package is added. Installable using `httpx[zstd]`. (#3139)
|
||||
|
||||
### Fixed
|
||||
|
||||
* Fix `app` type signature in `ASGITransport`. (#3109)
|
||||
|
||||
@ -138,6 +138,7 @@ As well as these optional installs:
|
||||
* `rich` - Rich terminal support. *(Optional, with `httpx[cli]`)*
|
||||
* `click` - Command line client support. *(Optional, with `httpx[cli]`)*
|
||||
* `brotli` or `brotlicffi` - Decoding for "brotli" compressed responses. *(Optional, with `httpx[brotli]`)*
|
||||
* `zstandard` - Decoding for "zstd" compressed responses. *(Optional, with `httpx[zstd]`)*
|
||||
|
||||
A huge amount of credit is due to `requests` for the API layout that
|
||||
much of this work follows, as well as to `urllib3` for plenty of design
|
||||
|
||||
@ -119,6 +119,7 @@ As well as these optional installs:
|
||||
* `rich` - Rich terminal support. *(Optional, with `httpx[cli]`)*
|
||||
* `click` - Command line client support. *(Optional, with `httpx[cli]`)*
|
||||
* `brotli` or `brotlicffi` - Decoding for "brotli" compressed responses. *(Optional, with `httpx[brotli]`)*
|
||||
* `zstandard` - Decoding for "zstd" compressed responses. *(Optional, with `httpx[zstd]`)*
|
||||
|
||||
A huge amount of credit is due to `requests` for the API layout that
|
||||
much of this work follows, as well as to `urllib3` for plenty of design
|
||||
@ -138,10 +139,10 @@ Or, to include the optional HTTP/2 support, use:
|
||||
$ pip install httpx[http2]
|
||||
```
|
||||
|
||||
To include the optional brotli decoder support, use:
|
||||
To include the optional brotli and zstandard decoder support, use:
|
||||
|
||||
```shell
|
||||
$ pip install httpx[brotli]
|
||||
$ pip install httpx[brotli,zstd]
|
||||
```
|
||||
|
||||
HTTPX requires Python 3.8+
|
||||
|
||||
@ -100,7 +100,8 @@ b'<!doctype html>\n<html>\n<head>\n<title>Example Domain</title>...'
|
||||
|
||||
Any `gzip` and `deflate` HTTP response encodings will automatically
|
||||
be decoded for you. If `brotlipy` is installed, then the `brotli` response
|
||||
encoding will also be supported.
|
||||
encoding will be supported. If `zstandard` is installed, then `zstd`
|
||||
response encodings will also be supported.
|
||||
|
||||
For example, to create an image from binary data returned by a request, you can use the following code:
|
||||
|
||||
@ -362,7 +363,8 @@ Or stream the text, on a line-by-line basis...
|
||||
|
||||
HTTPX will use universal line endings, normalising all cases to `\n`.
|
||||
|
||||
In some cases you might want to access the raw bytes on the response without applying any HTTP content decoding. In this case any content encoding that the web server has applied such as `gzip`, `deflate`, or `brotli` will not be automatically decoded.
|
||||
In some cases you might want to access the raw bytes on the response without applying any HTTP content decoding. In this case any content encoding that the web server has applied such as `gzip`, `deflate`, `brotli`, or `zstd` will
|
||||
not be automatically decoded.
|
||||
|
||||
```pycon
|
||||
>>> with httpx.stream("GET", "https://www.example.com") as r:
|
||||
|
||||
@ -3,8 +3,11 @@ The _compat module is used for code which requires branching between different
|
||||
Python environments. It is excluded from the code coverage checks.
|
||||
"""
|
||||
|
||||
import re
|
||||
import ssl
|
||||
import sys
|
||||
from types import ModuleType
|
||||
from typing import Optional
|
||||
|
||||
# Brotli support is optional
|
||||
# The C bindings in `brotli` are recommended for CPython.
|
||||
@ -17,6 +20,24 @@ except ImportError: # pragma: no cover
|
||||
except ImportError:
|
||||
brotli = None
|
||||
|
||||
# Zstandard support is optional
zstd: Optional[ModuleType] = None
try:
    import zstandard as zstd
except (AttributeError, ImportError, ValueError):  # Defensive:
    zstd = None
else:
    # The package 'zstandard' added the 'eof' property starting
    # in v0.18.0 which we require to ensure a complete and
    # valid zstd stream was fed into the ZstdDecoder.
    # See: https://github.com/urllib3/urllib3/pull/2624
    _zstd_version_match = re.search(
        r"^([0-9]+)\.([0-9]+)", str(getattr(zstd, "__version__", ""))
    )
    if _zstd_version_match is None:  # Defensive:
        # A missing or unparseable version string is treated the same as an
        # unsupported version, rather than raising while the module is
        # being imported.
        zstd = None
    else:
        _zstd_version = tuple(map(int, _zstd_version_match.groups()))
        if _zstd_version < (0, 18):  # Defensive:
            zstd = None
|
||||
|
||||
|
||||
if sys.version_info >= (3, 10) or ssl.OPENSSL_VERSION_INFO >= (1, 1, 0, 7):
|
||||
|
||||
def set_minimum_tls_version_1_2(context: ssl.SSLContext) -> None:
|
||||
|
||||
@ -11,7 +11,7 @@ import io
|
||||
import typing
|
||||
import zlib
|
||||
|
||||
from ._compat import brotli
|
||||
from ._compat import brotli, zstd
|
||||
from ._exceptions import DecodingError
|
||||
|
||||
|
||||
@ -140,6 +140,44 @@ class BrotliDecoder(ContentDecoder):
|
||||
raise DecodingError(str(exc)) from exc
|
||||
|
||||
|
||||
class ZStandardDecoder(ContentDecoder):
    """
    Handle 'zstd' RFC 8878 decoding.

    Requires `pip install zstandard`.
    Can be installed as a dependency of httpx using `pip install httpx[zstd]`.
    """

    # inspired by the ZstdDecoder implementation in urllib3
    def __init__(self) -> None:
        """
        Create a streaming zstd decompressor.

        Raises `ImportError` if the optional `zstandard` package is not
        available.
        """
        if zstd is None:  # pragma: no cover
            raise ImportError(
                "Using 'ZStandardDecoder', but the 'zstandard' package has not "
                "been installed. "
                "Make sure to install httpx using `pip install httpx[zstd]`."
            ) from None

        self.decompressor = zstd.ZstdDecompressor().decompressobj()
        # Tracks whether any payload bytes were fed in, so that `flush()` can
        # tell an empty body apart from a truncated stream.
        self.seen_data = False

    def decode(self, data: bytes) -> bytes:
        """
        Decompress a chunk of zstd data and return the decoded bytes.

        Raises `DecodingError` if the underlying decompressor reports a
        `ZstdError`.
        """
        assert zstd is not None
        if data:
            self.seen_data = True
        output = io.BytesIO()
        try:
            output.write(self.decompressor.decompress(data))
            # When one frame has ended but input remains, continue with a
            # fresh decompressor so that concatenated frames are all decoded.
            while self.decompressor.eof and self.decompressor.unused_data:
                unused_data = self.decompressor.unused_data
                self.decompressor = zstd.ZstdDecompressor().decompressobj()
                output.write(self.decompressor.decompress(unused_data))
        except zstd.ZstdError as exc:
            raise DecodingError(str(exc)) from exc
        return output.getvalue()

    def flush(self) -> bytes:
        """
        Finish decoding and return any remaining bytes.

        Returns `b""` when no data was ever decoded (an empty body).
        Raises `DecodingError` if data was fed in but the current frame
        never reached its end.
        """
        if not self.seen_data:
            # Nothing was decoded, so there is no frame to be incomplete.
            return b""
        ret = self.decompressor.flush()  # note: this is a no-op
        if not self.decompressor.eof:
            raise DecodingError("Zstandard data is incomplete")  # pragma: no cover
        return bytes(ret)
|
||||
|
||||
|
||||
class MultiDecoder(ContentDecoder):
|
||||
"""
|
||||
Handle the case where multiple encodings have been applied.
|
||||
@ -323,8 +361,11 @@ SUPPORTED_DECODERS = {
|
||||
"gzip": GZipDecoder,
|
||||
"deflate": DeflateDecoder,
|
||||
"br": BrotliDecoder,
|
||||
"zstd": ZStandardDecoder,
|
||||
}
|
||||
|
||||
|
||||
if brotli is None:
|
||||
SUPPORTED_DECODERS.pop("br") # pragma: no cover
|
||||
if zstd is None:
|
||||
SUPPORTED_DECODERS.pop("zstd") # pragma: no cover
|
||||
|
||||
@ -818,7 +818,7 @@ class Response:
|
||||
def iter_bytes(self, chunk_size: int | None = None) -> typing.Iterator[bytes]:
|
||||
"""
|
||||
A byte-iterator over the decoded response content.
|
||||
This allows us to handle gzip, deflate, and brotli encoded responses.
|
||||
This allows us to handle gzip, deflate, brotli, and zstd encoded responses.
|
||||
"""
|
||||
if hasattr(self, "_content"):
|
||||
chunk_size = len(self._content) if chunk_size is None else chunk_size
|
||||
@ -918,7 +918,7 @@ class Response:
|
||||
) -> typing.AsyncIterator[bytes]:
|
||||
"""
|
||||
A byte-iterator over the decoded response content.
|
||||
This allows us to handle gzip, deflate, and brotli encoded responses.
|
||||
This allows us to handle gzip, deflate, brotli, and zstd encoded responses.
|
||||
"""
|
||||
if hasattr(self, "_content"):
|
||||
chunk_size = len(self._content) if chunk_size is None else chunk_size
|
||||
|
||||
@ -52,6 +52,9 @@ http2 = [
|
||||
socks = [
|
||||
"socksio==1.*",
|
||||
]
|
||||
zstd = [
|
||||
"zstandard>=0.18.0",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
httpx = "httpx:main"
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# On the other hand, we're not pinning package dependencies, because our tests
|
||||
# needs to pass with the latest version of the packages.
|
||||
# Reference: https://github.com/encode/httpx/pull/1721#discussion_r661241588
|
||||
-e .[brotli,cli,http2,socks]
|
||||
-e .[brotli,cli,http2,socks,zstd]
|
||||
|
||||
# Optional charset auto-detection
|
||||
# Used in our test cases
|
||||
|
||||
@ -357,7 +357,7 @@ def test_raw_client_header():
|
||||
assert response.json() == [
|
||||
["Host", "example.org"],
|
||||
["Accept", "*/*"],
|
||||
["Accept-Encoding", "gzip, deflate, br"],
|
||||
["Accept-Encoding", "gzip, deflate, br, zstd"],
|
||||
["Connection", "keep-alive"],
|
||||
["User-Agent", f"python-httpx/{httpx.__version__}"],
|
||||
["Example-Header", "example-value"],
|
||||
|
||||
@ -36,7 +36,7 @@ def test_event_hooks():
|
||||
"host": "127.0.0.1:8000",
|
||||
"user-agent": f"python-httpx/{httpx.__version__}",
|
||||
"accept": "*/*",
|
||||
"accept-encoding": "gzip, deflate, br",
|
||||
"accept-encoding": "gzip, deflate, br, zstd",
|
||||
"connection": "keep-alive",
|
||||
"authorization": "Basic dXNlcm5hbWU6cGFzc3dvcmQ=",
|
||||
},
|
||||
@ -87,7 +87,7 @@ async def test_async_event_hooks():
|
||||
"host": "127.0.0.1:8000",
|
||||
"user-agent": f"python-httpx/{httpx.__version__}",
|
||||
"accept": "*/*",
|
||||
"accept-encoding": "gzip, deflate, br",
|
||||
"accept-encoding": "gzip, deflate, br, zstd",
|
||||
"connection": "keep-alive",
|
||||
"authorization": "Basic dXNlcm5hbWU6cGFzc3dvcmQ=",
|
||||
},
|
||||
@ -144,7 +144,7 @@ def test_event_hooks_with_redirect():
|
||||
"host": "127.0.0.1:8000",
|
||||
"user-agent": f"python-httpx/{httpx.__version__}",
|
||||
"accept": "*/*",
|
||||
"accept-encoding": "gzip, deflate, br",
|
||||
"accept-encoding": "gzip, deflate, br, zstd",
|
||||
"connection": "keep-alive",
|
||||
"authorization": "Basic dXNlcm5hbWU6cGFzc3dvcmQ=",
|
||||
},
|
||||
@ -159,7 +159,7 @@ def test_event_hooks_with_redirect():
|
||||
"host": "127.0.0.1:8000",
|
||||
"user-agent": f"python-httpx/{httpx.__version__}",
|
||||
"accept": "*/*",
|
||||
"accept-encoding": "gzip, deflate, br",
|
||||
"accept-encoding": "gzip, deflate, br, zstd",
|
||||
"connection": "keep-alive",
|
||||
"authorization": "Basic dXNlcm5hbWU6cGFzc3dvcmQ=",
|
||||
},
|
||||
@ -201,7 +201,7 @@ async def test_async_event_hooks_with_redirect():
|
||||
"host": "127.0.0.1:8000",
|
||||
"user-agent": f"python-httpx/{httpx.__version__}",
|
||||
"accept": "*/*",
|
||||
"accept-encoding": "gzip, deflate, br",
|
||||
"accept-encoding": "gzip, deflate, br, zstd",
|
||||
"connection": "keep-alive",
|
||||
"authorization": "Basic dXNlcm5hbWU6cGFzc3dvcmQ=",
|
||||
},
|
||||
@ -216,7 +216,7 @@ async def test_async_event_hooks_with_redirect():
|
||||
"host": "127.0.0.1:8000",
|
||||
"user-agent": f"python-httpx/{httpx.__version__}",
|
||||
"accept": "*/*",
|
||||
"accept-encoding": "gzip, deflate, br",
|
||||
"accept-encoding": "gzip, deflate, br, zstd",
|
||||
"connection": "keep-alive",
|
||||
"authorization": "Basic dXNlcm5hbWU6cGFzc3dvcmQ=",
|
||||
},
|
||||
|
||||
@ -34,7 +34,7 @@ def test_client_header():
|
||||
assert response.json() == {
|
||||
"headers": {
|
||||
"accept": "*/*",
|
||||
"accept-encoding": "gzip, deflate, br",
|
||||
"accept-encoding": "gzip, deflate, br, zstd",
|
||||
"connection": "keep-alive",
|
||||
"example-header": "example-value",
|
||||
"host": "example.org",
|
||||
@ -56,7 +56,7 @@ def test_header_merge():
|
||||
assert response.json() == {
|
||||
"headers": {
|
||||
"accept": "*/*",
|
||||
"accept-encoding": "gzip, deflate, br",
|
||||
"accept-encoding": "gzip, deflate, br, zstd",
|
||||
"connection": "keep-alive",
|
||||
"host": "example.org",
|
||||
"user-agent": "python-myclient/0.2.1",
|
||||
@ -78,7 +78,7 @@ def test_header_merge_conflicting_headers():
|
||||
assert response.json() == {
|
||||
"headers": {
|
||||
"accept": "*/*",
|
||||
"accept-encoding": "gzip, deflate, br",
|
||||
"accept-encoding": "gzip, deflate, br, zstd",
|
||||
"connection": "keep-alive",
|
||||
"host": "example.org",
|
||||
"user-agent": f"python-httpx/{httpx.__version__}",
|
||||
@ -100,7 +100,7 @@ def test_header_update():
|
||||
assert first_response.json() == {
|
||||
"headers": {
|
||||
"accept": "*/*",
|
||||
"accept-encoding": "gzip, deflate, br",
|
||||
"accept-encoding": "gzip, deflate, br, zstd",
|
||||
"connection": "keep-alive",
|
||||
"host": "example.org",
|
||||
"user-agent": f"python-httpx/{httpx.__version__}",
|
||||
@ -111,7 +111,7 @@ def test_header_update():
|
||||
assert second_response.json() == {
|
||||
"headers": {
|
||||
"accept": "*/*",
|
||||
"accept-encoding": "gzip, deflate, br",
|
||||
"accept-encoding": "gzip, deflate, br, zstd",
|
||||
"another-header": "AThing",
|
||||
"connection": "keep-alive",
|
||||
"host": "example.org",
|
||||
@ -164,7 +164,7 @@ def test_remove_default_header():
|
||||
assert response.json() == {
|
||||
"headers": {
|
||||
"accept": "*/*",
|
||||
"accept-encoding": "gzip, deflate, br",
|
||||
"accept-encoding": "gzip, deflate, br, zstd",
|
||||
"connection": "keep-alive",
|
||||
"host": "example.org",
|
||||
}
|
||||
@ -192,7 +192,7 @@ def test_host_with_auth_and_port_in_url():
|
||||
assert response.json() == {
|
||||
"headers": {
|
||||
"accept": "*/*",
|
||||
"accept-encoding": "gzip, deflate, br",
|
||||
"accept-encoding": "gzip, deflate, br, zstd",
|
||||
"connection": "keep-alive",
|
||||
"host": "example.org",
|
||||
"user-agent": f"python-httpx/{httpx.__version__}",
|
||||
@ -215,7 +215,7 @@ def test_host_with_non_default_port_in_url():
|
||||
assert response.json() == {
|
||||
"headers": {
|
||||
"accept": "*/*",
|
||||
"accept-encoding": "gzip, deflate, br",
|
||||
"accept-encoding": "gzip, deflate, br, zstd",
|
||||
"connection": "keep-alive",
|
||||
"host": "example.org:123",
|
||||
"user-agent": f"python-httpx/{httpx.__version__}",
|
||||
|
||||
@ -157,7 +157,7 @@ async def test_asgi_headers():
|
||||
"headers": [
|
||||
["host", "www.example.org"],
|
||||
["accept", "*/*"],
|
||||
["accept-encoding", "gzip, deflate, br"],
|
||||
["accept-encoding", "gzip, deflate, br, zstd"],
|
||||
["connection", "keep-alive"],
|
||||
["user-agent", f"python-httpx/{httpx.__version__}"],
|
||||
]
|
||||
|
||||
@ -1,10 +1,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import typing
|
||||
import zlib
|
||||
|
||||
import chardet
|
||||
import pytest
|
||||
import zstandard as zstd
|
||||
|
||||
import httpx
|
||||
|
||||
@ -73,6 +75,53 @@ def test_brotli():
|
||||
assert response.content == body
|
||||
|
||||
|
||||
def test_zstd():
    # A zstd-encoded body should be decoded when accessing `.content`.
    expected = b"test 123"

    response = httpx.Response(
        200,
        headers=[(b"Content-Encoding", b"zstd")],
        content=zstd.compress(expected),
    )
    assert response.content == expected
|
||||
|
||||
|
||||
def test_zstd_decoding_error():
    # A body that is not valid zstd data must surface as `httpx.DecodingError`.
    invalid_body = "this_is_not_zstd_compressed_data"
    response_headers = [(b"Content-Encoding", b"zstd")]

    with pytest.raises(httpx.DecodingError):
        httpx.Response(200, headers=response_headers, content=invalid_body)
|
||||
|
||||
|
||||
def test_zstd_multiframe():
    # test inspired by urllib3 test suite
    skippable_frame = bytes.fromhex(
        "50 2A 4D 18"  # Magic_Number (little-endian)
        "07 00 00 00"  # Frame_Size (little-endian)
        "00 00 00 00 00 00 00"  # User_Data
    )
    # Two Zstandard frames with a skippable frame (must be ignored) between.
    payload = zstd.compress(b"foo") + skippable_frame + zstd.compress(b"bar")

    response = httpx.Response(
        200,
        headers=[(b"Content-Encoding", b"zstd")],
        content=io.BytesIO(payload),
    )
    response.read()
    assert response.content == b"foobar"
|
||||
|
||||
|
||||
def test_multi():
|
||||
body = b"test 123"
|
||||
|
||||
|
||||
@ -129,7 +129,7 @@ def test_verbose(server):
|
||||
"GET / HTTP/1.1",
|
||||
f"Host: {server.url.netloc.decode('ascii')}",
|
||||
"Accept: */*",
|
||||
"Accept-Encoding: gzip, deflate, br",
|
||||
"Accept-Encoding: gzip, deflate, br, zstd",
|
||||
"Connection: keep-alive",
|
||||
f"User-Agent: python-httpx/{httpx.__version__}",
|
||||
"",
|
||||
@ -154,7 +154,7 @@ def test_auth(server):
|
||||
"GET / HTTP/1.1",
|
||||
f"Host: {server.url.netloc.decode('ascii')}",
|
||||
"Accept: */*",
|
||||
"Accept-Encoding: gzip, deflate, br",
|
||||
"Accept-Encoding: gzip, deflate, br, zstd",
|
||||
"Connection: keep-alive",
|
||||
f"User-Agent: python-httpx/{httpx.__version__}",
|
||||
"Authorization: Basic dXNlcm5hbWU6cGFzc3dvcmQ=",
|
||||
|
||||
Loading…
Reference in New Issue
Block a user