Open tianhuangtencent opened 1 year ago
I am trying to combine pycurl and httptools, but I found that the combination cannot handle HTTPS. Here is my code:
success with http:
import pycurl
import httptools


class HttpResponse:
    """Parser callback object that prints every event it receives."""

    def on_message_begin(self):
        """Report the start of a message."""
        print('on_message_begin')

    def on_url(self, url: bytes):
        """Report the URL bytes handed over by the parser."""
        print('on_url: url={}'.format(url))

    def on_header(self, name: bytes, value: bytes):
        """Report one header; the value is decoded as ISO-8859-1 for display."""
        decoded_value = value.decode("ISO-8859-1")
        print('on_header: name={}, value={}'.format(name, decoded_value))

    def on_headers_complete(self):
        """Report the end of the header section."""
        print('on_header_complete')

    def on_body(self, body: bytes):
        """Report the size of a body fragment."""
        print('on_body: {}'.format(len(body)))

    def on_message_complete(self):
        """Report the end of the message."""
        print('on_message_complete')

    def on_chunk_header(self):
        """Report a chunk header."""
        print('on_chunk_header')

    def on_chunk_complete(self):
        """Report the end of a chunk."""
        print('on_chunk_complete')

    def on_status(self, status: bytes):
        """Report the status text handed over by the parser."""
        print('on_status: status={}'.format(status))


# Show the library versions first so the output identifies the environment.
print(pycurl.version)
print(httptools.__version__)

handler = HttpResponse()
p = httptools.HttpResponseParser(handler)

c = pycurl.Curl()
c.setopt(pycurl.URL, "http://uvloop.readthedocs.io/")
# Transfer decoding is switched off; presumably so that the parser receives
# the body exactly as sent on the wire (e.g. still chunked).
c.setopt(pycurl.HTTP_TRANSFER_DECODING, 0)
# Header lines and body data are both fed into the same parser instance.
c.setopt(pycurl.WRITEFUNCTION, p.feed_data)
c.setopt(pycurl.HEADERFUNCTION, p.feed_data)
c.perform()
and output:
PycURL/7.45.2 libcurl/7.76.1 OpenSSL/1.1.1u zlib/1.2.11 libssh2/1.9.0 nghttp2/1.43.0 0.5.0 on_message_begin on_status: status=b'Found' on_header: name=b'Date', value=Thu, 08 Jun 2023 09:50:21 GMT on_header: name=b'Content-Type', value=text/html; charset=utf-8 on_header: name=b'Transfer-Encoding', value=chunked on_header: name=b'Connection', value=keep-alive on_header: name=b'Location', value=https://uvloop.readthedocs.io/ on_header: name=b'CF-Ray', value=7d403ae94955cec5-SJC on_header: name=b'CF-Cache-Status', value=EXPIRED on_header: name=b'Cache-Control', value=max-age=1200 on_header: name=b'Content-Language', value=en on_header: name=b'Vary', value=Accept-Language, Cookie, Accept-Encoding on_header: name=b'CDN-Cache-Control', value=public on_header: name=b'Referrer-Policy', value=no-referrer-when-downgrade on_header: name=b'X-Backend', value=web-i-0854c4793bcd745a7 on_header: name=b'X-Content-Type-Options', value=nosniff on_header: name=b'X-RTD-Domain', value=uvloop.readthedocs.io on_header: name=b'X-RTD-Project', value= on_header: name=b'X-RTD-Project-Method', value=public_domain on_header: name=b'X-RTD-Redirect', value=http_to_https on_header: name=b'X-RTD-Version-Method', value=path on_header: name=b'X-Served', value=Django-Proxito on_header: name=b'X-XSS-Protection', value=1; mode=block on_header: name=b'Server', value=cloudflare on_header: name=b'alt-svc', value=h3=":443"; ma=86400 on_header_complete on_chunk_header on_chunk_complete on_message_complete
fail with https:
import certifi
import pycurl
import httptools


class HttpResponse:
    """Parser callback object that prints every event it receives."""

    def on_message_begin(self):
        """Report the start of a message."""
        print('on_message_begin')

    def on_url(self, url: bytes):
        """Report the URL bytes handed over by the parser."""
        print('on_url: url={}'.format(url))

    def on_header(self, name: bytes, value: bytes):
        """Report one header; the value is decoded as ISO-8859-1 for display."""
        decoded_value = value.decode("ISO-8859-1")
        print('on_header: name={}, value={}'.format(name, decoded_value))

    def on_headers_complete(self):
        """Report the end of the header section."""
        print('on_header_complete')

    def on_body(self, body: bytes):
        """Report the size of a body fragment."""
        print('on_body: {}'.format(len(body)))

    def on_message_complete(self):
        """Report the end of the message."""
        print('on_message_complete')

    def on_chunk_header(self):
        """Report a chunk header."""
        print('on_chunk_header')

    def on_chunk_complete(self):
        """Report the end of a chunk."""
        print('on_chunk_complete')

    def on_status(self, status: bytes):
        """Report the status text handed over by the parser."""
        print('on_status: status={}'.format(status))


# Show the library versions first so the output identifies the environment.
print(pycurl.version)
print(httptools.__version__)

handler = HttpResponse()
p = httptools.HttpResponseParser(handler)

c = pycurl.Curl()
c.setopt(pycurl.URL, "https://uvloop.readthedocs.io/")
# Use certifi's CA bundle for verifying the server certificate.
c.setopt(c.CAINFO, certifi.where())
# Transfer decoding is switched off; presumably so that the parser receives
# the body exactly as sent on the wire (e.g. still chunked).
c.setopt(pycurl.HTTP_TRANSFER_DECODING, 0)
# Header lines and body data are both fed into the same parser instance.
# NOTE(review): this is the request that fails with HttpParserError;
# presumably libcurl negotiates HTTP/2 over TLS here, which httptools
# cannot parse -- confirm.
c.setopt(pycurl.WRITEFUNCTION, p.feed_data)
c.setopt(pycurl.HEADERFUNCTION, p.feed_data)
c.perform()
output:
PycURL/7.45.2 libcurl/7.76.1 OpenSSL/1.1.1u zlib/1.2.11 libssh2/1.9.0 nghttp2/1.43.0 0.5.0 on_message_begin --------------------------------------------------------------------------- HttpParserError Traceback (most recent call last) httptools/parser/parser.pyx in httptools.parser.parser.HttpParser.feed_data() HttpParserError: Expected dot --------------------------------------------------------------------------- error Traceback (most recent call last) /tmp/ipykernel_16171/3019824725.py in <module> 37 c.setopt(pycurl.WRITEFUNCTION, p.feed_data) 38 c.setopt(pycurl.HEADERFUNCTION, p.feed_data) ---> 39 c.perform() error: (23, 'Failed writing header')
By the way, how can I make httptools automatically decode the response body when the request is sent with Accept-Encoding: gzip,deflate?
Accept-Encoding: gzip,deflate
llhttp, the parser that httptools is built on, does not support HTTP/2, so httptools cannot parse HTTP/2 responses either.
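It looks like https://uvloop.readthedocs.io/ (a site served by readthedocs.io, not by uvloop itself) uses HTTP/2 by default. Forcing HTTP/1.1 on the pycurl side, e.g. with c.setopt(pycurl.HTTP_VERSION, pycurl.CURL_HTTP_VERSION_1_1), should give httptools a response it can parse.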
I am trying to combine pycurl and httptools, but I found that the combination cannot handle HTTPS. Here is my code:
success with http:
and output:
fail with https:
output:
By the way, how can I make httptools automatically decode the response body when the request is sent with
Accept-Encoding: gzip,deflate?