[X] I added a very descriptive title to this issue.
[X] I searched the LangChain documentation with the integrated search.
[X] I used the GitHub search to find a similar question and didn't find it.
[X] I am sure that this is a bug in LangChain rather than my code.
[X] The bug is not resolved by updating to the latest stable version of LangChain (or the specific integration package).
Example Code
The following code successfully reaches my self-hosted Unstructured API and converts a file into Unstructured JSON:
def test_file_conversion_api():
"""Test file conversion making a request to the endpoint directly with multi part form data requests
Copying the logic from this curl command:
def _make_request(
self,
conn: BaseHTTPConnection,
method: str,
url: str,
body: _TYPE_BODY | None = None,
headers: typing.Mapping[str, str] | None = None,
retries: Retry | None = None,
timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
chunked: bool = False,
response_conn: BaseHTTPConnection | None = None,
preload_content: bool = True,
decode_content: bool = True,
enforce_content_length: bool = True,
) -> BaseHTTPResponse:
"""
Perform a request on a given urllib connection object taken from our
pool.
:param conn:
a connection from one of our connection pools
:param method:
HTTP request method (such as GET, POST, PUT, etc.)
:param url:
The URL to perform the request on.
:param body:
Data to send in the request body, either :class:`str`, :class:`bytes`,
an iterable of :class:`str`/:class:`bytes`, or a file-like object.
:param headers:
Dictionary of custom headers to send, such as User-Agent,
If-None-Match, etc. If None, pool headers are used. If provided,
these headers completely replace any pool-specific headers.
:param retries:
Configure the number of retries to allow before raising a
:class:`~urllib3.exceptions.MaxRetryError` exception.
Pass ``None`` to retry until you receive a response. Pass a
:class:`~urllib3.util.retry.Retry` object for fine-grained control
over different types of retries.
Pass an integer number to retry connection errors that many times,
but no other types of errors. Pass zero to never retry.
If ``False``, then retries are disabled and any exception is raised
immediately. Also, instead of raising a MaxRetryError on redirects,
the redirect response will be returned.
:type retries: :class:`~urllib3.util.retry.Retry`, False, or an int.
:param timeout:
If specified, overrides the default timeout for this one
request. It may be a float (in seconds) or an instance of
:class:`urllib3.util.Timeout`.
:param chunked:
If True, urllib3 will send the body using chunked transfer
encoding. Otherwise, urllib3 will send the body using the standard
content-length form. Defaults to False.
:param response_conn:
Set this to ``None`` if you will handle releasing the connection or
set the connection to have the response release it.
:param preload_content:
If True, the response's body will be preloaded during construction.
:param decode_content:
If True, will attempt to decode the body based on the
'content-encoding' header.
:param enforce_content_length:
Enforce content length checking. Body returned by server must match
value of Content-Length header, if present. Otherwise, raise error.
"""
self.num_requests += 1
timeout_obj = self._get_timeout(timeout)
timeout_obj.start_connect()
conn.timeout = Timeout.resolve_default_timeout(timeout_obj.connect_timeout)
try:
# Trigger any extra validation we need to do.
try:
self._validate_conn(conn)
lib\site-packages\urllib3\connectionpool.py:467:
lib\site-packages\urllib3\connectionpool.py:1099: in _validate_conn
conn.connect()
lib\site-packages\urllib3\connection.py:653: in connect
sock_and_verified = _ssl_wrap_socket_and_match_hostname(
lib\site-packages\urllib3\connection.py:806: in _ssl_wrap_socket_and_match_hostname
ssl_sock = ssl_wrapsocket(
lib\site-packages\urllib3\util\ssl.py:465: in ssl_wrap_socket
ssl_sock = _ssl_wrap_socket_impl(sock, context, tls_in_tls, serverhostname)
lib\site-packages\urllib3\util\ssl.py:509: in _ssl_wrap_socket_impl
return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
C:\Python310\lib\ssl.py:513: in wrap_socket
return self.sslsocket_class._create(
C:\Python310\lib\ssl.py:1071: in _create
self.do_handshake()
def urlopen( # type: ignore[override]
self,
method: str,
url: str,
body: _TYPE_BODY | None = None,
headers: typing.Mapping[str, str] | None = None,
retries: Retry | bool | int | None = None,
redirect: bool = True,
assert_same_host: bool = True,
timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
pool_timeout: int | None = None,
release_conn: bool | None = None,
chunked: bool = False,
body_pos: _TYPE_BODY_POSITION | None = None,
preload_content: bool = True,
decode_content: bool = True,
**response_kw: typing.Any,
) -> BaseHTTPResponse:
"""
Get a connection from the pool and perform an HTTP request. This is the
lowest level call for making a request, so you'll need to specify all
the raw details.
.. note::
More commonly, it's appropriate to use a convenience method
such as :meth:`request`.
.. note::
`release_conn` will only behave as expected if
`preload_content=False` because we want to make
`preload_content=False` the default behaviour someday soon without
breaking backwards compatibility.
:param method:
HTTP request method (such as GET, POST, PUT, etc.)
:param url:
The URL to perform the request on.
:param body:
Data to send in the request body, either :class:`str`, :class:`bytes`,
an iterable of :class:`str`/:class:`bytes`, or a file-like object.
:param headers:
Dictionary of custom headers to send, such as User-Agent,
If-None-Match, etc. If None, pool headers are used. If provided,
these headers completely replace any pool-specific headers.
:param retries:
Configure the number of retries to allow before raising a
:class:`~urllib3.exceptions.MaxRetryError` exception.
If ``None`` (default) will retry 3 times, see ``Retry.DEFAULT``. Pass a
:class:`~urllib3.util.retry.Retry` object for fine-grained control
over different types of retries.
Pass an integer number to retry connection errors that many times,
but no other types of errors. Pass zero to never retry.
If ``False``, then retries are disabled and any exception is raised
immediately. Also, instead of raising a MaxRetryError on redirects,
the redirect response will be returned.
:type retries: :class:`~urllib3.util.retry.Retry`, False, or an int.
:param redirect:
If True, automatically handle redirects (status codes 301, 302,
303, 307, 308). Each redirect counts as a retry. Disabling retries
will disable redirect, too.
:param assert_same_host:
If ``True``, will make sure that the host of the pool requests is
consistent else will raise HostChangedError. When ``False``, you can
use the pool on an HTTP proxy and request foreign hosts.
:param timeout:
If specified, overrides the default timeout for this one
request. It may be a float (in seconds) or an instance of
:class:`urllib3.util.Timeout`.
:param pool_timeout:
If set and the pool is set to block=True, then this method will
block for ``pool_timeout`` seconds and raise EmptyPoolError if no
connection is available within the time period.
:param bool preload_content:
If True, the response's body will be preloaded into memory.
:param bool decode_content:
If True, will attempt to decode the body based on the
'content-encoding' header.
:param release_conn:
If False, then the urlopen call will not release the connection
back into the pool once a response is received (but will release if
you read the entire contents of the response such as when
`preload_content=True`). This is useful if you're not preloading
the response's content immediately. You will need to call
``r.release_conn()`` on the response ``r`` to return the connection
back into the pool. If None, it takes the value of ``preload_content``
which defaults to ``True``.
:param bool chunked:
If True, urllib3 will send the body using chunked transfer
encoding. Otherwise, urllib3 will send the body using the standard
content-length form. Defaults to False.
:param int body_pos:
Position to seek to in file-like body in the event of a retry or
redirect. Typically this won't need to be set because urllib3 will
auto-populate the value when needed.
"""
parsed_url = parse_url(url)
destination_scheme = parsed_url.scheme
if headers is None:
headers = self.headers
if not isinstance(retries, Retry):
retries = Retry.from_int(retries, redirect=redirect, default=self.retries)
if release_conn is None:
release_conn = preload_content
# Check host
if assert_same_host and not self.is_same_host(url):
raise HostChangedError(self, url, retries)
# Ensure that the URL we're connecting to is properly encoded
if url.startswith("/"):
url = to_str(_encode_target(url))
else:
url = to_str(parsed_url.url)
conn = None
# Track whether `conn` needs to be released before
# returning/raising/recursing. Update this variable if necessary, and
# leave `release_conn` constant throughout the function. That way, if
# the function recurses, the original value of `release_conn` will be
# passed down into the recursive call, and its value will be respected.
#
# See issue #651 [1] for details.
#
# [1] <https://github.com/urllib3/urllib3/issues/651>
release_this_conn = release_conn
http_tunnel_required = connection_requires_http_tunnel(
self.proxy, self.proxy_config, destination_scheme
)
# Merge the proxy headers. Only done when not using HTTP CONNECT. We
# have to copy the headers dict so we can safely change it without those
# changes being reflected in anyone else's copy.
if not http_tunnel_required:
headers = headers.copy() # type: ignore[attr-defined]
headers.update(self.proxy_headers) # type: ignore[union-attr]
# Must keep the exception bound to a separate variable or else Python 3
# complains about UnboundLocalError.
err = None
# Keep track of whether we cleanly exited the except block. This
# ensures we do proper cleanup in finally.
clean_exit = False
# Rewind body position, if needed. Record current position
# for future rewinds in the event of a redirect/retry.
body_pos = set_file_position(body, body_pos)
try:
# Request a connection from the queue.
timeout_obj = self._get_timeout(timeout)
conn = self._get_conn(timeout=pool_timeout)
conn.timeout = timeout_obj.connect_timeout # type: ignore[assignment]
# Is this a closed/new connection that requires CONNECT tunnelling?
if self.proxy is not None and http_tunnel_required and conn.is_closed:
try:
self._prepare_proxy(conn)
except (BaseSSLError, OSError, SocketTimeout) as e:
self._raise_timeout(
err=e, url=self.proxy.url, timeout_value=conn.timeout
)
raise
# If we're going to release the connection in ``finally:``, then
# the response doesn't need to know about the connection. Otherwise
# it will also try to release it and we'll have a double-release
# mess.
response_conn = conn if not release_conn else None
# Make the request on the HTTPConnection object
def _make_request(
self,
conn: BaseHTTPConnection,
method: str,
url: str,
body: _TYPE_BODY | None = None,
headers: typing.Mapping[str, str] | None = None,
retries: Retry | None = None,
timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
chunked: bool = False,
response_conn: BaseHTTPConnection | None = None,
preload_content: bool = True,
decode_content: bool = True,
enforce_content_length: bool = True,
) -> BaseHTTPResponse:
"""
Perform a request on a given urllib connection object taken from our
pool.
:param conn:
a connection from one of our connection pools
:param method:
HTTP request method (such as GET, POST, PUT, etc.)
:param url:
The URL to perform the request on.
:param body:
Data to send in the request body, either :class:`str`, :class:`bytes`,
an iterable of :class:`str`/:class:`bytes`, or a file-like object.
:param headers:
Dictionary of custom headers to send, such as User-Agent,
If-None-Match, etc. If None, pool headers are used. If provided,
these headers completely replace any pool-specific headers.
:param retries:
Configure the number of retries to allow before raising a
:class:`~urllib3.exceptions.MaxRetryError` exception.
Pass ``None`` to retry until you receive a response. Pass a
:class:`~urllib3.util.retry.Retry` object for fine-grained control
over different types of retries.
Pass an integer number to retry connection errors that many times,
but no other types of errors. Pass zero to never retry.
If ``False``, then retries are disabled and any exception is raised
immediately. Also, instead of raising a MaxRetryError on redirects,
the redirect response will be returned.
:type retries: :class:`~urllib3.util.retry.Retry`, False, or an int.
:param timeout:
If specified, overrides the default timeout for this one
request. It may be a float (in seconds) or an instance of
:class:`urllib3.util.Timeout`.
:param chunked:
If True, urllib3 will send the body using chunked transfer
encoding. Otherwise, urllib3 will send the body using the standard
content-length form. Defaults to False.
:param response_conn:
Set this to ``None`` if you will handle releasing the connection or
set the connection to have the response release it.
:param preload_content:
If True, the response's body will be preloaded during construction.
:param decode_content:
If True, will attempt to decode the body based on the
'content-encoding' header.
:param enforce_content_length:
Enforce content length checking. Body returned by server must match
value of Content-Length header, if present. Otherwise, raise error.
"""
self.num_requests += 1
timeout_obj = self._get_timeout(timeout)
timeout_obj.start_connect()
conn.timeout = Timeout.resolve_default_timeout(timeout_obj.connect_timeout)
try:
# Trigger any extra validation we need to do.
try:
self._validate_conn(conn)
except (SocketTimeout, BaseSSLError) as e:
self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)
raise
# _validate_conn() starts the connection to an HTTPS proxy
# so we need to wrap errors with 'ProxyError' here too.
except (
OSError,
NewConnectionError,
TimeoutError,
BaseSSLError,
CertificateError,
SSLError,
) as e:
new_e: Exception = e
if isinstance(e, (BaseSSLError, CertificateError)):
new_e = SSLError(e)
# If the connection didn't successfully connect to it's proxy
# then there
if isinstance(
new_e, (OSError, NewConnectionError, TimeoutError, SSLError)
) and (conn and conn.proxy and not conn.has_connected_to_proxy):
new_e = _wrap_proxy_error(new_e, conn.proxy.scheme)
raise new_e
E urllib3.exceptions.SSLError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1007)
def send(
self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None
):
"""Sends PreparedRequest object. Returns Response object.
:param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
:param stream: (optional) Whether to stream the request content.
:param timeout: (optional) How long to wait for the server to send
data before giving up, as a float, or a :ref:`(connect timeout,
read timeout) <timeouts>` tuple.
:type timeout: float or tuple or urllib3 Timeout object
:param verify: (optional) Either a boolean, in which case it controls whether
we verify the server's TLS certificate, or a string, in which case it
must be a path to a CA bundle to use
:param cert: (optional) Any user-provided SSL certificate to be trusted.
:param proxies: (optional) The proxies dictionary to apply to the request.
:rtype: requests.Response
"""
try:
conn = self.get_connection(request.url, proxies)
except LocationValueError as e:
raise InvalidURL(e, request=request)
self.cert_verify(conn, request.url, verify, cert)
url = self.request_url(request, proxies)
self.add_headers(
request,
stream=stream,
timeout=timeout,
verify=verify,
cert=cert,
proxies=proxies,
)
chunked = not (request.body is None or "Content-Length" in request.headers)
if isinstance(timeout, tuple):
try:
connect, read = timeout
timeout = TimeoutSauce(connect=connect, read=read)
except ValueError:
raise ValueError(
f"Invalid timeout {timeout}. Pass a (connect, read) timeout tuple, "
f"or a single float to set both timeouts to the same value."
)
elif isinstance(timeout, TimeoutSauce):
pass
else:
timeout = TimeoutSauce(connect=timeout, read=timeout)
try:
lib\site-packages\urllib3\connectionpool.py:847: in urlopen
retries = retries.increment(
self = Retry(total=0, connect=None, read=False, redirect=None, status=None), method = 'POST', url = '/general/v0/general', response = None
error = SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1007)'))
_pool = <urllib3.connectionpool.HTTPSConnectionPool object at 0x0000022254AC6C50>, _stacktrace = <traceback object at 0x0000022254AF4FC0>
def increment(
self,
method: str | None = None,
url: str | None = None,
response: BaseHTTPResponse | None = None,
error: Exception | None = None,
_pool: ConnectionPool | None = None,
_stacktrace: TracebackType | None = None,
) -> Retry:
"""Return a new Retry object with incremented retry counters.
:param response: A response object, or None, if the server did not
return a response.
:type response: :class:`~urllib3.response.BaseHTTPResponse`
:param Exception error: An error encountered during the request, or
None if the response was received successfully.
:return: A new ``Retry`` object.
"""
if self.total is False and error:
# Disabled, indicate to re-raise the error.
raise reraise(type(error), error, _stacktrace)
total = self.total
if total is not None:
total -= 1
connect = self.connect
read = self.read
redirect = self.redirect
status_count = self.status
other = self.other
cause = "unknown"
status = None
redirect_location = None
if error and self._is_connection_error(error):
# Connect retry?
if connect is False:
raise reraise(type(error), error, _stacktrace)
elif connect is not None:
connect -= 1
elif error and self._is_read_error(error):
# Read retry?
if read is False or method is None or not self._is_method_retryable(method):
raise reraise(type(error), error, _stacktrace)
elif read is not None:
read -= 1
elif error:
# Other retry?
if other is not None:
other -= 1
elif response and response.get_redirect_location():
# Redirect retry?
if redirect is not None:
redirect -= 1
cause = "too many redirects"
response_redirect_location = response.get_redirect_location()
if response_redirect_location:
redirect_location = response_redirect_location
status = response.status
else:
# Incrementing because of a server error like a 500 in
# status_forcelist and the given method is in the allowed_methods
cause = ResponseError.GENERIC_ERROR
if response and response.status:
if status_count is not None:
status_count -= 1
cause = ResponseError.SPECIFIC_ERROR.format(status_code=response.status)
status = response.status
history = self.history + (
RequestHistory(method, url, error, status, redirect_location),
)
new_retry = self.new(
total=total,
connect=connect,
read=read,
redirect=redirect,
status=status_count,
other=other,
history=history,
)
if new_retry.is_exhausted():
reason = error or ResponseError(cause)
raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
E urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='dev-discover.private.net', port=443): Max retries exceeded with url: /general/v0/general (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1007)')))
lib\site-packages\unstructured_client\general.py:59: in do_request
raise e
lib\site-packages\unstructured_client\general.py:56: in do_request
http_res = client.send(req)
lib\site-packages\requests\sessions.py:703: in send
r = adapter.send(request, **kwargs)
def send(
self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None
):
"""Sends PreparedRequest object. Returns Response object.
:param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
:param stream: (optional) Whether to stream the request content.
:param timeout: (optional) How long to wait for the server to send
data before giving up, as a float, or a :ref:`(connect timeout,
read timeout) <timeouts>` tuple.
:type timeout: float or tuple or urllib3 Timeout object
:param verify: (optional) Either a boolean, in which case it controls whether
we verify the server's TLS certificate, or a string, in which case it
must be a path to a CA bundle to use
:param cert: (optional) Any user-provided SSL certificate to be trusted.
:param proxies: (optional) The proxies dictionary to apply to the request.
:rtype: requests.Response
"""
try:
conn = self.get_connection(request.url, proxies)
except LocationValueError as e:
raise InvalidURL(e, request=request)
self.cert_verify(conn, request.url, verify, cert)
url = self.request_url(request, proxies)
self.add_headers(
request,
stream=stream,
timeout=timeout,
verify=verify,
cert=cert,
proxies=proxies,
)
chunked = not (request.body is None or "Content-Length" in request.headers)
if isinstance(timeout, tuple):
try:
connect, read = timeout
timeout = TimeoutSauce(connect=connect, read=read)
except ValueError:
raise ValueError(
f"Invalid timeout {timeout}. Pass a (connect, read) timeout tuple, "
f"or a single float to set both timeouts to the same value."
)
elif isinstance(timeout, TimeoutSauce):
pass
else:
timeout = TimeoutSauce(connect=timeout, read=timeout)
try:
resp = conn.urlopen(
method=request.method,
url=url,
body=request.body,
headers=request.headers,
redirect=False,
assert_same_host=False,
preload_content=False,
decode_content=False,
retries=self.max_retries,
timeout=timeout,
chunked=chunked,
)
except (ProtocolError, OSError) as err:
raise ConnectionError(err, request=request)
except MaxRetryError as e:
if isinstance(e.reason, ConnectTimeoutError):
# TODO: Remove this in 3.0.0: see #2811
if not isinstance(e.reason, NewConnectionError):
raise ConnectTimeout(e, request=request)
if isinstance(e.reason, ResponseError):
raise RetryError(e, request=request)
if isinstance(e.reason, _ProxyError):
raise ProxyError(e, request=request)
if isinstance(e.reason, _SSLError):
# This branch is for urllib3 v1.22 and later.
raise SSLError(e, request=request)
E requests.exceptions.SSLError: HTTPSConnectionPool(host='dev-discover.private.net', port=443): Max retries exceeded with url: /general/v0/general (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1007)')))
During handling of the above exception, another exception occurred:
def test_file_conversation_langchain():
"""Test file conversion using the lang chain wrapper
"""
# seems to fail SSL
health_check_url = UNSTRUCTURED_API_URL.replace("general/v0/general", "healthcheck")
check = requests.get(health_check_url, verify=False)
print(check)
loader = UnstructuredAPIFileLoader(api_key=UNSTRUCTURED_API_KEY, url=UNSTRUCTURED_API_URL, file_path=FILE_NAME)
docs = loader.load()
tests\test_unstructured_api.py:53:
lib\site-packages\langchain_core\document_loaders\base.py:29: in load
return list(self.lazy_load())
lib\site-packages\langchain_community\document_loaders\unstructured.py:88: in lazy_load
elements = self._get_elements()
lib\site-packages\langchain_community\document_loaders\unstructured.py:277: in _get_elements
return get_elements_from_api(
lib\site-packages\langchain_community\document_loaders\unstructured.py:215: in get_elements_from_api
return partition_via_api(
lib\site-packages\unstructured\partition\api.py:103: in partition_via_api
response = sdk.general.partition(req)
lib\site-packages\unstructured_client\utils_human_utils.py:86: in wrapper
return func(*args, *kwargs)
lib\site-packages\unstructured_client\utils_human_split_pdf.py:40: in wrapper
return func(args, **kwargs)
lib\site-packages\unstructured_client\general.py:73: in partition
http_res = utils.retry(do_request, utils.Retries(retry_config, [
lib\site-packages\unstructured_client\utils\retries.py:95: in retry
return retry_with_backoff(do_request, retries.config.backoff.initial_interval, retries.config.backoff.max_interval, retries.config.backoff.exponent, retries.config.backoff.max_elapsed_time)
lib\site-packages\unstructured_client\utils\retries.py:106: in retry_with_backoff
return func()
def do_request():
res: requests.Response
try:
res = func()
for code in retries.status_codes:
if "X" in code.upper():
code_range = int(code[0])
status_major = res.status_code / 100
if status_major >= code_range and status_major < code_range + 1:
raise TemporaryError(res)
else:
parsed_code = int(code)
if res.status_code == parsed_code:
raise TemporaryError(res)
except requests.exceptions.ConnectionError as exception:
if retries.config.config.retry_connection_errors:
E AttributeError: 'RetryConfig' object has no attribute 'config'
I'm trying to use LangChain to turn the JSON returned by my self-hosted Unstructured API into LangChain documents. I can reach the API, but loading its output into LangChain documents is proving difficult because SSL certificate verification fails. An option to pass verify=False for SSL certificates would be fantastic.
Checked other resources
Example Code
The following code successfully reaches my self-hosted Unstructured API and converts a file into Unstructured JSON:
Notice that I have to pass verify=False because the API is hosted on a private site with a self-signed certificate.
However, there is no option to do that with the LangChain document loader.
This code will hang until the loader times out. The Unstructured loader cannot handle the SSL certificate error.
Error Message and Stack Trace (if applicable)
self = <urllib3.connectionpool.HTTPSConnectionPool object at 0x0000022254AC6C50>, conn = <urllib3.connection.HTTPSConnection object at 0x0000022254AC7700>, method = 'POST' url = '/general/v0/general' body = b'--68d58396f5b31e8cd9878edbc5b4fe91\r\nContent-Disposition: form-data; name="files"; filename="C:/Users/223075449.HCA...\x06\x00\x00\x00\x00\x17\x00\x17\x00\x12\x06\x00\x00\xdf\x96\t\x00\x00\x00\r\n--68d58396f5b31e8cd9878edbc5b4fe91--\r\n' headers = {'unstructured-api-key': 'MY_API_KEY', 'Accept': 'application/json', 'user-agent': 'speakeasy-sdk/p...-client', 'Content-Length': '630224', 'Content-Type': 'multipart/form-data; boundary=68d58396f5b31e8cd9878edbc5b4fe91'} retries = Retry(total=0, connect=None, read=False, redirect=None, status=None), timeout = Timeout(connect=None, read=None, total=None), chunked = False response_conn = <urllib3.connection.HTTPSConnection object at 0x0000022254AC7700>, preload_content = False, decode_content = False, enforce_content_length = True
lib\site-packages\urllib3\connectionpool.py:467:
lib\site-packages\urllib3\connectionpool.py:1099: in _validate_conn conn.connect() lib\site-packages\urllib3\connection.py:653: in connect sock_and_verified = _ssl_wrap_socket_and_match_hostname( lib\site-packages\urllib3\connection.py:806: in _ssl_wrap_socket_and_match_hostname ssl_sock = ssl_wrapsocket( lib\site-packages\urllib3\util\ssl.py:465: in ssl_wrap_socket ssl_sock = _ssl_wrap_socket_impl(sock, context, tls_in_tls, serverhostname) lib\site-packages\urllib3\util\ssl.py:509: in _ssl_wrap_socket_impl return ssl_context.wrap_socket(sock, server_hostname=server_hostname) C:\Python310\lib\ssl.py:513: in wrap_socket return self.sslsocket_class._create( C:\Python310\lib\ssl.py:1071: in _create self.do_handshake()
self = <ssl.SSLSocket [closed] fd=-1, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=0>, block = False
C:\Python310\lib\ssl.py:1342: SSLCertVerificationError
During handling of the above exception, another exception occurred:
self = <urllib3.connectionpool.HTTPSConnectionPool object at 0x0000022254AC6C50>, method = 'POST', url = '/general/v0/general' body = b'--68d58396f5b31e8cd9878edbc5b4fe91\r\nContent-Disposition: form-data; name="files"; filename="C:/Users/223075449.HCA...\x06\x00\x00\x00\x00\x17\x00\x17\x00\x12\x06\x00\x00\xdf\x96\t\x00\x00\x00\r\n--68d58396f5b31e8cd9878edbc5b4fe91--\r\n' headers = {'unstructured-api-key': 'MY_API_KEY', 'Accept': 'application/json', 'user-agent': 'speakeasy-sdk/p...-client', 'Content-Length': '630224', 'Content-Type': 'multipart/form-data; boundary=68d58396f5b31e8cd9878edbc5b4fe91'} retries = Retry(total=0, connect=None, read=False, redirect=None, status=None), redirect = False, assert_same_host = False, timeout = Timeout(connect=None, read=None, total=None) pool_timeout = None, release_conn = False, chunked = False, body_pos = None, preload_content = False, decode_content = False, response_kw = {} parsed_url = Url(scheme=None, auth=None, host=None, port=None, path='/general/v0/general', query=None, fragment=None), destination_scheme = None, conn = None release_this_conn = True, http_tunnel_required = False, err = None, clean_exit = False
lib\site-packages\urllib3\connectionpool.py:793:
self = <urllib3.connectionpool.HTTPSConnectionPool object at 0x0000022254AC6C50>, conn = <urllib3.connection.HTTPSConnection object at 0x0000022254AC7700>, method = 'POST'
url = '/general/v0/general' body = b'--68d58396f5b31e8cd9878edbc5b4fe91\r\nContent-Disposition: form-data; name="files"; filename="C:/Users/223075449.HCA...\x06\x00\x00\x00\x00\x17\x00\x17\x00\x12\x06\x00\x00\xdf\x96\t\x00\x00\x00\r\n--68d58396f5b31e8cd9878edbc5b4fe91--\r\n' headers = {'unstructured-api-key': 'MY_API_KEY', 'Accept': 'application/json', 'user-agent': 'speakeasy-sdk/p...-client', 'Content-Length': '630224', 'Content-Type': 'multipart/form-data; boundary=68d58396f5b31e8cd9878edbc5b4fe91'} retries = Retry(total=0, connect=None, read=False, redirect=None, status=None), timeout = Timeout(connect=None, read=None, total=None), chunked = False response_conn = <urllib3.connection.HTTPSConnection object at 0x0000022254AC7700>, preload_content = False, decode_content = False, enforce_content_length = True
lib\site-packages\urllib3\connectionpool.py:491: SSLError
The above exception was the direct cause of the following exception:
self = <requests.adapters.HTTPAdapter object at 0x0000022254AC62F0>, request = <PreparedRequest [POST]>, stream = False, timeout = Timeout(connect=None, read=None, total=None)
verify = True, cert = None, proxies = {}
lib\site-packages\requests\adapters.py:486:
lib\site-packages\urllib3\connectionpool.py:847: in urlopen retries = retries.increment(
self = Retry(total=0, connect=None, read=False, redirect=None, status=None), method = 'POST', url = '/general/v0/general', response = None error = SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1007)')) _pool = <urllib3.connectionpool.HTTPSConnectionPool object at 0x0000022254AC6C50>, _stacktrace = <traceback object at 0x0000022254AF4FC0>
lib\site-packages\urllib3\util\retry.py:515: MaxRetryError
During handling of the above exception, another exception occurred:
lib\site-packages\unstructured_client\utils\retries.py:63:
lib\site-packages\unstructured_client\general.py:59: in do_request raise e lib\site-packages\unstructured_client\general.py:56: in do_request http_res = client.send(req) lib\site-packages\requests\sessions.py:703: in send r = adapter.send(request, **kwargs)
self = <requests.adapters.HTTPAdapter object at 0x0000022254AC62F0>, request = <PreparedRequest [POST]>, stream = False, timeout = Timeout(connect=None, read=None, total=None)
verify = True, cert = None, proxies = {}
lib\site-packages\requests\adapters.py:517: SSLError
During handling of the above exception, another exception occurred:
tests\test_unstructured_api.py:53:
lib\site-packages\langchain_core\document_loaders\base.py:29: in load return list(self.lazy_load()) lib\site-packages\langchain_community\document_loaders\unstructured.py:88: in lazy_load elements = self._get_elements() lib\site-packages\langchain_community\document_loaders\unstructured.py:277: in _get_elements return get_elements_from_api( lib\site-packages\langchain_community\document_loaders\unstructured.py:215: in get_elements_from_api return partition_via_api( lib\site-packages\unstructured\partition\api.py:103: in partition_via_api response = sdk.general.partition(req) lib\site-packages\unstructured_client\utils_human_utils.py:86: in wrapper return func(*args, *kwargs) lib\site-packages\unstructured_client\utils_human_split_pdf.py:40: in wrapper return func(args, **kwargs) lib\site-packages\unstructured_client\general.py:73: in partition http_res = utils.retry(do_request, utils.Retries(retry_config, [ lib\site-packages\unstructured_client\utils\retries.py:95: in retry return retry_with_backoff(do_request, retries.config.backoff.initial_interval, retries.config.backoff.max_interval, retries.config.backoff.exponent, retries.config.backoff.max_elapsed_time) lib\site-packages\unstructured_client\utils\retries.py:106: in retry_with_backoff return func()
lib\site-packages\unstructured_client\utils\retries.py:79: AttributeError
Description
I'm trying to use LangChain to turn the JSON returned by my self-hosted Unstructured API into LangChain documents. I can reach the API, but loading its output into LangChain documents is proving difficult because SSL certificate verification fails. An option to pass verify=False for SSL certificates would be fantastic.
System Info
OS: Windows; LangChain version: 0.1.17
Python 3.10.11