This returns the following error:
`(std) ➜ stateside git:(main) ✗ python3 tika_code.py
2024-09-25 11:46:12,166 [MainThread ] [INFO ] Retrieving https://drive.google.com/u/0/uc?id=1csLAKZ5G0UMlswlK3P0orJtcqY2yBxFN&export=download to /tmp/u-0-uc.
2024-09-25 11:46:15,933 [MainThread ] [INFO ] Retrieving http://search.maven.org/remotecontent?filepath=org/apache/tika/tika-server-standard/2.6.0/tika-server-standard-2.6.0.jar.md5 to /tmp/tika-server.jar.md5.
Traceback (most recent call last):
File "/home/kyojinprat/stateside/std/lib/python3.10/site-packages/tika/tika.py", line 804, in getRemoteJar
urlretrieve(urlOrPath, destPath)
File "/usr/lib/python3.10/urllib/request.py", line 241, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "/usr/lib/python3.10/urllib/request.py", line 216, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib/python3.10/urllib/request.py", line 525, in open
response = meth(req, response)
File "/usr/lib/python3.10/urllib/request.py", line 634, in http_response
response = self.parent.error(
File "/usr/lib/python3.10/urllib/request.py", line 563, in error
return self._call_chain(*args)
File "/usr/lib/python3.10/urllib/request.py", line 496, in _call_chain
result = func(*args)
File "/usr/lib/python3.10/urllib/request.py", line 643, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/kyojinprat/stateside/tika_code.py", line 7, in <module>
data = parser.from_file(file_path, xmlContent=True)
File "/home/kyojinprat/stateside/std/lib/python3.10/site-packages/tika/parser.py", line 42, in from_file
output = parse1(service, filename, serverEndpoint, services={'meta': '/meta', 'text': '/tika', 'all': '/rmeta/xml'},
File "/home/kyojinprat/stateside/std/lib/python3.10/site-packages/tika/tika.py", line 337, in parse1
status, response = callServer('put', serverEndpoint, service, f,
File "/home/kyojinprat/stateside/std/lib/python3.10/site-packages/tika/tika.py", line 532, in callServer
serverEndpoint = checkTikaServer(scheme, serverHost, port, tikaServerJar, classpath, config_path)
File "/home/kyojinprat/stateside/std/lib/python3.10/site-packages/tika/tika.py", line 595, in checkTikaServer
if not checkJarSig(tikaServerJar, jarPath):
File "/home/kyojinprat/stateside/std/lib/python3.10/site-packages/tika/tika.py", line 613, in checkJarSig
getRemoteJar(tikaServerJar + ".md5", jarPath + ".md5")
File "/home/kyojinprat/stateside/std/lib/python3.10/site-packages/tika/tika.py", line 814, in getRemoteJar
urlretrieve(urlOrPath, destPath)
File "/usr/lib/python3.10/urllib/request.py", line 241, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "/usr/lib/python3.10/urllib/request.py", line 216, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib/python3.10/urllib/request.py", line 525, in open
response = meth(req, response)
File "/usr/lib/python3.10/urllib/request.py", line 634, in http_response
response = self.parent.error(
File "/usr/lib/python3.10/urllib/request.py", line 563, in error
return self._call_chain(*args)
File "/usr/lib/python3.10/urllib/request.py", line 496, in _call_chain
result = func(*args)
File "/usr/lib/python3.10/urllib/request.py", line 643, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden`
No matter what I do, it always gives the same error. I have even tried hosting the Tika server locally by running the jar file with Java, but it still doesn't work.
I am trying to run the following code:
```python
import os
import tika
from tika import parser
tika.initVM()
file_path = "https://drive.google.com/u/0/uc?id=1csLAKZ5G0UMlswlK3P0orJtcqY2yBxFN&export=download"
os.environ['TIKA_SERVER_JAR'] = 'https://repo1.maven.org/maven2/org/apache/tika/tika-server/1.19/tika-server-1.19.jar'
data = parser.from_file(file_path, xmlContent=True)
print(data)
```
This returns the following error:
`(std) ➜ stateside git:(main) ✗ python3 tika_code.py
2024-09-25 11:46:12,166 [MainThread ] [INFO ] Retrieving https://drive.google.com/u/0/uc?id=1csLAKZ5G0UMlswlK3P0orJtcqY2yBxFN&export=download to /tmp/u-0-uc.
2024-09-25 11:46:15,933 [MainThread ] [INFO ] Retrieving http://search.maven.org/remotecontent?filepath=org/apache/tika/tika-server-standard/2.6.0/tika-server-standard-2.6.0.jar.md5 to /tmp/tika-server.jar.md5.
Traceback (most recent call last):
File "/home/kyojinprat/stateside/std/lib/python3.10/site-packages/tika/tika.py", line 804, in getRemoteJar
urlretrieve(urlOrPath, destPath)
File "/usr/lib/python3.10/urllib/request.py", line 241, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "/usr/lib/python3.10/urllib/request.py", line 216, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib/python3.10/urllib/request.py", line 525, in open
response = meth(req, response)
File "/usr/lib/python3.10/urllib/request.py", line 634, in http_response
response = self.parent.error(
File "/usr/lib/python3.10/urllib/request.py", line 563, in error
return self._call_chain(*args)
File "/usr/lib/python3.10/urllib/request.py", line 496, in _call_chain
result = func(*args)
File "/usr/lib/python3.10/urllib/request.py", line 643, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/kyojinprat/stateside/tika_code.py", line 7, in <module>
data = parser.from_file(file_path, xmlContent=True)
File "/home/kyojinprat/stateside/std/lib/python3.10/site-packages/tika/parser.py", line 42, in from_file
output = parse1(service, filename, serverEndpoint, services={'meta': '/meta', 'text': '/tika', 'all': '/rmeta/xml'},
File "/home/kyojinprat/stateside/std/lib/python3.10/site-packages/tika/tika.py", line 337, in parse1
status, response = callServer('put', serverEndpoint, service, f,
File "/home/kyojinprat/stateside/std/lib/python3.10/site-packages/tika/tika.py", line 532, in callServer
serverEndpoint = checkTikaServer(scheme, serverHost, port, tikaServerJar, classpath, config_path)
File "/home/kyojinprat/stateside/std/lib/python3.10/site-packages/tika/tika.py", line 595, in checkTikaServer
if not checkJarSig(tikaServerJar, jarPath):
File "/home/kyojinprat/stateside/std/lib/python3.10/site-packages/tika/tika.py", line 613, in checkJarSig
getRemoteJar(tikaServerJar + ".md5", jarPath + ".md5")
File "/home/kyojinprat/stateside/std/lib/python3.10/site-packages/tika/tika.py", line 814, in getRemoteJar
urlretrieve(urlOrPath, destPath)
File "/usr/lib/python3.10/urllib/request.py", line 241, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "/usr/lib/python3.10/urllib/request.py", line 216, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib/python3.10/urllib/request.py", line 525, in open
response = meth(req, response)
File "/usr/lib/python3.10/urllib/request.py", line 634, in http_response
response = self.parent.error(
File "/usr/lib/python3.10/urllib/request.py", line 563, in error
return self._call_chain(*args)
File "/usr/lib/python3.10/urllib/request.py", line 496, in _call_chain
result = func(*args)
File "/usr/lib/python3.10/urllib/request.py", line 643, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden`
No matter what I do, it always gives the same error. I have even tried hosting the Tika server locally by running the jar file with Java, but it still doesn't work.