sgsinclair / VoyantServer

GNU General Public License v3.0
98 stars 33 forks source link

Error when Uploading file #18

Closed yshussain closed 7 years ago

yshussain commented 7 years ago

When I upload a file to VoyantServer running on a VM, I receive the following error:

2017-03-20 10:09:29.515:WARN:/:qtp1044036744-12: trombone: ERROR: An error occurred during multi-threaded document expansion.

java.lang.IllegalStateException: An error occurred during multi-threaded document expansion.

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor.getExtractedStoredDocumentSources(StoredDocumentSourceExtractor.java:99)

    at org.voyanttools.trombone.tool.build.DocumentExtractor.run(DocumentExtractor.java:89)

    at org.voyanttools.trombone.tool.build.RealCorpusCreator.run(RealCorpusCreator.java:93)

    at org.voyanttools.trombone.tool.corpus.CorpusCreator.run(CorpusCreator.java:49)

    at org.voyanttools.trombone.tool.utils.ToolRunner.run(ToolRunner.java:134)

    at org.voyanttools.trombone.Controller.run(Controller.java:110)

    at org.voyanttools.voyant.Trombone.runTromboneController(Trombone.java:274)

    at org.voyanttools.voyant.Trombone.doRequest(Trombone.java:249)

    at org.voyanttools.voyant.Trombone.doRequest(Trombone.java:130)

    at org.voyanttools.voyant.Trombone.doPost(Trombone.java:71)

    at javax.servlet.http.HttpServlet.service(HttpServlet.java:707)

    at javax.servlet.http.HttpServlet.service(HttpServlet.java:790)

    at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:835)

    at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:583)

    at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)

    at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)

    at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226)

    at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1158)

    at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:511)

    at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185)

    at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1090)

    at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)

    at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:119)

    at org.eclipse.jetty.server.Server.handle(Server.java:517)

    at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:308)

    at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:242)

    at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273)

    at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95)

    at org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:75)

    at org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceAndRun(ExecuteProduceConsume.java:213)

    at org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.run(ExecuteProduceConsume.java:147)

    at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:654)

    at org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:572)

    at java.lang.Thread.run(Thread.java:745)

Caused by: 

java.util.concurrent.ExecutionException: java.lang.NoSuchMethodError: org.apache.fontbox.afm.AFMParser.parse(Z)Lorg/apache/fontbox/afm/FontMetrics;

    at java.util.concurrent.FutureTask.report(FutureTask.java:122)

    at java.util.concurrent.FutureTask.get(FutureTask.java:192)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor.getExtractedStoredDocumentSources(StoredDocumentSourceExtractor.java:94)

    at org.voyanttools.trombone.tool.build.DocumentExtractor.run(DocumentExtractor.java:89)

    at org.voyanttools.trombone.tool.build.RealCorpusCreator.run(RealCorpusCreator.java:93)

    at org.voyanttools.trombone.tool.corpus.CorpusCreator.run(CorpusCreator.java:49)

    at org.voyanttools.trombone.tool.utils.ToolRunner.run(ToolRunner.java:134)

    at org.voyanttools.trombone.Controller.run(Controller.java:110)

    at org.voyanttools.voyant.Trombone.runTromboneController(Trombone.java:274)

    at org.voyanttools.voyant.Trombone.doRequest(Trombone.java:249)

    at org.voyanttools.voyant.Trombone.doRequest(Trombone.java:130)

    at org.voyanttools.voyant.Trombone.doPost(Trombone.java:71)

    at javax.servlet.http.HttpServlet.service(HttpServlet.java:707)

    at javax.servlet.http.HttpServlet.service(HttpServlet.java:790)

    at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:835)

    at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:583)

    at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)

    at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)

    at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226)

    at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1158)

    at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:511)

    at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185)

    at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1090)

    at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)

    at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:119)

    at org.eclipse.jetty.server.Server.handle(Server.java:517)

    at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:308)

    at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:242)

    at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273)

    at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95)

    at org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:75)

    at org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceAndRun(ExecuteProduceConsume.java:213)

    at org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.run(ExecuteProduceConsume.java:147)

    at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:654)

    at org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:572)

    at java.lang.Thread.run(Thread.java:745)

Caused by: 

java.lang.NoSuchMethodError: org.apache.fontbox.afm.AFMParser.parse(Z)Lorg/apache/fontbox/afm/FontMetrics;

    at org.apache.pdfbox.pdmodel.font.Standard14Fonts.addAFM(Standard14Fonts.java:118)

    at org.apache.pdfbox.pdmodel.font.Standard14Fonts.addAFM(Standard14Fonts.java:97)

    at org.apache.pdfbox.pdmodel.font.Standard14Fonts.<clinit>(Standard14Fonts.java:50)

    at org.apache.pdfbox.pdmodel.font.PDFont.<init>(PDFont.java:100)

    at org.apache.pdfbox.pdmodel.font.PDSimpleFont.<init>(PDSimpleFont.java:87)

    at org.apache.pdfbox.pdmodel.font.PDTrueTypeFont.<init>(PDTrueTypeFont.java:164)

    at org.apache.pdfbox.pdmodel.font.PDFontFactory.createFont(PDFontFactory.java:75)

    at org.apache.pdfbox.pdmodel.PDResources.getFont(PDResources.java:123)

    at org.apache.pdfbox.contentstream.operator.text.SetFontAndSize.process(SetFontAndSize.java:60)

    at org.apache.pdfbox.contentstream.PDFStreamEngine.processOperator(PDFStreamEngine.java:815)

    at org.apache.pdfbox.contentstream.PDFStreamEngine.processStreamOperators(PDFStreamEngine.java:472)

    at org.apache.pdfbox.contentstream.PDFStreamEngine.processStream(PDFStreamEngine.java:446)

    at org.apache.pdfbox.contentstream.PDFStreamEngine.processPage(PDFStreamEngine.java:149)

    at org.apache.pdfbox.text.PDFTextStreamEngine.processPage(PDFTextStreamEngine.java:136)

    at org.apache.pdfbox.text.PDFTextStripper.processPage(PDFTextStripper.java:391)

    at org.apache.tika.parser.pdf.PDF2XHTML.processPage(PDF2XHTML.java:214)

    at org.apache.pdfbox.text.PDFTextStripper.processPages(PDFTextStripper.java:319)

    at org.apache.pdfbox.text.PDFTextStripper.writeText(PDFTextStripper.java:266)

    at org.apache.tika.parser.pdf.PDF2XHTML.process(PDF2XHTML.java:160)

    at org.apache.tika.parser.pdf.PDFParser.parse(PDFParser.java:144)

    at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)

    at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)

    at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)

    at org.voyanttools.trombone.input.extract.TikaExtractor$ExtractableTikaInputSource.getInputStream(TikaExtractor.java:138)

    at org.voyanttools.trombone.storage.file.FileStoredDocumentSourceStorage.getStoredDocumentSource(FileStoredDocumentSourceStorage.java:115)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor.getExtractedStoredDocumentSource(StoredDocumentSourceExtractor.java:145)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor$CallableExtractor.call(StoredDocumentSourceExtractor.java:166)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor$CallableExtractor.call(StoredDocumentSourceExtractor.java:1)

    at java.util.concurrent.FutureTask.run(FutureTask.java:266)

    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)

    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)

    at java.lang.Thread.run(Thread.java:745)

2017-03-20 10:09:29.518:INFO:/:qtp1044036744-12: trombone: Parameters:|?VOYANT_BUILD: |?accessIP: 127.0.0.1|?upload: /home/ucsbcollab/Applications/VoyantServer/data/tmp.voyant.uploads/tmp.voyant.uploads352084943606655…|?VOYANT_VERSION: 2.2|?palette: default|?tool: corpus.CorpusCreator|?textarea-1014-inputEl: Type in one or more URLs on separate lines or paste in a full text.|

I'm running this on an Ubuntu 16.04.1 VM running OpenJDK 1.8.0_121.

sgsinclair commented 7 years ago

What kind of file are you trying to upload? Can you attach it here? (Or something as small as possible that still fails?)

yshussain commented 7 years ago

Attached: hamletfulltext.pdf. It's happened on every PDF I've tried; this is one example.

sgsinclair commented 7 years ago

Hmm, no problem locally (screenshot: capture d'écran 2017-03-20 à 13 39 15).

It seems to load fine on the server as well: http://voyant-tools.org/?corpus=8ac375765d6451706e6e6022c65a3275

I notice that there are a couple of versions of some of the libs (like pdfbox-1.8.10 and pdfbox-2.0.1). Could you please try cleaning out some of the older libs, restarting your server, and seeing if that helps?

yshussain commented 7 years ago

Sorry for the slow response. I've deleted the pdfbox jars and now receive the following error:

*** Starting Voyant Server – Web page will open automatically when ready ***

Using Free AdminPort=34000

Server Starting

2017-03-23 08:12:19.012:INFO::main: Logging initialized @1116ms

Jetty Version: 9.3.z-SNAPSHOT

http://*:8888

Jetty starting up ... please wait

2017-03-23 08:12:19.896:INFO:oejs.Server:main: jetty-9.3.z-SNAPSHOT

2017-03-23 08:12:51.615:WARN:oeja.AnnotationConfiguration:main: ServletContainerInitializers: detected. Class hierarchy: empty

2017-03-23 08:12:51.991:WARN:oejs.SecurityHandler:main: ServletContext@o.e.j.w.WebAppContext@1b26f7b2{/,file:///home/ucsbcollab/Applications/VoyantServer/_app/,STARTING} has uncovered http methods for path: /

2017-03-23 08:12:52.088:INFO:oejsh.ContextHandler:main: Started o.e.j.w.WebAppContext@1b26f7b2{/,file:///home/ucsbcollab/Applications/VoyantServer/_app/,AVAILABLE}

2017-03-23 08:12:52.156:INFO:oejs.ServerConnector:main: Started ServerConnector@424a4f04{HTTP/1.1,[http/1.1]}{0.0.0.0:8888}

2017-03-23 08:12:52.158:INFO:oejs.Server:main: Started @34265ms

... Jetty has started.

Trombone FileStorage location: /home/ucsbcollab/Applications/VoyantServer/data/trombone5_2

Mar 23, 2017 8:13:14 AM org.apache.cxf.jaxrs.utils.JAXRSUtils logMessageHandlerProblem

SEVERE: No message body writer has been found for class org.apache.cxf.jaxrs.ext.multipart.MultipartBody, ContentType: multipart/form-data

Mar 23, 2017 8:13:14 AM org.apache.cxf.phase.PhaseInterceptorChain doDefaultLogging

WARNING: Interceptor for {http://localhost:8080/processHeaderDocument}WebClient has thrown exception, unwinding now

org.apache.cxf.interceptor.Fault: No message body writer has been found for class org.apache.cxf.jaxrs.ext.multipart.MultipartBody, ContentType: multipart/form-data

    at org.apache.cxf.jaxrs.client.WebClient$BodyWriter.doWriteBody(WebClient.java:1220)

    at org.apache.cxf.jaxrs.client.AbstractClient$AbstractBodyWriter.handleMessage(AbstractClient.java:1044)

    at org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:307)

    at org.apache.cxf.jaxrs.client.AbstractClient.doRunInterceptorChain(AbstractClient.java:623)

    at org.apache.cxf.jaxrs.client.WebClient.doChainedInvocation(WebClient.java:1084)

    at org.apache.cxf.jaxrs.client.WebClient.doInvoke(WebClient.java:883)

    at org.apache.cxf.jaxrs.client.WebClient.doInvoke(WebClient.java:854)

    at org.apache.cxf.jaxrs.client.WebClient.invoke(WebClient.java:320)

    at org.apache.cxf.jaxrs.client.WebClient.post(WebClient.java:329)

    at org.apache.tika.parser.journal.GrobidRESTParser.parse(GrobidRESTParser.java:77)

    at org.apache.tika.parser.journal.JournalParser.parse(JournalParser.java:60)

    at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)

    at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)

    at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)

    at org.voyanttools.trombone.input.extract.TikaExtractor$ExtractableTikaInputSource.getInputStream(TikaExtractor.java:137)

    at org.voyanttools.trombone.storage.file.FileStoredDocumentSourceStorage.getStoredDocumentSource(FileStoredDocumentSourceStorage.java:115)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor.getExtractedStoredDocumentSource(StoredDocumentSourceExtractor.java:145)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor$CallableExtractor.call(StoredDocumentSourceExtractor.java:166)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor$CallableExtractor.call(StoredDocumentSourceExtractor.java:1)

    at java.util.concurrent.FutureTask.run(FutureTask.java:266)

    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)

    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)

    at java.lang.Thread.run(Thread.java:745)

Caused by: javax.ws.rs.ProcessingException: No message body writer has been found for class org.apache.cxf.jaxrs.ext.multipart.MultipartBody, ContentType: multipart/form-data

    at org.apache.cxf.jaxrs.client.AbstractClient.reportMessageHandlerProblem(AbstractClient.java:740)

    at org.apache.cxf.jaxrs.client.AbstractClient.writeBody(AbstractClient.java:469)

    at org.apache.cxf.jaxrs.client.WebClient$BodyWriter.doWriteBody(WebClient.java:1215)

    ... 22 more

Caused by: java.net.ConnectException: Connection refused (Connection refused)

    at java.net.PlainSocketImpl.socketConnect(Native Method)

    at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350)

    at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206)

    at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188)

    at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)

    at java.net.Socket.connect(Socket.java:589)

    at sun.net.NetworkClient.doConnect(NetworkClient.java:175)

    at sun.net.www.http.HttpClient.openServer(HttpClient.java:432)

    at sun.net.www.http.HttpClient.openServer(HttpClient.java:527)

    at sun.net.www.http.HttpClient.<init>(HttpClient.java:211)

    at sun.net.www.http.HttpClient.New(HttpClient.java:308)

    at sun.net.www.http.HttpClient.New(HttpClient.java:326)

    at sun.net.www.protocol.http.HttpURLConnection.getNewHttpClient(HttpURLConnection.java:1202)

    at sun.net.www.protocol.http.HttpURLConnection.plainConnect0(HttpURLConnection.java:1138)

    at sun.net.www.protocol.http.HttpURLConnection.plainConnect(HttpURLConnection.java:1032)

    at sun.net.www.protocol.http.HttpURLConnection.connect(HttpURLConnection.java:966)

    at sun.net.www.protocol.http.HttpURLConnection.getOutputStream0(HttpURLConnection.java:1316)

    at sun.net.www.protocol.http.HttpURLConnection.getOutputStream(HttpURLConnection.java:1291)

    at org.apache.cxf.transport.http.URLConnectionHTTPConduit$URLConnectionWrappedOutputStream.setupWrappedStream(URLConnectionHTTPConduit.java:174)

    at org.apache.cxf.transport.http.HTTPConduit$WrappedOutputStream.handleHeadersTrustCaching(HTTPConduit.java:1302)

    at org.apache.cxf.transport.http.HTTPConduit$WrappedOutputStream.onFirstWrite(HTTPConduit.java:1258)

    at org.apache.cxf.transport.http.URLConnectionHTTPConduit$URLConnectionWrappedOutputStream.onFirstWrite(URLConnectionHTTPConduit.java:201)

    at org.apache.cxf.io.AbstractWrappedOutputStream.write(AbstractWrappedOutputStream.java:47)

    at org.apache.cxf.io.AbstractThresholdOutputStream.unBuffer(AbstractThresholdOutputStream.java:89)

    at org.apache.cxf.io.AbstractThresholdOutputStream.write(AbstractThresholdOutputStream.java:63)

    at javax.activation.DataHandler.writeTo(DataHandler.java:309)

    at org.apache.cxf.jaxrs.provider.MultipartProvider.writeTo(MultipartProvider.java:266)

    at org.apache.cxf.jaxrs.utils.JAXRSUtils.writeMessageBody(JAXRSUtils.java:1363)

    at org.apache.cxf.jaxrs.client.AbstractClient.writeBody(AbstractClient.java:455)

    ... 23 more

2017-03-23 08:13:14.189:WARN:/:qtp1044036744-13: trombone: ERROR: An error occurred during multi-threaded document expansion.

java.lang.IllegalStateException: An error occurred during multi-threaded document expansion.

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor.getExtractedStoredDocumentSources(StoredDocumentSourceExtractor.java:99)

    at org.voyanttools.trombone.tool.build.DocumentExtractor.run(DocumentExtractor.java:89)

    at org.voyanttools.trombone.tool.build.RealCorpusCreator.run(RealCorpusCreator.java:93)

    at org.voyanttools.trombone.tool.corpus.CorpusCreator.run(CorpusCreator.java:49)

    at org.voyanttools.trombone.tool.utils.ToolRunner.run(ToolRunner.java:130)

    at org.voyanttools.trombone.Controller.run(Controller.java:110)

    at org.voyanttools.voyant.Trombone.runTromboneController(Trombone.java:274)

    at org.voyanttools.voyant.Trombone.doRequest(Trombone.java:249)

    at org.voyanttools.voyant.Trombone.doRequest(Trombone.java:131)

    at org.voyanttools.voyant.Trombone.doPost(Trombone.java:72)

    at javax.servlet.http.HttpServlet.service(HttpServlet.java:707)

    at javax.servlet.http.HttpServlet.service(HttpServlet.java:790)

    at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:835)

    at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:583)

    at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)

    at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)

    at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226)

    at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1158)

    at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:511)

    at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185)

    at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1090)

    at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)

    at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:119)

    at org.eclipse.jetty.server.Server.handle(Server.java:517)

    at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:308)

    at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:242)

    at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273)

    at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95)

    at org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:75)

    at org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceAndRun(ExecuteProduceConsume.java:213)

    at org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.run(ExecuteProduceConsume.java:147)

    at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:654)

    at org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:572)

    at java.lang.Thread.run(Thread.java:745)

Caused by: 

java.util.concurrent.ExecutionException: java.io.IOException: Unable to parse document: FILE: hamletfulltext.pdf

    at java.util.concurrent.FutureTask.report(FutureTask.java:122)

    at java.util.concurrent.FutureTask.get(FutureTask.java:192)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor.getExtractedStoredDocumentSources(StoredDocumentSourceExtractor.java:94)

    at org.voyanttools.trombone.tool.build.DocumentExtractor.run(DocumentExtractor.java:89)

    at org.voyanttools.trombone.tool.build.RealCorpusCreator.run(RealCorpusCreator.java:93)

    at org.voyanttools.trombone.tool.corpus.CorpusCreator.run(CorpusCreator.java:49)

    at org.voyanttools.trombone.tool.utils.ToolRunner.run(ToolRunner.java:130)

    at org.voyanttools.trombone.Controller.run(Controller.java:110)

    at org.voyanttools.voyant.Trombone.runTromboneController(Trombone.java:274)

    at org.voyanttools.voyant.Trombone.doRequest(Trombone.java:249)

    at org.voyanttools.voyant.Trombone.doRequest(Trombone.java:131)

    at org.voyanttools.voyant.Trombone.doPost(Trombone.java:72)

    at javax.servlet.http.HttpServlet.service(HttpServlet.java:707)

    at javax.servlet.http.HttpServlet.service(HttpServlet.java:790)

    at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:835)

    at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:583)

    at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)

    at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)

    at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226)

    at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1158)

    at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:511)

    at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185)

    at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1090)

    at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)

    at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:119)

    at org.eclipse.jetty.server.Server.handle(Server.java:517)

    at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:308)

    at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:242)

    at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273)

    at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95)

    at org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:75)

    at org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceAndRun(ExecuteProduceConsume.java:213)

    at org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.run(ExecuteProduceConsume.java:147)

    at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:654)

    at org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:572)

    at java.lang.Thread.run(Thread.java:745)

Caused by: 

java.io.IOException: Unable to parse document: FILE: hamletfulltext.pdf

    at org.voyanttools.trombone.input.extract.TikaExtractor$ExtractableTikaInputSource.getInputStream(TikaExtractor.java:139)

    at org.voyanttools.trombone.storage.file.FileStoredDocumentSourceStorage.getStoredDocumentSource(FileStoredDocumentSourceStorage.java:115)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor.getExtractedStoredDocumentSource(StoredDocumentSourceExtractor.java:145)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor$CallableExtractor.call(StoredDocumentSourceExtractor.java:166)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor$CallableExtractor.call(StoredDocumentSourceExtractor.java:1)

    at java.util.concurrent.FutureTask.run(FutureTask.java:266)

    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)

    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)

    at java.lang.Thread.run(Thread.java:745)

Caused by: 

org.apache.tika.exception.TikaException: Unexpected RuntimeException from org.apache.tika.parser.journal.JournalParser@3f808816

    at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:282)

    at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)

    at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)

    at org.voyanttools.trombone.input.extract.TikaExtractor$ExtractableTikaInputSource.getInputStream(TikaExtractor.java:137)

    at org.voyanttools.trombone.storage.file.FileStoredDocumentSourceStorage.getStoredDocumentSource(FileStoredDocumentSourceStorage.java:115)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor.getExtractedStoredDocumentSource(StoredDocumentSourceExtractor.java:145)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor$CallableExtractor.call(StoredDocumentSourceExtractor.java:166)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor$CallableExtractor.call(StoredDocumentSourceExtractor.java:1)

    at java.util.concurrent.FutureTask.run(FutureTask.java:266)

    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)

    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)

    at java.lang.Thread.run(Thread.java:745)

Caused by: 

javax.ws.rs.ProcessingException: org.apache.cxf.interceptor.Fault: No message body writer has been found for class org.apache.cxf.jaxrs.ext.multipart.MultipartBody, ContentType: multipart/form-data

    at org.apache.cxf.jaxrs.client.WebClient.doResponse(WebClient.java:1140)

    at org.apache.cxf.jaxrs.client.WebClient.doChainedInvocation(WebClient.java:1085)

    at org.apache.cxf.jaxrs.client.WebClient.doInvoke(WebClient.java:883)

    at org.apache.cxf.jaxrs.client.WebClient.doInvoke(WebClient.java:854)

    at org.apache.cxf.jaxrs.client.WebClient.invoke(WebClient.java:320)

    at org.apache.cxf.jaxrs.client.WebClient.post(WebClient.java:329)

    at org.apache.tika.parser.journal.GrobidRESTParser.parse(GrobidRESTParser.java:77)

    at org.apache.tika.parser.journal.JournalParser.parse(JournalParser.java:60)

    at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)

    at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)

    at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)

    at org.voyanttools.trombone.input.extract.TikaExtractor$ExtractableTikaInputSource.getInputStream(TikaExtractor.java:137)

    at org.voyanttools.trombone.storage.file.FileStoredDocumentSourceStorage.getStoredDocumentSource(FileStoredDocumentSourceStorage.java:115)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor.getExtractedStoredDocumentSource(StoredDocumentSourceExtractor.java:145)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor$CallableExtractor.call(StoredDocumentSourceExtractor.java:166)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor$CallableExtractor.call(StoredDocumentSourceExtractor.java:1)

    at java.util.concurrent.FutureTask.run(FutureTask.java:266)

    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)

    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)

    at java.lang.Thread.run(Thread.java:745)

Caused by: 

org.apache.cxf.interceptor.Fault: No message body writer has been found for class org.apache.cxf.jaxrs.ext.multipart.MultipartBody, ContentType: multipart/form-data

    at org.apache.cxf.jaxrs.client.WebClient$BodyWriter.doWriteBody(WebClient.java:1220)

    at org.apache.cxf.jaxrs.client.AbstractClient$AbstractBodyWriter.handleMessage(AbstractClient.java:1044)

    at org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:307)

    at org.apache.cxf.jaxrs.client.AbstractClient.doRunInterceptorChain(AbstractClient.java:623)

    at org.apache.cxf.jaxrs.client.WebClient.doChainedInvocation(WebClient.java:1084)

    at org.apache.cxf.jaxrs.client.WebClient.doInvoke(WebClient.java:883)

    at org.apache.cxf.jaxrs.client.WebClient.doInvoke(WebClient.java:854)

    at org.apache.cxf.jaxrs.client.WebClient.invoke(WebClient.java:320)

    at org.apache.cxf.jaxrs.client.WebClient.post(WebClient.java:329)

    at org.apache.tika.parser.journal.GrobidRESTParser.parse(GrobidRESTParser.java:77)

    at org.apache.tika.parser.journal.JournalParser.parse(JournalParser.java:60)

    at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)

    at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)

    at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)

    at org.voyanttools.trombone.input.extract.TikaExtractor$ExtractableTikaInputSource.getInputStream(TikaExtractor.java:137)

    at org.voyanttools.trombone.storage.file.FileStoredDocumentSourceStorage.getStoredDocumentSource(FileStoredDocumentSourceStorage.java:115)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor.getExtractedStoredDocumentSource(StoredDocumentSourceExtractor.java:145)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor$CallableExtractor.call(StoredDocumentSourceExtractor.java:166)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor$CallableExtractor.call(StoredDocumentSourceExtractor.java:1)

    at java.util.concurrent.FutureTask.run(FutureTask.java:266)

    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)

    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)

    at java.lang.Thread.run(Thread.java:745)

Caused by: 

javax.ws.rs.ProcessingException: No message body writer has been found for class org.apache.cxf.jaxrs.ext.multipart.MultipartBody, ContentType: multipart/form-data

    at org.apache.cxf.jaxrs.client.AbstractClient.reportMessageHandlerProblem(AbstractClient.java:740)

    at org.apache.cxf.jaxrs.client.AbstractClient.writeBody(AbstractClient.java:469)

    at org.apache.cxf.jaxrs.client.WebClient$BodyWriter.doWriteBody(WebClient.java:1215)

    at org.apache.cxf.jaxrs.client.AbstractClient$AbstractBodyWriter.handleMessage(AbstractClient.java:1044)

    at org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:307)

    at org.apache.cxf.jaxrs.client.AbstractClient.doRunInterceptorChain(AbstractClient.java:623)

    at org.apache.cxf.jaxrs.client.WebClient.doChainedInvocation(WebClient.java:1084)

    at org.apache.cxf.jaxrs.client.WebClient.doInvoke(WebClient.java:883)

    at org.apache.cxf.jaxrs.client.WebClient.doInvoke(WebClient.java:854)

    at org.apache.cxf.jaxrs.client.WebClient.invoke(WebClient.java:320)

    at org.apache.cxf.jaxrs.client.WebClient.post(WebClient.java:329)

    at org.apache.tika.parser.journal.GrobidRESTParser.parse(GrobidRESTParser.java:77)

    at org.apache.tika.parser.journal.JournalParser.parse(JournalParser.java:60)

    at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)

    at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)

    at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)

    at org.voyanttools.trombone.input.extract.TikaExtractor$ExtractableTikaInputSource.getInputStream(TikaExtractor.java:137)

    at org.voyanttools.trombone.storage.file.FileStoredDocumentSourceStorage.getStoredDocumentSource(FileStoredDocumentSourceStorage.java:115)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor.getExtractedStoredDocumentSource(StoredDocumentSourceExtractor.java:145)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor$CallableExtractor.call(StoredDocumentSourceExtractor.java:166)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor$CallableExtractor.call(StoredDocumentSourceExtractor.java:1)

    at java.util.concurrent.FutureTask.run(FutureTask.java:266)

    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)

    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)

    at java.lang.Thread.run(Thread.java:745)

Caused by: 

java.net.ConnectException: Connection refused (Connection refused)

    at java.net.PlainSocketImpl.socketConnect(Native Method)

    at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350)

    at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206)

    at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188)

    at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)

    at java.net.Socket.connect(Socket.java:589)

    at sun.net.NetworkClient.doConnect(NetworkClient.java:175)

    at sun.net.www.http.HttpClient.openServer(HttpClient.java:432)

    at sun.net.www.http.HttpClient.openServer(HttpClient.java:527)

    at sun.net.www.http.HttpClient.<init>(HttpClient.java:211)

    at sun.net.www.http.HttpClient.New(HttpClient.java:308)

    at sun.net.www.http.HttpClient.New(HttpClient.java:326)

    at sun.net.www.protocol.http.HttpURLConnection.getNewHttpClient(HttpURLConnection.java:1202)

    at sun.net.www.protocol.http.HttpURLConnection.plainConnect0(HttpURLConnection.java:1138)

    at sun.net.www.protocol.http.HttpURLConnection.plainConnect(HttpURLConnection.java:1032)

    at sun.net.www.protocol.http.HttpURLConnection.connect(HttpURLConnection.java:966)

    at sun.net.www.protocol.http.HttpURLConnection.getOutputStream0(HttpURLConnection.java:1316)

    at sun.net.www.protocol.http.HttpURLConnection.getOutputStream(HttpURLConnection.java:1291)

    at org.apache.cxf.transport.http.URLConnectionHTTPConduit$URLConnectionWrappedOutputStream.setupWrappedStream(URLConnectionHTTPConduit.java:174)

    at org.apache.cxf.transport.http.HTTPConduit$WrappedOutputStream.handleHeadersTrustCaching(HTTPConduit.java:1302)

    at org.apache.cxf.transport.http.HTTPConduit$WrappedOutputStream.onFirstWrite(HTTPConduit.java:1258)

    at org.apache.cxf.transport.http.URLConnectionHTTPConduit$URLConnectionWrappedOutputStream.onFirstWrite(URLConnectionHTTPConduit.java:201)

    at org.apache.cxf.io.AbstractWrappedOutputStream.write(AbstractWrappedOutputStream.java:47)

    at org.apache.cxf.io.AbstractThresholdOutputStream.unBuffer(AbstractThresholdOutputStream.java:89)

    at org.apache.cxf.io.AbstractThresholdOutputStream.write(AbstractThresholdOutputStream.java:63)

    at javax.activation.DataHandler.writeTo(DataHandler.java:309)

    at org.apache.cxf.jaxrs.provider.MultipartProvider.writeTo(MultipartProvider.java:266)

    at org.apache.cxf.jaxrs.utils.JAXRSUtils.writeMessageBody(JAXRSUtils.java:1363)

    at org.apache.cxf.jaxrs.client.AbstractClient.writeBody(AbstractClient.java:455)

    at org.apache.cxf.jaxrs.client.WebClient$BodyWriter.doWriteBody(WebClient.java:1215)

    at org.apache.cxf.jaxrs.client.AbstractClient$AbstractBodyWriter.handleMessage(AbstractClient.java:1044)

    at org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:307)

    at org.apache.cxf.jaxrs.client.AbstractClient.doRunInterceptorChain(AbstractClient.java:623)

    at org.apache.cxf.jaxrs.client.WebClient.doChainedInvocation(WebClient.java:1084)

    at org.apache.cxf.jaxrs.client.WebClient.doInvoke(WebClient.java:883)

    at org.apache.cxf.jaxrs.client.WebClient.doInvoke(WebClient.java:854)

    at org.apache.cxf.jaxrs.client.WebClient.invoke(WebClient.java:320)

    at org.apache.cxf.jaxrs.client.WebClient.post(WebClient.java:329)

    at org.apache.tika.parser.journal.GrobidRESTParser.parse(GrobidRESTParser.java:77)

    at org.apache.tika.parser.journal.JournalParser.parse(JournalParser.java:60)

    at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)

    at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)

    at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)

    at org.voyanttools.trombone.input.extract.TikaExtractor$ExtractableTikaInputSource.getInputStream(TikaExtractor.java:137)

    at org.voyanttools.trombone.storage.file.FileStoredDocumentSourceStorage.getStoredDocumentSource(FileStoredDocumentSourceStorage.java:115)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor.getExtractedStoredDocumentSource(StoredDocumentSourceExtractor.java:145)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor$CallableExtractor.call(StoredDocumentSourceExtractor.java:166)

    at org.voyanttools.trombone.input.extract.StoredDocumentSourceExtractor$CallableExtractor.call(StoredDocumentSourceExtractor.java:1)

    at java.util.concurrent.FutureTask.run(FutureTask.java:266)

    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)

    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)

    at java.lang.Thread.run(Thread.java:745)

2017-03-23 08:13:14.192:INFO:/:qtp1044036744-13: trombone: Parameters:|?accessIP: 127.0.0.1|?upload: /home/ucsbcollab/Applications/VoyantServer/data/tmp.voyant.uploads/tmp.voyant.uploads550188387377441…|?palette: default|?tool: corpus.CorpusCreator|?textarea-1014-inputEl: Type in one or more URLs on separate lines or paste in a full text.|
yshussain commented 7 years ago

I uploaded the jar files for PDFBox 2.0.5 and it appears to be working now. That was indeed the issue. Thanks!

sgsinclair commented 7 years ago

Excellent, thanks for confirming. I'll do some cleaning of the jars before creating the next build.