yacy / yacy_search_server

Distributed Peer-to-Peer Web Search Engine and Intranet Search Appliance
http://yacy.net
Other
3.38k stars 427 forks source link

Blocked threads when searching and crawling on Windows #658

Open smokingwheels opened 2 weeks ago

smokingwheels commented 2 weeks ago

This is likely due to a lack of memory. Running on Windows 11 with Amazon Java 11.

THREADS WITH STATES: BLOCKED

'Thread= RemoteSearch.solrRemoteSearch("xcfo" to NmWgZ0aELTlt) id=14655 BLOCKED at java.base@11.0.24/java.lang.Thread.join(Thread.java:1308) at app//net.yacy.peers.Protocol.solrQuery(Protocol.java:1259) at app//net.yacy.peers.RemoteSearch$2.run(RemoteSearch.java:411)'

Also trying this: set javacmd=%javacmd% -Djava.awt.headless=true -Dsolr.directoryFactory=solr.MMapDirectoryFactory -Dfile.encoding=UTF-8 -Dserver -DUseG1GC -DMaxGCPauseMillis=100 -DUseConcMarkSweepGC -DCMSIncrementalPacing

Trying to crawl 500 sites at once. YaCy Version: yacy_v1.940_202408262308_6db374fdc Assigned Memory = 2097152000 Used Memory = 256158208 Available Memory = 1840993792

THREADS WITH STATES: BLOCKED

Thread= CrawlStacker_pool-1-thread-175 id=24178 BLOCKED at app//net.yacy.crawler.HostQueue.push(HostQueue.java:434) [synchronized (this) {] at app//net.yacy.crawler.HostBalancer.push(HostBalancer.java:293) at app//net.yacy.crawler.data.NoticedURL.push(NoticedURL.java:185) at app//net.yacy.crawler.CrawlStacker.stackCrawl(CrawlStacker.java:406) at app//net.yacy.crawler.CrawlStacker.process(CrawlStacker.java:145) at app//net.yacy.crawler.CrawlStacker.process(CrawlStacker.java:64) at app//net.yacy.kelondro.workflow.InstantBlockingThread.job(InstantBlockingThread.java:72) at app//net.yacy.kelondro.workflow.AbstractBlockingThread.run(AbstractBlockingThread.java:82) at java.base@11.0.24/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) at java.base@11.0.24/java.util.concurrent.FutureTask.run(FutureTask.java:264) at java.base@11.0.24/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) at java.base@11.0.24/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) at java.base@11.0.24/java.lang.Thread.run(Thread.java:829)

Thread= Switchboard.stackURLs-7-W-zFDLJjrOlu id=24109 BLOCKED at java.base@11.0.24/sun.security.ssl.SSLSocketImpl$AppInputStream.read(SSLSocketImpl.java:1070) at app//org.apache.http.impl.io.SessionInputBufferImpl.streamRead(SessionInputBufferImpl.java:137) at app//org.apache.http.impl.io.SessionInputBufferImpl.fillBuffer(SessionInputBufferImpl.java:153) at app//org.apache.http.impl.io.SessionInputBufferImpl.readLine(SessionInputBufferImpl.java:280) at app//org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:138) at app//org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:56) at app//org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:259) at app//org.apache.http.impl.DefaultBHttpClientConnection.receiveResponseHeader(DefaultBHttpClientConnection.java:163) at app//org.apache.http.impl.conn.CPoolProxy.receiveResponseHeader(CPoolProxy.java:157) at app//org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:273) at app//org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:125) at app//org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:272) at app//org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:186) at app//org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:89) at app//org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:110) at app//org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:185) at app//org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:83) at app//net.yacy.cora.protocol.http.HTTPClient.execute(HTTPClient.java:901) at app//net.yacy.cora.protocol.http.HTTPClient.getContentBytes(HTTPClient.java:835) at app//net.yacy.cora.protocol.http.HTTPClient.GETbytes(HTTPClient.java:447) at app//net.yacy.crawler.retrieval.HTTPLoader.load(HTTPLoader.java:374) at 
app//net.yacy.crawler.retrieval.HTTPLoader.load(HTTPLoader.java:415) at app//net.yacy.crawler.retrieval.HTTPLoader.load(HTTPLoader.java:85) at app//net.yacy.repository.LoaderDispatcher.loadInternal(LoaderDispatcher.java:243) at app//net.yacy.repository.LoaderDispatcher.load(LoaderDispatcher.java:181) at app//net.yacy.repository.LoaderDispatcher.load(LoaderDispatcher.java:152) at app//net.yacy.repository.LoaderDispatcher.loadDocument(LoaderDispatcher.java:570) at app//net.yacy.search.Switchboard.stackUrl(Switchboard.java:3716) at app//net.yacy.search.Switchboard$21.run(Switchboard.java:3646)

Thread= BusyThread CrawlQueues.coreCrawlJob daemon id=107 BLOCKED at app//net.yacy.crawler.robots.RobotsTxt.getEntry(RobotsTxt.java:164) [synchronized (syncObj) {] at app//net.yacy.crawler.robots.RobotsTxt.getEntry(RobotsTxt.java:126) at app//net.yacy.crawler.data.Latency.waitingRobots(Latency.java:126) at app//net.yacy.crawler.data.Latency.waitingRemaining(Latency.java:217) at app//net.yacy.crawler.data.Latency.getDomainSleepTime(Latency.java:286) at app//net.yacy.crawler.HostQueue.pop(HostQueue.java:491) at app//net.yacy.crawler.HostBalancer.pop(HostBalancer.java:490) at app//net.yacy.crawler.data.NoticedURL.pop(NoticedURL.java:333) at app//net.yacy.crawler.data.NoticedURL.pop(NoticedURL.java:283) at app//net.yacy.crawler.data.CrawlQueues.coreCrawlJob(CrawlQueues.java:331) at app//net.yacy.search.Switchboard$11.jobImpl(Switchboard.java:1176) at app//net.yacy.kelondro.workflow.InstantBusyThread.job(InstantBusyThread.java:64) at app//net.yacy.kelondro.workflow.AbstractBusyThread.run(AbstractBusyThread.java:215)

Thread= Switchboard.stackURLs-14-W-zFDLJjrOlu id=24116 BLOCKED at app//net.yacy.cora.federate.solr.connector.SolrServerConnector.deleteById(SolrServerConnector.java:149) [synchronized (this.server) {] at app//net.yacy.cora.federate.solr.connector.MirrorSolrConnector.deleteById(MirrorSolrConnector.java:166) at app//net.yacy.search.index.Fulltext.remove(Fulltext.java:555) at app//net.yacy.search.Switchboard.stackUrl(Switchboard.java:3711) at app//net.yacy.search.Switchboard$21.run(Switchboard.java:3646)

``

smokingwheels commented 1 week ago

I increased memory and have been crawling FTP sites. YaCy is very slow to respond, but it is still working.

YaCy Version: yacy_v1.940_202408262308_6db374fdc Assigned Memory = 31457280000 Used Memory = 1304150224 Available Memory = 30153129776

THREADS WITH STATES: BLOCKED

Thread= commitScheduler-22-thread-1 id=339 BLOCKED at java.base@11.0.24/java.lang.Thread.yield(Native Method) at app//org.apache.lucene.store.ByteBufferGuard.invalidateAndUnmap(ByteBufferGuard.java:76) at app//org.apache.lucene.store.ByteBufferIndexInput.close(ByteBufferIndexInput.java:494) at app//org.apache.lucene.store.BufferedChecksumIndexInput.close(BufferedChecksumIndexInput.java:57) at app//org.apache.lucene.codecs.lucene90.Lucene90NormsProducer.<init>(Lucene90NormsProducer.java:80) at app//org.apache.lucene.codecs.lucene90.Lucene90NormsFormat.normsProducer(Lucene90NormsFormat.java:96) at app//org.apache.lucene.index.IndexingChain.flush(IndexingChain.java:293) at app//org.apache.lucene.index.DocumentsWriterPerThread.flush(DocumentsWriterPerThread.java:386) at app//org.apache.lucene.index.DocumentsWriter.doFlush(DocumentsWriter.java:497) at app//org.apache.lucene.index.DocumentsWriter.flushAllThreads(DocumentsWriter.java:676) at app//org.apache.lucene.index.IndexWriter.getReader(IndexWriter.java:568) at app//org.apache.lucene.index.StandardDirectoryReader.doOpenFromWriter(StandardDirectoryReader.java:380) at app//org.apache.lucene.index.StandardDirectoryReader.doOpenIfChanged(StandardDirectoryReader.java:365) at app//org.apache.lucene.index.DirectoryReader.openIfChanged(DirectoryReader.java:246) at app//org.apache.solr.core.SolrCore.openNewSearcher(SolrCore.java:2306) at app//org.apache.solr.core.SolrCore.getSearcher(SolrCore.java:2559) at app//org.apache.solr.update.DirectUpdateHandler2.commit(DirectUpdateHandler2.java:770) at app//org.apache.solr.update.CommitTracker.run(CommitTracker.java:280) at java.base@11.0.24/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) at java.base@11.0.24/java.util.concurrent.FutureTask.run(FutureTask.java:264) at java.base@11.0.24/java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:304) at 
java.base@11.0.24/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) at java.base@11.0.24/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) at java.base@11.0.24/java.lang.Thread.run(Thread.java:829)

ChatGPT recommends: -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:G1HeapRegionSize=16m