Closed MrJo621 closed 2 years ago
麻烦提供运行上下文,比如爬虫的配置,操作过程
<mxGraphModel>
<root>
<mxCell id="0">
<JsonProperty as="data">
{"spiderName":"文献爬虫healio","submit-strategy":"random","threadCount":""}
</JsonProperty>
</mxCell>
<mxCell id="1" parent="0"/>
<mxCell id="2" value="开始" style="start" parent="1" vertex="1">
<mxGeometry x="10" y="80" width="32" height="32" as="geometry"/>
<JsonProperty as="data">
{"shape":"start"}
</JsonProperty>
</mxCell>
<mxCell id="11" value="开始抓取" style="request" parent="1" vertex="1">
<mxGeometry x="200" y="80" width="32" height="32" as="geometry"/>
<JsonProperty as="data">
{"value":"开始抓取","loopVariableName":"","method":"GET","sleep":"5000","timeout":"5000","response-charset":"","retryCount":"","retryInterval":"","body-type":"none","body-content-type":"text/plain","loopCount":"","url":"https://www.healio.com/h5news/specialtylanding/searchjson?page=${item+1}&amp;pageId={A2F15EEF-2C14-40FC-A9C5-F54E929EEFD8}","proxy":"","request-body":"","follow-redirect":"1","tls-validate":"1","cookie-auto-set":"1","repeat-enable":"0","shape":"request"}
</JsonProperty>
</mxCell>
<mxCell id="15" value="定义变量" style="variable" parent="1" vertex="1">
<mxGeometry x="360" y="80" width="32" height="32" as="geometry"/>
<JsonProperty as="data">
{"value":"定义变量","loopVariableName":"","variable-name":["cards","titleList"],"variable-description":["",""],"loopCount":"","variable-value":["${json.parse(resp.html).cards}","${json.parse(resp.html).cards}"],"shape":"variable"}
</JsonProperty>
</mxCell>
<mxCell id="16" value="" style="strokeWidth=2;sharp=1;" parent="1" source="11" target="15" edge="1">
<mxGeometry relative="1" as="geometry"/>
<JsonProperty as="data">
{"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"}
</JsonProperty>
</mxCell>
<mxCell id="17" value="循环" style="loop" parent="1" vertex="1">
<mxGeometry x="510" y="80" width="32" height="32" as="geometry"/>
<JsonProperty as="data">
{"value":"循环","loopItem":"","loopVariableName":"index","loopCount":"${list.length(cards)}","loopStart":"0","loopEnd":"-1","shape":"loop"}
</JsonProperty>
</mxCell>
<mxCell id="18" value="" style="strokeWidth=2;sharp=1;" parent="1" source="15" target="17" edge="1">
<mxGeometry relative="1" as="geometry"/>
<JsonProperty as="data">
{"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"}
</JsonProperty>
</mxCell>
<mxCell id="22" value="定义每篇文章的信息" style="variable" parent="1" vertex="1">
<mxGeometry x="510" y="160" width="32" height="32" as="geometry"/>
<JsonProperty as="data">
{"value":"定义每篇文章的信息","loopVariableName":"","variable-name":["articleDate","articalAbstract","articalUrl","articalTitle"],"variable-description":["发布日期","简介","",""],"loopCount":"","variable-value":["${cards[index].PostedDate}","${cards[index].FirstParagraph}","https://www.healio.com${cards[index].Link}","${cards[index].Title}"],"shape":"variable"}
</JsonProperty>
</mxCell>
<mxCell id="24" value="" style="strokeWidth=2;strokeColor=blue;sharp=1;" parent="1" source="17" target="22" edge="1">
<mxGeometry relative="1" as="geometry"/>
<JsonProperty as="data">
{"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"blue","condition":"${cards[index].Link != null}","transmit-variable":"1"}
</JsonProperty>
</mxCell>
<mxCell id="27" value="循环" style="loop" parent="1" vertex="1">
<mxGeometry x="90" y="130" width="32" height="32" as="geometry"/>
<JsonProperty as="data">
{"value":"循环","loopItem":"","loopVariableName":"","loopCount":"4","loopStart":"0","loopEnd":"-1","shape":"loop"}
</JsonProperty>
</mxCell>
<mxCell id="28" value="" style="strokeWidth=2;sharp=1;" parent="1" source="2" target="27" edge="1">
<mxGeometry relative="1" as="geometry"/>
<JsonProperty as="data">
{"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"}
</JsonProperty>
</mxCell>
<mxCell id="29" value="" style="strokeWidth=2;sharp=1;" parent="1" source="27" target="11" edge="1">
<mxGeometry relative="1" as="geometry"/>
<JsonProperty as="data">
{"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"}
</JsonProperty>
</mxCell>
<mxCell id="30" value="输出" style="output" parent="1" vertex="1">
<mxGeometry x="630" y="160" width="32" height="32" as="geometry"/>
<JsonProperty as="data">
{"value":"输出","loopVariableName":"","datasourceId":"4c05381d67a342390df8b0847a1275c4","tableName":"artical_literature","csvName":"C:/Users/18752/Desktop/新建文本文档 (2)","csvEncoding":"UTF-8","output-name":["article_date","artical_abstract","artical_url","artical_title","batch","artical_type"],"loopCount":"","output-value":["${articleDate}","${articalAbstract}","${articalUrl}","${articalTitle}","${date.format(date.now(),'yyyy-MM-dd')}","healio"],"output-all":"1","output-database":"0","output-csv":"0","shape":"output"}
</JsonProperty>
</mxCell>
<mxCell id="31" value="" style="strokeWidth=2;sharp=1;" parent="1" source="22" target="30" edge="1">
<mxGeometry relative="1" as="geometry"/>
<JsonProperty as="data">
{"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"}
</JsonProperty>
</mxCell>
</root>
</mxGraphModel>
这是第一个-------------------------------------------------------------
<mxGraphModel>
<root>
<mxCell id="0">
<JsonProperty as="data">
{"spiderName":"文献爬虫frontiers_in_immunology","submit-strategy":"random","threadCount":""}
</JsonProperty>
</mxCell>
<mxCell id="1" parent="0"/>
<mxCell id="2" value="开始" style="start" parent="1" vertex="1">
<mxGeometry x="10" y="80" width="32" height="32" as="geometry"/>
<JsonProperty as="data">
{"shape":"start"}
</JsonProperty>
</mxCell>
<mxCell id="15" value="定义变量" style="variable" parent="1" vertex="1">
<mxGeometry x="360" y="80" width="32" height="32" as="geometry"/>
<JsonProperty as="data">
{"value":"定义变量","loopVariableName":"","variable-name":["articles"],"variable-description":[""],"loopCount":"","variable-value":["${json.parse(resp.html).ArticleCollection.Articles}"],"shape":"variable"}
</JsonProperty>
</mxCell>
<mxCell id="17" value="循环" style="loop" parent="1" vertex="1">
<mxGeometry x="510" y="80" width="32" height="32" as="geometry"/>
<JsonProperty as="data">
{"value":"循环","loopItem":"","loopVariableName":"index","loopCount":"${list.length(articles)}","loopStart":"0","loopEnd":"-1","shape":"loop"}
</JsonProperty>
</mxCell>
<mxCell id="18" value="" style="strokeWidth=2;sharp=1;" parent="1" source="15" target="17" edge="1">
<mxGeometry relative="1" as="geometry"/>
<JsonProperty as="data">
{"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"}
</JsonProperty>
</mxCell>
<mxCell id="22" value="定义每篇文章的信息" style="variable" parent="1" vertex="1">
<mxGeometry x="510" y="160" width="32" height="32" as="geometry"/>
<JsonProperty as="data">
{"value":"定义每篇文章的信息","loopVariableName":"","variable-name":["articleDate","articalAbstract","articalUrl","articalTitle","articalauthor","doi"],"variable-description":["发布日期","简介","","","",""],"loopCount":"","variable-value":["${articles[index].OnlineSinceDate}","${articles[index].OnlineSinceDate}","${articles[index].Url}","${articles[index].Title}","${json.stringify(articles[index].Authors)}","${articles[index].DOI}"],"shape":"variable"}
</JsonProperty>
</mxCell>
<mxCell id="24" value="" style="strokeWidth=2;strokeColor=black;sharp=1;" parent="1" source="17" target="22" edge="1">
<mxGeometry relative="1" as="geometry"/>
<JsonProperty as="data">
{"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"}
</JsonProperty>
</mxCell>
<mxCell id="25" value="输出" style="output" parent="1" vertex="1">
<mxGeometry x="510" y="240" width="32" height="32" as="geometry"/>
<JsonProperty as="data">
{"value":"输出","loopVariableName":"","datasourceId":"4c05381d67a342390df8b0847a1275c4","tableName":"artical_literature","csvName":"C:/Users/18752/Desktop/新建文本文档 (2)","csvEncoding":"UTF-8","output-name":["article_date","artical_abstract","artical_url","artical_title","batch","artical_type","artical_author","doi"],"loopCount":"","output-value":["${articleDate}","${articalAbstract}","${articalUrl}","${articalTitle}","${date.format(date.now(),'yyyy-MM-dd')}","frontiers_in_immunology","${articalauthor}","${doi}"],"output-all":"1","output-database":"0","output-csv":"0","shape":"output"}
</JsonProperty>
</mxCell>
<mxCell id="26" value="" style="strokeWidth=2;sharp=1;" parent="1" source="22" target="25" edge="1">
<mxGeometry relative="1" as="geometry"/>
<JsonProperty as="data">
{"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"}
</JsonProperty>
</mxCell>
<mxCell id="27" value="循环" style="loop" parent="1" vertex="1">
<mxGeometry x="90" y="130" width="32" height="32" as="geometry"/>
<JsonProperty as="data">
{"value":"循环","loopItem":"","loopVariableName":"","loopCount":"5","loopStart":"0","loopEnd":"-1","shape":"loop"}
</JsonProperty>
</mxCell>
<mxCell id="28" value="" style="strokeWidth=2;sharp=1;" parent="1" source="2" target="27" edge="1">
<mxGeometry relative="1" as="geometry"/>
<JsonProperty as="data">
{"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"}
</JsonProperty>
</mxCell>
<mxCell id="32" value="开始抓取" style="request" parent="1" vertex="1">
<mxGeometry x="230" y="80" width="32" height="32" as="geometry"/>
<JsonProperty as="data">
{"value":"开始抓取","loopVariableName":"","method":"POST","sleep":"","timeout":"","response-charset":"","retryCount":"","retryInterval":"","body-type":"form-data","body-content-type":"text/plain","parameter-form-name":["JournalId","SectionId","FromDate","ToDate","SortType"],"parameter-form-type":["text","text","text","text","text"],"parameter-form-description":["","","","",""],"loopCount":"","url":"https://www.frontiersin.org/api/journals/article/filter?index=${item}","proxy":"","request-body":"","parameter-form-filename":["","","","",""],"parameter-form-value":["276","0","","","recentdate"],"follow-redirect":"1","tls-validate":"1","cookie-auto-set":"1","repeat-enable":"0","shape":"request"}
</JsonProperty>
</mxCell>
<mxCell id="33" value="" style="strokeWidth=2;sharp=1;" parent="1" source="27" target="32" edge="1">
<mxGeometry relative="1" as="geometry"/>
<JsonProperty as="data">
{"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"}
</JsonProperty>
</mxCell>
<mxCell id="34" value="" style="strokeWidth=2;sharp=1;" parent="1" source="32" target="15" edge="1">
<mxGeometry relative="1" as="geometry"/>
<JsonProperty as="data">
{"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"}
</JsonProperty>
</mxCell>
</root>
</mxGraphModel>
---------------------这是第二个
复现步骤:先启动第一个流程,再启动第二个流程,之后程序就会一直卡在 while 循环的地方。debug 看下来,是 queue 中所有 Future 的 isDone 都为 false,导致选不出 max,
max 的值为 Optional.empty。
spider.thread.max=4
spider.thread.default=2
spider.job.enable=true
以上是线程的配置。环境是 Windows 10、IDEA 2021.1.3 x64、JDK 8。
请先尝试调大线程数,这个问题稍后我会尝试复现并排查
<mxGraphModel> <root> <mxCell id="0"> <JsonProperty as="data"> {"spiderName":"文献爬虫healio","submit-strategy":"random","threadCount":""} </JsonProperty> </mxCell> <mxCell id="1" parent="0"/> <mxCell id="2" value="开始" style="start" parent="1" vertex="1"> <mxGeometry x="10" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"shape":"start"} </JsonProperty> </mxCell> <mxCell id="11" value="开始抓取" style="request" parent="1" vertex="1"> <mxGeometry x="200" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"开始抓取","loopVariableName":"","method":"GET","sleep":"5000","timeout":"5000","response-charset":"","retryCount":"","retryInterval":"","body-type":"none","body-content-type":"text/plain","loopCount":"","url":"https://www.healio.com/h5news/specialtylanding/searchjson?page=${item+1}&pageId={A2F15EEF-2C14-40FC-A9C5-F54E929EEFD8}","proxy":"","request-body":"","follow-redirect":"1","tls-validate":"1","cookie-auto-set":"1","repeat-enable":"0","shape":"request"} </JsonProperty> </mxCell> <mxCell id="15" value="定义变量" style="variable" parent="1" vertex="1"> <mxGeometry x="360" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"定义变量","loopVariableName":"","variable-name":["cards","titleList"],"variable-description":["",""],"loopCount":"","variable-value":["${json.parse(resp.html).cards}","${json.parse(resp.html).cards}"],"shape":"variable"} </JsonProperty> </mxCell> <mxCell id="16" value="" style="strokeWidth=2;sharp=1;" parent="1" source="11" target="15" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="17" value="循环" style="loop" parent="1" vertex="1"> <mxGeometry x="510" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> 
{"value":"循环","loopItem":"","loopVariableName":"index","loopCount":"${list.length(cards)}","loopStart":"0","loopEnd":"-1","shape":"loop"} </JsonProperty> </mxCell> <mxCell id="18" value="" style="strokeWidth=2;sharp=1;" parent="1" source="15" target="17" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="22" value="定义每篇文章的信息" style="variable" parent="1" vertex="1"> <mxGeometry x="510" y="160" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"定义每篇文章的信息","loopVariableName":"","variable-name":["articleDate","articalAbstract","articalUrl","articalTitle"],"variable-description":["发布日期","简介","",""],"loopCount":"","variable-value":["${cards[index].PostedDate}","${cards[index].FirstParagraph}","https://www.healio.com${cards[index].Link}","${cards[index].Title}"],"shape":"variable"} </JsonProperty> </mxCell> <mxCell id="24" value="" style="strokeWidth=2;strokeColor=blue;sharp=1;" parent="1" source="17" target="22" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"blue","condition":"${cards[index].Link != null}","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="27" value="循环" style="loop" parent="1" vertex="1"> <mxGeometry x="90" y="130" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"循环","loopItem":"","loopVariableName":"","loopCount":"4","loopStart":"0","loopEnd":"-1","shape":"loop"} </JsonProperty> </mxCell> <mxCell id="28" value="" style="strokeWidth=2;sharp=1;" parent="1" source="2" target="27" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> 
</mxCell> <mxCell id="29" value="" style="strokeWidth=2;sharp=1;" parent="1" source="27" target="11" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="30" value="输出" style="output" parent="1" vertex="1"> <mxGeometry x="630" y="160" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"输出","loopVariableName":"","datasourceId":"4c05381d67a342390df8b0847a1275c4","tableName":"artical_literature","csvName":"C:/Users/18752/Desktop/新建文本文档 (2)","csvEncoding":"UTF-8","output-name":["article_date","artical_abstract","artical_url","artical_title","batch","artical_type"],"loopCount":"","output-value":["${articleDate}","${articalAbstract}","${articalUrl}","${articalTitle}","${date.format(date.now(),'yyyy-MM-dd')}","healio"],"output-all":"1","output-database":"0","output-csv":"0","shape":"output"} </JsonProperty> </mxCell> <mxCell id="31" value="" style="strokeWidth=2;sharp=1;" parent="1" source="22" target="30" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> </root> </mxGraphModel>
这是第一个-------------------------------------------------------------
<mxGraphModel> <root> <mxCell id="0"> <JsonProperty as="data"> {"spiderName":"文献爬虫frontiers_in_immunology","submit-strategy":"random","threadCount":""} </JsonProperty> </mxCell> <mxCell id="1" parent="0"/> <mxCell id="2" value="开始" style="start" parent="1" vertex="1"> <mxGeometry x="10" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"shape":"start"} </JsonProperty> </mxCell> <mxCell id="15" value="定义变量" style="variable" parent="1" vertex="1"> <mxGeometry x="360" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"定义变量","loopVariableName":"","variable-name":["articles"],"variable-description":[""],"loopCount":"","variable-value":["${json.parse(resp.html).ArticleCollection.Articles}"],"shape":"variable"} </JsonProperty> </mxCell> <mxCell id="17" value="循环" style="loop" parent="1" vertex="1"> <mxGeometry x="510" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"循环","loopItem":"","loopVariableName":"index","loopCount":"${list.length(articles)}","loopStart":"0","loopEnd":"-1","shape":"loop"} </JsonProperty> </mxCell> <mxCell id="18" value="" style="strokeWidth=2;sharp=1;" parent="1" source="15" target="17" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="22" value="定义每篇文章的信息" style="variable" parent="1" vertex="1"> <mxGeometry x="510" y="160" width="32" height="32" as="geometry"/> <JsonProperty as="data"> 
{"value":"定义每篇文章的信息","loopVariableName":"","variable-name":["articleDate","articalAbstract","articalUrl","articalTitle","articalauthor","doi"],"variable-description":["发布日期","简介","","","",""],"loopCount":"","variable-value":["${articles[index].OnlineSinceDate}","${articles[index].OnlineSinceDate}","${articles[index].Url}","${articles[index].Title}","${json.stringify(articles[index].Authors)}","${articles[index].DOI}"],"shape":"variable"} </JsonProperty> </mxCell> <mxCell id="24" value="" style="strokeWidth=2;strokeColor=black;sharp=1;" parent="1" source="17" target="22" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="25" value="输出" style="output" parent="1" vertex="1"> <mxGeometry x="510" y="240" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"输出","loopVariableName":"","datasourceId":"4c05381d67a342390df8b0847a1275c4","tableName":"artical_literature","csvName":"C:/Users/18752/Desktop/新建文本文档 (2)","csvEncoding":"UTF-8","output-name":["article_date","artical_abstract","artical_url","artical_title","batch","artical_type","artical_author","doi"],"loopCount":"","output-value":["${articleDate}","${articalAbstract}","${articalUrl}","${articalTitle}","${date.format(date.now(),'yyyy-MM-dd')}","frontiers_in_immunology","${articalauthor}","${doi}"],"output-all":"1","output-database":"0","output-csv":"0","shape":"output"} </JsonProperty> </mxCell> <mxCell id="26" value="" style="strokeWidth=2;sharp=1;" parent="1" source="22" target="25" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="27" value="循环" style="loop" parent="1" vertex="1"> <mxGeometry x="90" y="130" 
width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"循环","loopItem":"","loopVariableName":"","loopCount":"5","loopStart":"0","loopEnd":"-1","shape":"loop"} </JsonProperty> </mxCell> <mxCell id="28" value="" style="strokeWidth=2;sharp=1;" parent="1" source="2" target="27" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="32" value="开始抓取" style="request" parent="1" vertex="1"> <mxGeometry x="230" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"开始抓取","loopVariableName":"","method":"POST","sleep":"","timeout":"","response-charset":"","retryCount":"","retryInterval":"","body-type":"form-data","body-content-type":"text/plain","parameter-form-name":["JournalId","SectionId","FromDate","ToDate","SortType"],"parameter-form-type":["text","text","text","text","text"],"parameter-form-description":["","","","",""],"loopCount":"","url":"https://www.frontiersin.org/api/journals/article/filter?index=${item}","proxy":"","request-body":"","parameter-form-filename":["","","","",""],"parameter-form-value":["276","0","","","recentdate"],"follow-redirect":"1","tls-validate":"1","cookie-auto-set":"1","repeat-enable":"0","shape":"request"} </JsonProperty> </mxCell> <mxCell id="33" value="" style="strokeWidth=2;sharp=1;" parent="1" source="27" target="32" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="34" value="" style="strokeWidth=2;sharp=1;" parent="1" source="32" target="15" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> 
{"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> </root> </mxGraphModel>
---------------------这是第二个
复现步骤:先启动第一个流程,再启动第二个流程,之后程序就会一直卡在 while 循环的地方。debug 看下来,是 queue 中所有 Future 的 isDone 都为 false,导致选不出 max,
max 的值为 Optional.empty。
#平台最大线程数
spider.thread.max=4
#单任务默认最大线程数
spider.thread.default=2
#设置为true时定时任务才生效
spider.job.enable=true
以上是线程的配置。环境是 Windows 10、IDEA 2021.1.3 x64、JDK 8。
首先说明的是:这不是 bug。简单说一下机制:每个流程都会启动两个线程工作,一个线程阻塞等待结果,一个线程用来调度,因此如果你同时启动两个流程,至少要保证最大线程数的限制大于 4。你可以试着将最大限制调整成 5 再尝试一下。
<mxGraphModel> <root> <mxCell id="0"> <JsonProperty as="data"> {"spiderName":"文献爬虫healio","submit-strategy":"random","threadCount":""} </JsonProperty> </mxCell> <mxCell id="1" parent="0"/> <mxCell id="2" value="开始" style="start" parent="1" vertex="1"> <mxGeometry x="10" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"shape":"start"} </JsonProperty> </mxCell> <mxCell id="11" value="开始抓取" style="request" parent="1" vertex="1"> <mxGeometry x="200" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"开始抓取","loopVariableName":"","method":"GET","sleep":"5000","timeout":"5000","response-charset":"","retryCount":"","retryInterval":"","body-type":"none","body-content-type":"text/plain","loopCount":"","url":"https://www.healio.com/h5news/specialtylanding/searchjson?page=${item+1}&pageId={A2F15EEF-2C14-40FC-A9C5-F54E929EEFD8}","proxy":"","request-body":"","follow-redirect":"1","tls-validate":"1","cookie-auto-set":"1","repeat-enable":"0","shape":"request"} </JsonProperty> </mxCell> <mxCell id="15" value="定义变量" style="variable" parent="1" vertex="1"> <mxGeometry x="360" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"定义变量","loopVariableName":"","variable-name":["cards","titleList"],"variable-description":["",""],"loopCount":"","variable-value":["${json.parse(resp.html).cards}","${json.parse(resp.html).cards}"],"shape":"variable"} </JsonProperty> </mxCell> <mxCell id="16" value="" style="strokeWidth=2;sharp=1;" parent="1" source="11" target="15" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="17" value="循环" style="loop" parent="1" vertex="1"> <mxGeometry x="510" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> 
{"value":"循环","loopItem":"","loopVariableName":"index","loopCount":"${list.length(cards)}","loopStart":"0","loopEnd":"-1","shape":"loop"} </JsonProperty> </mxCell> <mxCell id="18" value="" style="strokeWidth=2;sharp=1;" parent="1" source="15" target="17" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="22" value="定义每篇文章的信息" style="variable" parent="1" vertex="1"> <mxGeometry x="510" y="160" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"定义每篇文章的信息","loopVariableName":"","variable-name":["articleDate","articalAbstract","articalUrl","articalTitle"],"variable-description":["发布日期","简介","",""],"loopCount":"","variable-value":["${cards[index].PostedDate}","${cards[index].FirstParagraph}","https://www.healio.com${cards[index].Link}","${cards[index].Title}"],"shape":"variable"} </JsonProperty> </mxCell> <mxCell id="24" value="" style="strokeWidth=2;strokeColor=blue;sharp=1;" parent="1" source="17" target="22" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"blue","condition":"${cards[index].Link != null}","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="27" value="循环" style="loop" parent="1" vertex="1"> <mxGeometry x="90" y="130" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"循环","loopItem":"","loopVariableName":"","loopCount":"4","loopStart":"0","loopEnd":"-1","shape":"loop"} </JsonProperty> </mxCell> <mxCell id="28" value="" style="strokeWidth=2;sharp=1;" parent="1" source="2" target="27" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> 
</mxCell> <mxCell id="29" value="" style="strokeWidth=2;sharp=1;" parent="1" source="27" target="11" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="30" value="输出" style="output" parent="1" vertex="1"> <mxGeometry x="630" y="160" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"输出","loopVariableName":"","datasourceId":"4c05381d67a342390df8b0847a1275c4","tableName":"artical_literature","csvName":"C:/Users/18752/Desktop/新建文本文档 (2)","csvEncoding":"UTF-8","output-name":["article_date","artical_abstract","artical_url","artical_title","batch","artical_type"],"loopCount":"","output-value":["${articleDate}","${articalAbstract}","${articalUrl}","${articalTitle}","${date.format(date.now(),'yyyy-MM-dd')}","healio"],"output-all":"1","output-database":"0","output-csv":"0","shape":"output"} </JsonProperty> </mxCell> <mxCell id="31" value="" style="strokeWidth=2;sharp=1;" parent="1" source="22" target="30" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> </root> </mxGraphModel>
这是第一个-------------------------------------------------------------
<mxGraphModel> <root> <mxCell id="0"> <JsonProperty as="data"> {"spiderName":"文献爬虫frontiers_in_immunology","submit-strategy":"random","threadCount":""} </JsonProperty> </mxCell> <mxCell id="1" parent="0"/> <mxCell id="2" value="开始" style="start" parent="1" vertex="1"> <mxGeometry x="10" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"shape":"start"} </JsonProperty> </mxCell> <mxCell id="15" value="定义变量" style="variable" parent="1" vertex="1"> <mxGeometry x="360" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"定义变量","loopVariableName":"","variable-name":["articles"],"variable-description":[""],"loopCount":"","variable-value":["${json.parse(resp.html).ArticleCollection.Articles}"],"shape":"variable"} </JsonProperty> </mxCell> <mxCell id="17" value="循环" style="loop" parent="1" vertex="1"> <mxGeometry x="510" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"循环","loopItem":"","loopVariableName":"index","loopCount":"${list.length(articles)}","loopStart":"0","loopEnd":"-1","shape":"loop"} </JsonProperty> </mxCell> <mxCell id="18" value="" style="strokeWidth=2;sharp=1;" parent="1" source="15" target="17" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="22" value="定义每篇文章的信息" style="variable" parent="1" vertex="1"> <mxGeometry x="510" y="160" width="32" height="32" as="geometry"/> <JsonProperty as="data"> 
{"value":"定义每篇文章的信息","loopVariableName":"","variable-name":["articleDate","articalAbstract","articalUrl","articalTitle","articalauthor","doi"],"variable-description":["发布日期","简介","","","",""],"loopCount":"","variable-value":["${articles[index].OnlineSinceDate}","${articles[index].OnlineSinceDate}","${articles[index].Url}","${articles[index].Title}","${json.stringify(articles[index].Authors)}","${articles[index].DOI}"],"shape":"variable"} </JsonProperty> </mxCell> <mxCell id="24" value="" style="strokeWidth=2;strokeColor=black;sharp=1;" parent="1" source="17" target="22" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="25" value="输出" style="output" parent="1" vertex="1"> <mxGeometry x="510" y="240" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"输出","loopVariableName":"","datasourceId":"4c05381d67a342390df8b0847a1275c4","tableName":"artical_literature","csvName":"C:/Users/18752/Desktop/新建文本文档 (2)","csvEncoding":"UTF-8","output-name":["article_date","artical_abstract","artical_url","artical_title","batch","artical_type","artical_author","doi"],"loopCount":"","output-value":["${articleDate}","${articalAbstract}","${articalUrl}","${articalTitle}","${date.format(date.now(),'yyyy-MM-dd')}","frontiers_in_immunology","${articalauthor}","${doi}"],"output-all":"1","output-database":"0","output-csv":"0","shape":"output"} </JsonProperty> </mxCell> <mxCell id="26" value="" style="strokeWidth=2;sharp=1;" parent="1" source="22" target="25" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="27" value="循环" style="loop" parent="1" vertex="1"> <mxGeometry x="90" y="130" 
width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"循环","loopItem":"","loopVariableName":"","loopCount":"5","loopStart":"0","loopEnd":"-1","shape":"loop"} </JsonProperty> </mxCell> <mxCell id="28" value="" style="strokeWidth=2;sharp=1;" parent="1" source="2" target="27" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="32" value="开始抓取" style="request" parent="1" vertex="1"> <mxGeometry x="230" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"开始抓取","loopVariableName":"","method":"POST","sleep":"","timeout":"","response-charset":"","retryCount":"","retryInterval":"","body-type":"form-data","body-content-type":"text/plain","parameter-form-name":["JournalId","SectionId","FromDate","ToDate","SortType"],"parameter-form-type":["text","text","text","text","text"],"parameter-form-description":["","","","",""],"loopCount":"","url":"https://www.frontiersin.org/api/journals/article/filter?index=${item}","proxy":"","request-body":"","parameter-form-filename":["","","","",""],"parameter-form-value":["276","0","","","recentdate"],"follow-redirect":"1","tls-validate":"1","cookie-auto-set":"1","repeat-enable":"0","shape":"request"} </JsonProperty> </mxCell> <mxCell id="33" value="" style="strokeWidth=2;sharp=1;" parent="1" source="27" target="32" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="34" value="" style="strokeWidth=2;sharp=1;" parent="1" source="32" target="15" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> 
{"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> </root> </mxGraphModel>
---------------------这是第二个
复现步骤:先启动第一个,再启动第二个,然后就会一直在 while 的地方卡住。debug 看下来是 queue 中的 Future 的 isDone 都为 false,导致选不出 max(max 的值为 Optional.empty)。
#平台最大线程数
spider.thread.max=4
#单任务默认最大线程数
spider.thread.default=2
#设置为true时定时任务才生效
spider.job.enable=true
以上是线程的配置。环境是 Windows 10、IDEA 2021.1.3 x64、JDK 8。
首先说明的是:这不是 bug。简单说一下机制:每个流程都会启动两个线程工作,一个线程阻塞等待结果,一个线程用来调度,因此如果你同时启动两个流程,至少要保证最大线程数的限制大于 4。你可以试着将最大限制调整成 5 再尝试一下。
可以了,谢谢大佬,然后有对应的群吗?想加群,也想了解下您的新项目slime
<mxGraphModel> <root> <mxCell id="0"> <JsonProperty as="data"> {"spiderName":"文献爬虫healio","submit-strategy":"random","threadCount":""} </JsonProperty> </mxCell> <mxCell id="1" parent="0"/> <mxCell id="2" value="开始" style="start" parent="1" vertex="1"> <mxGeometry x="10" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"shape":"start"} </JsonProperty> </mxCell> <mxCell id="11" value="开始抓取" style="request" parent="1" vertex="1"> <mxGeometry x="200" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"开始抓取","loopVariableName":"","method":"GET","sleep":"5000","timeout":"5000","response-charset":"","retryCount":"","retryInterval":"","body-type":"none","body-content-type":"text/plain","loopCount":"","url":"https://www.healio.com/h5news/specialtylanding/searchjson?page=${item+1}&pageId={A2F15EEF-2C14-40FC-A9C5-F54E929EEFD8}","proxy":"","request-body":"","follow-redirect":"1","tls-validate":"1","cookie-auto-set":"1","repeat-enable":"0","shape":"request"} </JsonProperty> </mxCell> <mxCell id="15" value="定义变量" style="variable" parent="1" vertex="1"> <mxGeometry x="360" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"定义变量","loopVariableName":"","variable-name":["cards","titleList"],"variable-description":["",""],"loopCount":"","variable-value":["${json.parse(resp.html).cards}","${json.parse(resp.html).cards}"],"shape":"variable"} </JsonProperty> </mxCell> <mxCell id="16" value="" style="strokeWidth=2;sharp=1;" parent="1" source="11" target="15" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="17" value="循环" style="loop" parent="1" vertex="1"> <mxGeometry x="510" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> 
{"value":"循环","loopItem":"","loopVariableName":"index","loopCount":"${list.length(cards)}","loopStart":"0","loopEnd":"-1","shape":"loop"} </JsonProperty> </mxCell> <mxCell id="18" value="" style="strokeWidth=2;sharp=1;" parent="1" source="15" target="17" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="22" value="定义每篇文章的信息" style="variable" parent="1" vertex="1"> <mxGeometry x="510" y="160" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"定义每篇文章的信息","loopVariableName":"","variable-name":["articleDate","articalAbstract","articalUrl","articalTitle"],"variable-description":["发布日期","简介","",""],"loopCount":"","variable-value":["${cards[index].PostedDate}","${cards[index].FirstParagraph}","https://www.healio.com${cards[index].Link}","${cards[index].Title}"],"shape":"variable"} </JsonProperty> </mxCell> <mxCell id="24" value="" style="strokeWidth=2;strokeColor=blue;sharp=1;" parent="1" source="17" target="22" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"blue","condition":"${cards[index].Link != null}","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="27" value="循环" style="loop" parent="1" vertex="1"> <mxGeometry x="90" y="130" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"循环","loopItem":"","loopVariableName":"","loopCount":"4","loopStart":"0","loopEnd":"-1","shape":"loop"} </JsonProperty> </mxCell> <mxCell id="28" value="" style="strokeWidth=2;sharp=1;" parent="1" source="2" target="27" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> 
</mxCell> <mxCell id="29" value="" style="strokeWidth=2;sharp=1;" parent="1" source="27" target="11" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="30" value="输出" style="output" parent="1" vertex="1"> <mxGeometry x="630" y="160" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"输出","loopVariableName":"","datasourceId":"4c05381d67a342390df8b0847a1275c4","tableName":"artical_literature","csvName":"C:/Users/18752/Desktop/新建文本文档 (2)","csvEncoding":"UTF-8","output-name":["article_date","artical_abstract","artical_url","artical_title","batch","artical_type"],"loopCount":"","output-value":["${articleDate}","${articalAbstract}","${articalUrl}","${articalTitle}","${date.format(date.now(),'yyyy-MM-dd')}","healio"],"output-all":"1","output-database":"0","output-csv":"0","shape":"output"} </JsonProperty> </mxCell> <mxCell id="31" value="" style="strokeWidth=2;sharp=1;" parent="1" source="22" target="30" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> </root> </mxGraphModel>
这是第一个-------------------------------------------------------------
<mxGraphModel> <root> <mxCell id="0"> <JsonProperty as="data"> {"spiderName":"文献爬虫frontiers_in_immunology","submit-strategy":"random","threadCount":""} </JsonProperty> </mxCell> <mxCell id="1" parent="0"/> <mxCell id="2" value="开始" style="start" parent="1" vertex="1"> <mxGeometry x="10" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"shape":"start"} </JsonProperty> </mxCell> <mxCell id="15" value="定义变量" style="variable" parent="1" vertex="1"> <mxGeometry x="360" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"定义变量","loopVariableName":"","variable-name":["articles"],"variable-description":[""],"loopCount":"","variable-value":["${json.parse(resp.html).ArticleCollection.Articles}"],"shape":"variable"} </JsonProperty> </mxCell> <mxCell id="17" value="循环" style="loop" parent="1" vertex="1"> <mxGeometry x="510" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"循环","loopItem":"","loopVariableName":"index","loopCount":"${list.length(articles)}","loopStart":"0","loopEnd":"-1","shape":"loop"} </JsonProperty> </mxCell> <mxCell id="18" value="" style="strokeWidth=2;sharp=1;" parent="1" source="15" target="17" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="22" value="定义每篇文章的信息" style="variable" parent="1" vertex="1"> <mxGeometry x="510" y="160" width="32" height="32" as="geometry"/> <JsonProperty as="data"> 
{"value":"定义每篇文章的信息","loopVariableName":"","variable-name":["articleDate","articalAbstract","articalUrl","articalTitle","articalauthor","doi"],"variable-description":["发布日期","简介","","","",""],"loopCount":"","variable-value":["${articles[index].OnlineSinceDate}","${articles[index].OnlineSinceDate}","${articles[index].Url}","${articles[index].Title}","${json.stringify(articles[index].Authors)}","${articles[index].DOI}"],"shape":"variable"} </JsonProperty> </mxCell> <mxCell id="24" value="" style="strokeWidth=2;strokeColor=black;sharp=1;" parent="1" source="17" target="22" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="25" value="输出" style="output" parent="1" vertex="1"> <mxGeometry x="510" y="240" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"输出","loopVariableName":"","datasourceId":"4c05381d67a342390df8b0847a1275c4","tableName":"artical_literature","csvName":"C:/Users/18752/Desktop/新建文本文档 (2)","csvEncoding":"UTF-8","output-name":["article_date","artical_abstract","artical_url","artical_title","batch","artical_type","artical_author","doi"],"loopCount":"","output-value":["${articleDate}","${articalAbstract}","${articalUrl}","${articalTitle}","${date.format(date.now(),'yyyy-MM-dd')}","frontiers_in_immunology","${articalauthor}","${doi}"],"output-all":"1","output-database":"0","output-csv":"0","shape":"output"} </JsonProperty> </mxCell> <mxCell id="26" value="" style="strokeWidth=2;sharp=1;" parent="1" source="22" target="25" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="27" value="循环" style="loop" parent="1" vertex="1"> <mxGeometry x="90" y="130" 
width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"循环","loopItem":"","loopVariableName":"","loopCount":"5","loopStart":"0","loopEnd":"-1","shape":"loop"} </JsonProperty> </mxCell> <mxCell id="28" value="" style="strokeWidth=2;sharp=1;" parent="1" source="2" target="27" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="32" value="开始抓取" style="request" parent="1" vertex="1"> <mxGeometry x="230" y="80" width="32" height="32" as="geometry"/> <JsonProperty as="data"> {"value":"开始抓取","loopVariableName":"","method":"POST","sleep":"","timeout":"","response-charset":"","retryCount":"","retryInterval":"","body-type":"form-data","body-content-type":"text/plain","parameter-form-name":["JournalId","SectionId","FromDate","ToDate","SortType"],"parameter-form-type":["text","text","text","text","text"],"parameter-form-description":["","","","",""],"loopCount":"","url":"https://www.frontiersin.org/api/journals/article/filter?index=${item}","proxy":"","request-body":"","parameter-form-filename":["","","","",""],"parameter-form-value":["276","0","","","recentdate"],"follow-redirect":"1","tls-validate":"1","cookie-auto-set":"1","repeat-enable":"0","shape":"request"} </JsonProperty> </mxCell> <mxCell id="33" value="" style="strokeWidth=2;sharp=1;" parent="1" source="27" target="32" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> {"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> <mxCell id="34" value="" style="strokeWidth=2;sharp=1;" parent="1" source="32" target="15" edge="1"> <mxGeometry relative="1" as="geometry"/> <JsonProperty as="data"> 
{"value":"","exception-flow":"0","lineWidth":"2","line-style":"sharp","lineColor":"black","condition":"","transmit-variable":"1"} </JsonProperty> </mxCell> </root> </mxGraphModel>
---------------------这是第二个
复现步骤:先启动第一个,再启动第二个,然后就会一直在 while 的地方卡住。debug 看下来是 queue 中的 Future 的 isDone 都为 false,导致选不出 max(max 的值为 Optional.empty)。
#平台最大线程数
spider.thread.max=4
#单任务默认最大线程数
spider.thread.default=2
#设置为true时定时任务才生效
spider.job.enable=true
以上是线程的配置。环境是 Windows 10、IDEA 2021.1.3 x64、JDK 8。
首先说明的是:这不是 bug。简单说一下机制:每个流程都会启动两个线程工作,一个线程阻塞等待结果,一个线程用来调度,因此如果你同时启动两个流程,至少要保证最大线程数的限制大于 4。你可以试着将最大限制调整成 5 再尝试一下。
可以了,谢谢大佬,然后有对应的群吗?想加群,也想了解下您的新项目slime
群你可以在 issue 里边找找,不过好像加不了了,我不是管理员,不太清楚。还有就是 slime 并不算新项目,只是对 spider-flow 的精简和重构,方便开箱即用和服务端部署的。
filter的时候没数据