Closed intrigus-lgtm closed 1 year ago
I got:
56 pending pages
visiting: https://ctf.kitctf.de/teams
done: /tmp/gpnctf-21/teams.html
done: /tmp/gpnctf-21/themes/factory/static/assets/teams_list.727a715a.js
106 pending pages
visiting: https://ctf.kitctf.de/scoreboard
https://ctf.kitctf.de/scoreboard /tmp/gpnctf-21/scoreboard.html
ProtocolError: Protocol error (Network.getResponseBody): Request content was evicted from inspector cache
at new Callback (/home/zhuyifei1999/ctfd2pages/node_modules/puppeteer-core/lib/cjs/puppeteer/common/Connection.js:62:35)
at CallbackRegistry.create (/home/zhuyifei1999/ctfd2pages/node_modules/puppeteer-core/lib/cjs/puppeteer/common/Connection.js:108:26)
at Connection._rawSend (/home/zhuyifei1999/ctfd2pages/node_modules/puppeteer-core/lib/cjs/puppeteer/common/Connection.js:224:26)
at CDPSessionImpl.send (/home/zhuyifei1999/ctfd2pages/node_modules/puppeteer-core/lib/cjs/puppeteer/common/Connection.js:433:78)
at /home/zhuyifei1999/ctfd2pages/node_modules/puppeteer-core/lib/cjs/puppeteer/common/HTTPResponse.js:97:100
at async /home/zhuyifei1999/ctfd2pages/01_scrape/index.js:184:24
node:internal/process/promises:289
triggerUncaughtException(err, true /* fromPromise */);
^
ProtocolError: Protocol error (Network.getResponseBody): Request content was evicted from inspector cache
at new Callback (/home/zhuyifei1999/ctfd2pages/node_modules/puppeteer-core/lib/cjs/puppeteer/common/Connection.js:62:35)
at CallbackRegistry.create (/home/zhuyifei1999/ctfd2pages/node_modules/puppeteer-core/lib/cjs/puppeteer/common/Connection.js:108:26)
at Connection._rawSend (/home/zhuyifei1999/ctfd2pages/node_modules/puppeteer-core/lib/cjs/puppeteer/common/Connection.js:224:26)
at CDPSessionImpl.send (/home/zhuyifei1999/ctfd2pages/node_modules/puppeteer-core/lib/cjs/puppeteer/common/Connection.js:433:78)
at /home/zhuyifei1999/ctfd2pages/node_modules/puppeteer-core/lib/cjs/puppeteer/common/HTTPResponse.js:97:100
at async /home/zhuyifei1999/ctfd2pages/01_scrape/index.js:184:24
Node.js v20.3.1
I did:
diff --git a/01_scrape/index.js b/01_scrape/index.js
index 93d6dde..0fd2e77 100644
--- a/01_scrape/index.js
+++ b/01_scrape/index.js
@@ -246,6 +246,14 @@ class PageHandler {
const page = await this.browser.newPage();
this.browseCompleted = new HeartBeat();
+ // Allow fetching large resources
+ // https://github.com/puppeteer/puppeteer/issues/1599#issuecomment-355473214
+ // https://github.com/puppeteer/puppeteer/issues/6647#issuecomment-1610949415
+ page._client().send('Network.enable', {
+ maxResourceBufferSize: 100 << 20,
+ maxTotalBufferSize: 200 << 20,
+ });
+
this.setHooks(page);
console.log('visiting:', this.pageUrl);
But it still seems to get stuck on scoreboard.
But it still seems to get stuck on scoreboard.
Seems to be caused by https://ctf.kitctf.de/stream. Is this a ctfd plugin?
Also what is this scoreboard plugin? It looks so cool
Added this patch to ignore text/event-stream
diff --git a/01_scrape/index.js b/01_scrape/index.js
index 93d6dde..86f6a39 100644
--- a/01_scrape/index.js
+++ b/01_scrape/index.js
@@ -204,6 +204,30 @@ class PageHandler {
}
}
});
+
+ // consider pages with text/event-stream failed since they never finish
+ // responding and cannot be archived.
+ page.on('response', (response) => {
+ const request = response.request();
+ const requestUrl = request.url();
+
+ let contenttype = response.headers()['content-type'];
+ if (!contenttype) {
+ return;
+ }
+ if (contenttype.includes(';')) {
+ contenttype = contenttype.substring(0, contenttype.indexOf(';'));
+ }
+
+ if (contenttype === 'text/event-stream') {
+ if (requestUrl.startsWith(this.parent.origin)) {
+ this.pendingRequests.delete(requestUrl);
+ heartbeat();
+
+ this.parent.completedPaths.add(this.parent.urlToPath(requestUrl));
+ }
+ }
+ });
}
async handleSpecials(page) {
After the above patch I ran it overnight. It did finish:
1 pending pages
visiting: https://ctf.kitctf.de/users?page=27
done: /tmp/gpnctf-21/users?page=27
7 pending pages
visiting: https://ctf.kitctf.de/users/1335
done: /tmp/gpnctf-21/users/1335.html
6 pending pages
visiting: https://ctf.kitctf.de/users/1336
done: /tmp/gpnctf-21/users/1336.html
5 pending pages
visiting: https://ctf.kitctf.de/users/1339
done: /tmp/gpnctf-21/users/1339.html
4 pending pages
visiting: https://ctf.kitctf.de/users/1348
done: /tmp/gpnctf-21/users/1348.html
3 pending pages
visiting: https://ctf.kitctf.de/users/1349
done: /tmp/gpnctf-21/users/1349.html
2 pending pages
visiting: https://ctf.kitctf.de/users/1350
done: /tmp/gpnctf-21/users/1350.html
1 pending pages
visiting: https://ctf.kitctf.de/users/1351
done: /tmp/gpnctf-21/users/1351.html
done: /tmp/gpnctf-21/files/2c30d6d694d70eaf77351188b8e3e276/ref4ctory.tar.gz
done: /tmp/gpnctf-21/files/4df80a13ec1695740d9b4c324da78d5d/diffHell.tar.gz
done: /tmp/gpnctf-21/files/9030c1f3e30f28cd5dd4b3df0a36d3d1/winterfactory.tar.gz
done: /tmp/gpnctf-21/files/ea3f8f7f51d1815ccd6fec82000cdaf6/number-lock.tar.gz
done: /tmp/gpnctf-21/files/21a8aa7daf8995989679cb6fcab5861f/one-true-pairing.tar.gz
[...]
done: /tmp/gpnctf-21/files/6be053e4c8ce505f571483609a4b8971/dot-shortage.tar.gz
done: /tmp/gpnctf-21/files/3d41512d0157132b1d604386d67abd33/icefox.tar.gz
done: /tmp/gpnctf-21/files/c7036f154b9e5f93853f0afd1cd02081/js.tar.gz
done: /tmp/gpnctf-21/files/41c99be924e06a092e315cb2703f37d5/type-this.tar.gz
+ git -C /tmp/gpnctf-21 add -A
+ commit_simple 'Initial commit: Add scraped site'
+ git -C /tmp/gpnctf-21 commit -m 'Initial commit: Add scraped site'
[main (root-commit) d463918] Initial commit: Add scraped site
5507 files changed, 957196 insertions(+)
create mode 100644 404.html
create mode 100644 api/v1/challenges.json
create mode 100644 api/v1/challenges/10.json
create mode 100644 api/v1/challenges/10/solves.json
create mode 100644 api/v1/challenges/11.json
create mode 100644 api/v1/challenges/11/solves.json
create mode 100644 api/v1/challenges/14.json
create mode 100644 api/v1/challenges/14/solves.json
[...]
It works for me with the above patch :tada:
However, I have a problem in stage 05. It looks like some extraneous whitespace was added, so the node finder no longer matches. I only had to change line 45, but for consistency, maybe line 40 should also be changed.
diff --git a/05_easter_404/index.js b/05_easter_404/index.js
index ce1bfc6..8d2d866 100644
--- a/05_easter_404/index.js
+++ b/05_easter_404/index.js
@@ -42,7 +42,7 @@ const main = async function() {
findReplace(
'Powered by CTFd',
'Powered by <s>CTFd</s> GitHub Pages',
- (src) => textNodes.filter((node) => node.textContent === src),
+ (src) => textNodes.filter((node) => node.textContent.trim() === src),
);
findReplace(
'https://ctfd.io',
Also what is this scoreboard plugin? It looks so cool
As far as I know, it's based on https://github.com/itszn/ctfd-matrix-scoreboard-plugin/ but it has been modified.
Awesome! Thanks for testing!
Running
./stage 01
does not finish after 778523ab94951542a57f7961d678e30dc2d1eb22. (I bisected it using git bisect
bad: 6462c1c9ccd6796e2bc18a8a01faef18217c823c, good: d5212df1499c0e6186753723bd5ccbf16fd5cde7.) Reproduce:
In my case, the log ends like this and it just hangs:
I will produce an
strace
log the next day in case you cannot reproduce this. (I wanted to create an issue for the glob
stuff, but you already fixed that :tada:)