This script utilizes the single-file CLI's API to bulk-download pages, with assistance from Puppeteer in case it needs to scroll or otherwise interact with the webpage. The URLs file has 8 lines, each line being a URL to download, but the script only visited about half of them:
import { initialize } from './single-file-cli-api.js'
import puppeteer from 'puppeteer-core'
import readline from 'readline'
import fs from 'fs/promises' // Using the Promise-based version of fs
;(async () => {
  // --- launch the browser managed by single-file-cli ----------------------
  let browser
  let conn // DevTools debug URL/port exposed by single-file-cli
  try {
    browser = await initialize()
    console.log(browser)
    conn = browser.apiPort
    console.log(`Received browser debug URL: ${conn}`)
    console.log('launching browser')
  } catch (error) {
    console.error('Failed to launch browser:', error)
    process.exit(1)
  }

  // Give the browser time to finish starting before attaching Puppeteer.
  await wait(10000)

  const pupurl = conn
  console.log('Puppeteer attempt URL: ' + pupurl)

  // --- attach Puppeteer over the DevTools protocol ------------------------
  let pc
  try {
    pc = await puppeteer.connect({ browserURL: pupurl })
  } catch (error) {
    console.error('Failed to connect to Puppeteer:', error)
    process.exit(1)
  }

  // --- open the login page and wait for the user to log in manually ------
  let page
  try {
    page = await pc.newPage()
    await page.goto('https://bbs.quantclass.cn')
  } catch (error) {
    console.error('Failed to open page:', error)
    process.exit(1)
  }
  try {
    console.log('Waiting for User Login')
    await waitForEnter()
    console.log('User Logged In. Continuing...')
  } catch (error) {
    // Shut the browser down cleanly before bailing out.
    await browser.finish()
    console.error(error.message)
    process.exit(1)
  }

  // --- read the list of target URLs ---------------------------------------
  let urls
  try {
    const data = await fs.readFile('./urls.txt', 'utf8')
    // BUG FIX: splitting on /\n/ alone left a trailing "\r" on every line of
    // a CRLF (Windows-saved) file and kept blank/trailing lines, so many
    // entries were invalid URLs and silently failed — which is why only
    // about half of the 8 URLs were visited.  Normalize line endings, trim
    // each line, and drop empty ones.
    urls = data
      .split(/\r?\n/)
      .map(line => line.trim())
      .filter(line => line.length > 0)
  } catch (error) {
    console.error('Failed to read URLs file:', error)
    process.exit(1)
  }

  // --- capture every URL with single-file ---------------------------------
  try {
    await browser.capture(urls)
  } catch (error) {
    console.error('Failed to capture page for URL:', urls, error)
  }

  console.log('download finished, cleaning up')
  await browser.finish()
})()
/**
 * Block until the user confirms login on stdin.
 *
 * @returns {Promise<void>} resolves when the user presses Enter with no
 *   other input; rejects with an Error if anything else was typed.
 */
async function waitForEnter () {
  return new Promise((resolve, reject) => {
    const rl = readline.createInterface({
      input: process.stdin,
      output: process.stdout
    })
    const prompt = 'Press Enter to confirm login (or any other key to exit)...'
    rl.question(prompt, answer => {
      // Close the interface first so stdin is released either way.
      rl.close()
      if (answer.trim() !== '') {
        reject(new Error('User entered a value instead of pressing Enter.'))
        return
      }
      resolve()
    })
  })
}
/**
 * Promise-based sleep.
 *
 * @param {number} ms - delay in milliseconds
 * @returns {Promise<void>} resolves (with undefined) after the delay
 */
function wait (ms) {
  return new Promise((resolve) => setTimeout(resolve, ms))
}
Ignore the weird naming of some of the variables.
Any suggestions about bugs or possible optimizations in the code are also welcome (considering I am only a beginner in JavaScript).
This script utilizes the single-file CLI's API to bulk-download pages, with assistance from Puppeteer in case it needs to scroll or otherwise interact with the webpage. The URLs file has 8 lines, each line being a URL to download, but the script only visited about half of them:
Ignore the weird naming of some of the variables. Any suggestions about bugs or possible optimizations in the code are also welcome (considering I am only a beginner in JavaScript).