Closed marcpre closed 2 months ago
Describe the bug
When I open a Cloudflare-protected page, I get this:
Code Snippet
/*
 * Quietlight scraper script.
 *
 * Launches a headless browser through puppeteer-extra with the stealth plugin,
 * opens https://quietlight.com/, saves a full-page screenshot into ./_img and
 * writes its log to the console and to daily-rotated files in ./_logs.
 *
 * Environment variables are read from ../../.env and ../.env (relative to this
 * file): DB_HOST, DB_USERNAME, DB_PASSWORD, DB_DATABASE, DB_PORT, APP_ENV.
 */
const puppeteer = require("puppeteer-extra");
const StealthPlugin = require("puppeteer-extra-plugin-stealth");
const path = require("path");
const fs = require("fs");
const winston = require("winston");
const DailyRotateFile = require("winston-daily-rotate-file");
const mysql = require("mysql");
const util = require("util");

// Apply the stealth plugin to avoid being detected as a bot.
// The 'user-agent-override' evasion is removed because the script sets its own
// user agent on the page further down.
const puppeteerStealth = StealthPlugin();
puppeteerStealth.enabledEvasions.delete('user-agent-override');
puppeteer.use(puppeteerStealth);

require("dotenv").config({ path: path.resolve(__dirname, "../../.env") });
require("dotenv").config({ path: path.resolve(__dirname, "../.env") });

const connection = mysql.createConnection({
  host: process.env.DB_HOST,
  user: process.env.DB_USERNAME,
  password: process.env.DB_PASSWORD,
  database: process.env.DB_DATABASE,
  port: process.env.DB_PORT
});
connection.connect();

const appEnvironment = process.env.APP_ENV;

// Make sure the output directories exist before anything writes into them.
const imgDirectory = path.join(__dirname, "_img");
if (!fs.existsSync(imgDirectory)) {
  fs.mkdirSync(imgDirectory, { recursive: true });
}

const logDirectory = path.join(__dirname, "_logs");
if (!fs.existsSync(logDirectory)) {
  fs.mkdirSync(logDirectory, { recursive: true });
}

const logger = winston.createLogger({
  level: "info",
  format: winston.format.combine(
    winston.format.label({ label: "quietlight" }),
    winston.format.timestamp(),
    winston.format.printf(({ level, message, label, timestamp }) => {
      return `${timestamp} [${label}] ${level}: ${message}`;
    })
  ),
  transports: [
    new winston.transports.Console(),
    new DailyRotateFile({
      filename: path.join(logDirectory, "quietlight_%DATE%.log"),
      datePattern: "YYYY-MM-DD",
      zippedArchive: true,
      maxSize: "20m",
      maxFiles: "7d",
    }),
  ],
});

/**
 * Saves a full-page screenshot of `page` into the _img directory.
 *
 * Failures are logged and never rethrown, so a broken screenshot cannot abort
 * the caller.
 *
 * @param {object} page - Puppeteer page to capture.
 * @param {string} tag - Short label embedded in the generated file name.
 * @returns {Promise<void>} Resolves once the screenshot attempt has finished.
 */
async function captureScreenshot(page, tag) {
  // Generate a dynamic filename based on the current time (digits only)
  const timestamp = new Date().toISOString().replace(/[^0-9]/g, "");
  const filename = `screenshot_${tag}_${timestamp}.png`;
  const screenshotPath = path.join(imgDirectory, filename);

  // Try to capture and save the screenshot
  try {
    await page.screenshot({ path: screenshotPath, fullPage: true });
    logger.info(
      `Screenshot of the page has been captured and saved to ${screenshotPath}.`
    );
  } catch (error) {
    logger.error(
      "An error occurred while capturing a screenshot of the page:",
      error
    );
  }
}

/**
 * Logs the time elapsed since `startTime`, in seconds.
 *
 * @param {Date} startTime - Moment the script started.
 */
function logDuration(startTime) {
  const endTime = new Date();
  const duration = (endTime - startTime) / 1000; // Duration in seconds
  logger.info(`Script execution time: ${duration} seconds`);
}

const startTime = new Date();

logger.info(`############# Starting Quietlight Scrapper #############`);
logger.info(`Running on Node.js ${process.version}`);
logger.info(`App Environment: ${appEnvironment}`);

(async () => {
  // Declared outside the try block so that catch/finally can see them; they
  // stay undefined if the launch or the page lookup fails.
  let browser;
  let page;
  let exitCode = 0;

  try {
    browser = await puppeteer.launch({
      // headless: appEnvironment === "local" ? false : "new",
      headless: true, // set to false if you want to see the browser
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-accelerated-2d-canvas',
        '--disable-gpu',
        '--lang=en-US,en', // Set language explicitly
      ],
    });

    // Reuse the tab the browser opens on launch instead of creating a new one.
    page = (await browser.pages())[0];

    await page.setViewport({ width: 1366, height: 768 });
    await page.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36");
    await page.setExtraHTTPHeaders({ 'Accept-Language': 'en-US,en;q=0.9' });
    page.setDefaultNavigationTimeout(0); // 0 disables the navigation timeout

    await page.goto("https://quietlight.com/", { waitUntil: 'networkidle2' });

    // Awaited so the file is written before the browser is closed in finally.
    await captureScreenshot(page, "quietlightPage");

    await page.waitForSelector("img");
    // NOTE(review): waitForTimeout is deprecated in newer Puppeteer releases;
    // confirm it is still available before upgrading the dependency.
    await page.waitForTimeout(10000);
  } catch (error) {
    exitCode = 1;
    // The page may not exist if the failure happened before it was obtained.
    if (page) {
      await captureScreenshot(page, "errorInMainLoop");
    }
    logger.error("An error occurred during the script execution:", error);
  } finally {
    if (browser) {
      await browser.close();
    }
    connection.end();
    logDuration(startTime);
    logger.info(`############# End quietlight Scrapper #############`);
    // Report failures to the calling shell/scheduler through the exit status.
    process.exit(exitCode);
  }
})();
Versions
This is my package.json:
{ "name": "quietlight", "version": "1.0.0", "description": "", "main": "quietlight.js", "scripts": { "test": "echo \"Error: no test specified\" && exit 1", "start": "node quietlight.js" }, "keywords": [], "author": "", "license": "ISC", "devDependencies": { "axios": "^1.4.0", "dotenv": "^16.4.5", "minimist": "^1.2.8", "mysql": "^2.18.1", "puppeteer": "^19.11.1", "puppeteer-extra": "^3.3.6", "puppeteer-extra-plugin-stealth": "^2.11.2", "winston": "^3.8.2", "winston-daily-rotate-file": "^4.7.1" }, "dependencies": { "google-auth-library": "^9.6.3", "google-spreadsheet": "^4.1.1" } }
The issue was with how I was calling the Puppeteer stealth plugin in my script.
Describe the bug
When I open a Cloudflare-protected page, I get this:
Code Snippet
Versions
This is my package.json: