berstend / puppeteer-extra

💯 Teach puppeteer new tricks through plugins.
https://extra.community
MIT License
6.44k stars 739 forks source link

[Bug] Page Content does not load #903

Closed marcpre closed 2 months ago

marcpre commented 2 months ago

Describe the bug

When I try to load a page via Puppeteer, the page content does not load. The page looks like this:

image

Code Snippet

// Minimal reproduction script: launches headless Chromium through puppeteer-extra
// with the stealth plugin registered, navigates to a single URL, logs the outcome,
// and closes the browser.
const puppeteer = require("puppeteer-extra");
const StealthPlugin = require("puppeteer-extra-plugin-stealth");

// Register the stealth plugin with no options, i.e. with whatever evasions it
// enables by default.
puppeteer.use(StealthPlugin());

(async () => {
    // NOTE(review): launch() runs before the try block and the async IIFE has no
    // .catch(), so a launch failure surfaces as an unhandled promise rejection.
    const browser = await puppeteer.launch({
        headless: true,
        // Chromium command-line flags passed through to the browser process.
        args: [
            "--no-sandbox",
            "--disable-setuid-sandbox",
            "--disable-dev-shm-usage",
            "--disable-accelerated-2d-canvas",
            "--disable-gpu",
        ],
    });

    try {
        const page = await browser.newPage();
        await page.setViewport({ width: 1366, height: 768 });
        // Override the user agent with a Windows / Chrome 126 string.
        await page.setUserAgent(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
        );

        // Extra headers are attached to every request the page initiates, including
        // the top-level navigation below.
        // NOTE(review): the Accept and Sec-Fetch-* values describe a cross-site image
        // sub-request ('Sec-Fetch-Dest': 'image', 'Sec-Fetch-Mode': 'no-cors'), not a
        // document navigation - confirm that sending them on the page request is intended.
        await page.setExtraHTTPHeaders({
            'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate, br, zstd',
            'Accept-Language': 'de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7,nl;q=0.6',
            'Priority': 'i',
            'Referer': 'https://www.semrush.com/',
            'Sec-Ch-Ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
            'Sec-Ch-Ua-Mobile': '?0',
            'Sec-Ch-Ua-Platform': '"Windows"',
            'Sec-Fetch-Dest': 'image',
            'Sec-Fetch-Mode': 'no-cors',
            'Sec-Fetch-Site': 'cross-site'
        });

        const pageUrl = "https://www.semrush.com/trending-websites/global/all";
        // Wait for the "networkidle2" condition before treating navigation as done.
        await page.goto(pageUrl, { waitUntil: "networkidle2" });

        // Logged as soon as goto() resolves; nothing here inspects the rendered
        // content, so this message does not prove the page body actually loaded.
        console.log("Page loaded successfully");
    } catch (error) {
        // Any failure from page setup or navigation is reported, not rethrown.
        console.error("An error occurred:", error);
    } finally {
        // Always close the browser, whether or not navigation succeeded.
        await browser.close();
    }
})();

Versions

These are my dependencies in my package.json:

  "devDependencies": {
    "axios": "^1.4.0",
    "dotenv": "^16.4.5",
    "minimist": "^1.2.8",
    "mysql": "^2.18.1",
    "puppeteer": "^19.11.1",
    "puppeteer-extra": "^3.3.6",
    "puppeteer-extra-plugin-stealth": "^2.11.2",
    "winston": "^3.8.2",
    "winston-daily-rotate-file": "^4.7.1"
  },
  "dependencies": {
    "csv-writer": "^1.6.0",
    "google-auth-library": "^9.6.3",
    "google-spreadsheet": "^4.1.1"
  }

This is my system:


  System:
    OS: Linux 5.15 Ubuntu 20.04.6 LTS (Focal Fossa)
    CPU: (4) x64 11th Gen Intel(R) Core(TM) i9-11900 @ 2.50GHz
    Memory: 20.59 GB / 23.87 GB
    Container: Yes
    Shell: 5.0.17 - /bin/bash
  Binaries:
    Node: 16.15.0 - /usr/local/bin/node
    npm: 9.6.3 - /usr/local/bin/npm
  npmPackages:
    puppeteer: ^19.11.1 => 19.11.1 
    puppeteer-extra: ^3.3.6 => 3.3.6 
    puppeteer-extra-plugin-stealth: ^2.11.2 => 2.11.2 
vladtreny commented 2 months ago

Try disabling the stealth plugin's `iframe.contentWindow` evasion before registering it:

// Workaround: register the stealth plugin without its 'iframe.contentWindow' evasion.
// puppeteer.use(StealthPlugin())  <-- remove this
        // Keep a reference to the plugin instance so its evasion list can be edited
        // before the plugin is handed to puppeteer.
        const stealth = StealthPlugin()
        // Remove the one evasion by name; every other enabled evasion stays in place.
        stealth.enabledEvasions.delete('iframe.contentWindow')
        // Register the modified instance in place of the default-configured one.
        puppeteer.use(stealth)
marcpre commented 2 months ago

Thank you. Works