berstend / puppeteer-extra

💯 Teach puppeteer new tricks through plugins.
https://extra.community
MIT License
6.37k stars 736 forks source link

[Bug] production build can't launch puppeteer #704

Open mihail727 opened 2 years ago

mihail727 commented 2 years ago

Describe the bug

I am writing a web application for scraping a site using the Nuxt 3 framework. In development mode everything works, but in the production build the server throws an error:

Code Snippet

my puppeteer service ```ts import puppeteer from 'puppeteer-extra'; import AdblockerPlugin from 'puppeteer-extra-plugin-adblocker'; import StealthPlugin from 'puppeteer-extra-plugin-stealth'; import BlockResourcesPlugin from 'puppeteer-extra-plugin-block-resources'; import proxyChain from 'proxy-chain'; import type { Browser } from 'puppeteer'; const ProxyUrl = 'xxxxxxxx'; const userAgents = [ 'Mozilla/5.0 (Linux; Android 4.4; Nexus 5 Build/_BuildID_) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/30.0.0.0 Mobile Safari/537.36', 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) CriOS/56.0.2924.75 Mobile/14E5239e Safari/602.1', 'Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19', 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1', ]; export async function getPagesContent(urls: string[], targetSelector: string) { const newProxyUrl = await proxyChain.anonymizeProxy(ProxyUrl); puppeteer.use(StealthPlugin()); puppeteer.use(AdblockerPlugin({ blockTrackers: true, blockTrackersAndAnnoyances: true })); puppeteer.use( BlockResourcesPlugin({ blockedTypes: new Set(['font', 'image']), }), ); try { const browser = await puppeteer.launch({ headless: false, ignoreHTTPSErrors: true, defaultViewport: { height: 800, width: 480, }, args: [ '--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-accelerated-2d-canvas', '--disable-gpu', '--window-size=480,800', '--lang=ru,ru-RU,en-US,en', `--proxy-server=${newProxyUrl}`, ], executablePath: '/usr/bin/chromium-browser', }); const currentUserAgent = getUserAgent(); const reqList: Promise[] = []; console.log(urls); urls?.forEach?.((url) => { reqList.push(getPageContent(browser, url, targetSelector, currentUserAgent)); }); const data = await Promise.all(reqList); await 
browser.close(); await proxyChain.closeAnonymizedProxy(newProxyUrl, true); return data as string[]; } catch (error) { throw error; } } async function getPageContent( browser: Browser, url: string, targetSelector: string, userAgent: string, ) { const page = await browser.newPage(); await page.setUserAgent(userAgent); await page.setExtraHTTPHeaders({ 'Accept-Language': 'ru,ru-RU,en-US,en', }); await page.evaluateOnNewDocument(() => { Object.defineProperty(navigator, 'language', { get: function () { return 'ru-RU'; }, }); Object.defineProperty(navigator, 'languages', { get: function () { return ['ru', 'ru-RU', 'en-US', 'en']; }, }); }); await page.goto(url, { waitUntil: 'networkidle2' }); const searchedBlock = await page.waitForSelector(targetSelector); let content = await searchedBlock.evaluate((el) => el.textContent); content = content.replaceAll(/[₽\s]/gm, '').trim(); await page.close(); return content; } function getUserAgent() { return userAgents[Math.floor(Math.random() * userAgents.length)]; } ```
my package.json ```json { "private": true, "version": "0.0.0", "scripts": { "build": "nuxt build", "dev": "nuxt dev", "generate": "nuxt generate", "preview": "nuxt preview ./build", "bns": "yarn build; yarn preview" }, "devDependencies": { "@heroicons/vue": "^2.0.10", "@nuxtjs/tailwindcss": "^5.3.2", "@tailwindcss/forms": "^0.5.3", "@types/bcrypt": "^5.0.0", "@types/jsonwebtoken": "^8.5.9", "@types/jwt-decode": "^3.1.0", "@types/node": "^18.7.15", "@types/nodemailer": "^6.4.5", "@vueuse/core": "^9.1.1", "@vueuse/nuxt": "^9.1.1", "nuxt": "^3.0.0-rc.9", "@types/puppeteer": "^5.4.6", "sass": "^1.54.8" }, "dependencies": { "bcrypt": "^5.0.1", "dotenv": "^16.0.2", "jsonwebtoken": "^8.5.1", "jwt-decode": "^3.1.2", "nodemailer": "^6.7.8", "pg": "^8.8.0", "proxy-chain": "^2.0.7", "puppeteer": "^17.1.1", "puppeteer-extra": "^3.3.4", "puppeteer-extra-plugin-adblocker": "^2.13.5", "puppeteer-extra-plugin-block-resources": "^2.4.2", "puppeteer-extra-plugin-stealth": "^2.11.1", "reflect-metadata": "^0.1.13", "typeorm": "^0.3.9" } } ```

Versions

System: OS: Linux 5.19 Nobara Linux 36 (Thirty Six) CPU: (4) x64 Intel(R) Core(TM) i5-3470 CPU @ 3.20GHz Memory: 8.61 GB / 15.59 GB Container: Yes Shell: 5.8.1 - /usr/bin/zsh Binaries: Node: 16.14.0 - /usr/bin/node Yarn: 1.22.17 - /usr/bin/yarn npm: 8.3.1 - /usr/bin/npm npmPackages: puppeteer: ^17.1.1 => 17.1.1 puppeteer-extra: ^3.3.4 => 3.3.4 puppeteer-extra-plugin-adblocker: ^2.13.5 => 2.13.5 puppeteer-extra-plugin-block-resources: ^2.4.2 => 2.4.2 puppeteer-extra-plugin-stealth: ^2.11.1 => 2.11.1

mihail727 commented 1 year ago

The solution for me was to install two dependencies on the host, in the project folder, after the project was built: `yarn add puppeteer-extra-plugin-user-preferences` and `yarn add puppeteer-extra-plugin-stealth`. The most interesting thing is that I don't use puppeteer-extra-plugin-user-preferences inside the project.

radoslavkarlik commented 1 year ago

This happened to me after upgrading the stealth plugin and puppeteer-extra. It is listed as a regular dependency though, not a peer dependency, which is just weird. The same goes for the data-dir plugin.