thomasdondorf / puppeteer-cluster

Puppeteer Pool, run a cluster of instances in parallel
MIT License
3.24k stars 310 forks source link

Use same URL but different logic on each browser #519

Closed webmip closed 1 year ago

webmip commented 1 year ago

Let's say something like this: I need code that uses puppeteer-cluster to open the same URL (http://google.com) at the same time (in parallel), but performs a different action on each instance. To trigger my actions, I want to use Express to create a simple API that allows me to run one instance of the cluster individually or all of them at the same time.

For example, I have logic1.js and logic2.js.

I would like to call http://localhost.com/api/1 to execute logic1.js, or call http://localhost.com/all to execute logic1.js and logic2.js at the same time.

I have something like this, but it is not working properly:

import express from 'express';
import { Cluster } from 'puppeteer-cluster';
import performAction from './actions.js';
import useProxy from 'puppeteer-page-proxy';

// Express application on which the cluster-triggering routes are registered.
const app = express();
// TCP port handed to app.listen() when the server is started.
const port = 3000;

/**
 * GET /run-all
 *
 * Launches a fresh puppeteer-cluster (up to 4 parallel browser contexts),
 * registers a task that applies a proxy to the page and calls
 * `performAction(page, null)`, queues one job, waits for the queue to drain
 * and then closes the cluster before replying.
 *
 * Responds with 'All instances completed!' on success, or HTTP 500 when the
 * cluster cannot be launched or driven. The cluster is always closed once it
 * has been launched, so a failure does not leave a browser running.
 */
app.get('/run-all', async (req, res) => {
  let cluster;

  try {
    cluster = await Cluster.launch({
      concurrency: Cluster.CONCURRENCY_CONTEXT,
      maxConcurrency: 4, // number of instances to run in parallel
      monitor: true,
      puppeteerOptions: {
        headless: false,
        ignoreHTTPSErrors: true,
        defaultViewport: false,
        //slowMo: 100,
        args: [
          `--window-size=${1680},${970}`,
          '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393',
          '--disable-site-isolation-trials',
          '--disable-features=site-per-process,SitePerProcess',
          '--disable-blink-features=AutomationControlled',
          '--disable-web-security',
        ],
      },
    });

    // Failures inside queued jobs are reported through this event rather than
    // thrown; without a listener they would go unnoticed.
    cluster.on('taskerror', (err, data) => {
      console.error(`Task failed for ${data}: ${err.message}`);
    });

    await cluster.task(async ({ page }) => {
      console.log(page.url());
      // NOTE(review): getProxy is neither defined nor imported in the visible
      // file — confirm where it comes from, otherwise this line throws.
      await useProxy(page, `direct://${getProxy()}`);

      await performAction(page, null);
    });

    // The task callback only destructures `page`, so the queued URL is not
    // read by the task itself.
    cluster.queue('https://google.es');
    await cluster.idle();
  } catch (err) {
    console.error('run-all failed:', err);
    res.status(500).send('Cluster run failed');
    return;
  } finally {
    if (cluster) {
      try {
        await cluster.close();
      } catch (closeErr) {
        // Best-effort shutdown: log and continue so a response is still sent.
        console.error('Failed to close cluster:', closeErr);
      }
    }
  }

  res.send('All instances completed!');
});

/**
 * GET /run-instance/:id
 *
 * Launches a single-worker puppeteer-cluster, runs `performAction(page, id)`
 * once with the `id` taken from the URL, waits for the queue to drain and
 * closes the cluster before replying.
 *
 * Responds with `Instance <id> completed!` as plain text on success, or
 * HTTP 500 when the cluster cannot be launched or driven. The cluster is
 * always closed once it has been launched.
 */
app.get('/run-instance/:id', async (req, res) => {
  const { id } = req.params;
  let cluster;

  try {
    cluster = await Cluster.launch({
      concurrency: Cluster.CONCURRENCY_CONTEXT,
      maxConcurrency: 1, // run only one instance
      monitor: false,
      puppeteerOptions: {
        headless: false,
        ignoreHTTPSErrors: true,
        defaultViewport: false,
        //slowMo: 1000,
        args: [
          `--window-size=${1680},${970}`,
          '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393',
          '--disable-site-isolation-trials',
          '--disable-features=site-per-process,SitePerProcess',
          '--disable-blink-features=AutomationControlled',
          '--disable-web-security',
        ],
      },
    });

    // Failures inside queued jobs are reported through this event rather than
    // thrown; without a listener they would go unnoticed.
    cluster.on('taskerror', (err) => {
      console.error(`Task failed for instance ${id}: ${err.message}`);
    });

    await cluster.task(async ({ page }) => {
      await performAction(page, id);
    });

    // No job data is needed: the task closes over `id` from the request.
    cluster.queue();

    await cluster.idle();
  } catch (err) {
    console.error(`run-instance ${id} failed:`, err);
    res.status(500).send('Cluster run failed');
    return;
  } finally {
    if (cluster) {
      try {
        await cluster.close();
      } catch (closeErr) {
        // Best-effort shutdown: log and continue so a response is still sent.
        console.error('Failed to close cluster:', closeErr);
      }
    }
  }

  // `id` is caller-controlled; send as plain text so it is never interpreted
  // as HTML by the browser (res.send defaults string bodies to text/html).
  res.type('text/plain').send(`Instance ${id} completed!`);
});

// Start the HTTP server on `port`; the listen callback logs the local URL.
const reportListening = () => {
  console.log(`Express app listening at http://localhost:${port}`);
};

app.listen(port, reportListening);
// actions.js
/**
 * Navigates the given page to https://httpbin.org/anything and then runs the
 * instance-specific step selected by `id`.
 *
 * - `id === '1'`: reads the page title, logs it, and saves 'screenshot.jpg'.
 * - anything else: logs a placeholder message and saves 'all.jpg'.
 *
 * @param {object} page - Page-like object providing `goto`, `title` and `screenshot`.
 * @param {?string} id - Instance identifier; only the string '1' selects the first branch.
 * @returns {Promise<void>} Resolves once the screenshot call has finished.
 */
async function performAction(page, id) {
    await page.goto('https://httpbin.org/anything');

    // Each branch does its own logging and picks the screenshot file name;
    // the capture itself is identical for both.
    let screenshotPath;
    if (id === '1') {
        const pageTitle = await page.title();
        console.log(`Instance ${id}: ${pageTitle}`);
        screenshotPath = 'screenshot.jpg';
    } else {
        console.log(`Instance ${id}: Do something else`);
        screenshotPath = 'all.jpg';
    }

    await page.screenshot({ path: screenshotPath });
}

  export default performAction;

Thanks

webmip commented 1 year ago

Already fixed. Let me put the code here in case it works for someone:

´´´ app.get('/run-instance/xxxx', async (req, res) => { const { id } = req.params;
// NOTE(review): the route path above has no ':id' segment as written, so `id`
// would be undefined unless 'xxxx' is a redacted placeholder — confirm.

// Launch a single-worker cluster with a 100000 ms timeout setting.
const cluster = await Cluster.launch({
  concurrency: Cluster.CONCURRENCY_CONTEXT,
  maxConcurrency: 1, // run only one instance
  timeout: 100000, // 100sec
  monitor: true,
  // NOTE(review): `puppeteer` is not declared in this snippet; presumably it
  // is imported elsewhere — confirm.
  puppeteer,
    puppeteerOptions: {
      headless: false,
      ignoreHTTPSErrors: true,
      defaultViewport: false,
      //slowMo: 1000,
      args: [
          // Proxy is supplied as a browser launch flag here ('xxxxx' looks like
          // a redacted value).
          '--proxy-server=xxxxx',
          '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
          '--disable-site-isolation-trials',
          '--disable-features=site-per-process,SitePerProcess',
          '--disable-blink-features=AutomationControlled',
          '--disable-web-security'
      ],
    },
});

  // NOTE(review): TARGET_URI and `name` are not defined in this snippet, and
  // no cluster.task(...) call is shown; presumably `name` is the task
  // function to run for this job — confirm.
  cluster.queue(TARGET_URI, name);

// Wait for cluster.idle(), then close the cluster before replying.
await cluster.idle();
await cluster.close();

res.send(`Instance ${id} completed!`);

});

´´´