thomasdondorf / puppeteer-cluster

Puppeteer Pool, run a cluster of instances in parallel
MIT License
3.2k stars 308 forks source link

Pupeteer Cluster - PDF Responses getting Exchanged #467

Open jestinosam opened 2 years ago

jestinosam commented 2 years ago

I am running a Puppeteer Cluster with maxConcurrency as 10. I am using Puppeteer here to receive HTML content via a POST endpoint exposed from this program. Also, the HTML content received via POST endpoint is 2-3 pages max. However while doing some performance tests, I am observing that the generated PDFs are getting exchanged when the response is sent from the endpoint i.e. Assume 3 requests are triggered parallelly to the endpoint. So the PDF of the 3rd request at times goes as the response of the 2nd request & vice versa. Can someone explain or point out how the PDF responses are getting exchanged for the incoming requests with the below implementation. Below is the code for the same:

Puppeteer Cluster Configuration

const cluster = await Cluster.launch({
        concurrency: Cluster.CONCURRENCY_PAGE,
        maxConcurrency: 10,
        puppeteerOptions: {
          headless: true,
          args: [
            "--no-sandbox",
            "--disabled-setupid-sandbox",
            "--font-render-hinting=none",
            "--force-color-profile=srgb",
          ],
        },
      });

POST Controller

app.post("/pdfengine/v1/api/generatePdf", async (request, response) => {
        logger.info("Document Name is %s :: Correlation Id is :: %s", request.body.documentName, request.body.correlationId);
        logger.info("Invoking the Generate PDF method!");

          try {
            const generatedPdf = await cluster.execute(
              generatePdf(
                request.body.documentName,
                request.body.correlationId,
                request.body.htmlContent
              )
            );

            logger.info(
              "PDF generated successfully for Document Name :: %s and CorrelationId :: %s",
              request.body.documentName,
              request.body.correlationId
            );

            response.contentType("application/pdf");
            response.status(200).send(generatedPdf);
          } catch (err) {
            logger.error(err);
            response.status(500).send("Request Failed!");
            next(err);
          }

      });

Generate PDF Method

async function generatePdf(documentName, correlationId, htmlContent) {
        await cluster.task(async ({ page }) => {
          let hrstart = process.hrtime();
          let startTime;

          try {

            await page.addScriptTag({
              path: "jquery-3.6.0.min.js",
            });

            startTime = process.hrtime();
            await page.setContent(htmlContent);
            calculateExecutionTime(startTime, "HTML Content was set in :: %d ms for DocumentName :: %s and CorrelationId :: %s");

            await page.emulateMediaType("print");

            await page.evaluateHandle("document.fonts.ready");

            startTime = process.hrtime();
            const pdf = await page.pdf({
              format: "Letter",
              printBackground: true,
              preferCSSPageSize: true,
            });

            calculateExecutionTime(
              startTime,
              "PDF generated by Puppeteer in :: %d ms for DocumentName :: %s and CorrelationId :: %s",
              documentName,
              correlationId
            );

            calculateExecutionTime(
              hrstart,
              "Total Time taken to generate PDF is :: %d ms for DocumentName :: %s and CorrelationId :: %s",
              documentName,
              correlationId
            );

            return pdf;
          } catch (err) {
            logger.error("Error in PDF Generation");
            logger.error(err);
          }
        });
      }

Full Code

(async () => {
      const cluster = await Cluster.launch({
        concurrency: Cluster.CONCURRENCY_PAGE,
        maxConcurrency: 10,
        puppeteerOptions: {
          headless: true,
          args: [
            "--no-sandbox",
            "--disabled-setupid-sandbox",
            "--font-render-hinting=none",
            "--force-color-profile=srgb",
          ],
        },
      });

      /**
       * Register additional utilities with Express
       */
      app.use(
        rTracer.expressMiddleware(),
        express.json({ limit: "50mb" }),
        express.static(process.env.ASSETS_ENV_PATH)
      );

      /**
       * Node JS Request Interceptor
       */
      app.all("/pdfengine/v1/api/generatePdf", function (request, response, next) {
        requestMetadata = metadata(request);

        traceId = request.get("traceId");
        parentSpanId = request.get("parentSpanId");
        next();
      });

      /**
       * Custom Logger Format
       */
      const myFormat = printf(({ level, message, timestamp }) => {
        var spanId = rTracer.id();

        var logMetadata = Object.assign({}, requestMetadata);
        let msg = `{"level": "${level}", "timestamp": "${timestamp}", "parentSpanId": "${parentSpanId}", "spanId": "${spanId}", "traceId": "${spanId}", "message": "${message}", "metadata": ${JSON.stringify(
          logMetadata
        )}}`;
        if (!spanId) {
          msg = `{"level": "${level}", "timestamp": "${timestamp}", "message": "${message}"}`;
        }
        return msg;
      });

      /**
       * Set Metadata for Logging
       * @param {*} request
       * @returns {JSON} Object
       */
      const metadata = function setMetadata(request) {
        return {
          documentName: request.body.documentName,
          correlationId: request.body.correlationId,
        };
      };

      /**
       * Create a Custom Logger
       */
      const logger = winston.createLogger({
        format: combine(timestamp(), splat(), myFormat), //format.json() myFormat
        transports: [new winston.transports.Console()],
      });

      /**
       * Post Endpoint
       */
      app.post("/pdfengine/v1/api/generatePdf", async (request, response) => {
        logger.info("Document Name is %s :: Correlation Id is :: %s", request.body.documentName, request.body.correlationId);
        logger.info("Invoking the Generate PDF method!");

          try {
            const generatedPdf = await cluster.execute(
              generatePdf(
                request.body.documentName,
                request.body.correlationId,
                request.body.htmlContent
              )
            );

            logger.info(
              "PDF generated successfully for Document Name :: %s and CorrelationId :: %s",
              request.body.documentName,
              request.body.correlationId
            );

            response.contentType("application/pdf");
            response.status(200).send(generatedPdf);
          } catch (err) {
            logger.error(err);
            response.status(500).send("Request Failed!");
            next(err);
          }

      });

      /**
       * Create a new browser instance and launch the same.
       * @returns browser
       */
      async function generatePdf(documentName, correlationId, htmlContent) {
        await cluster.task(async ({ page }) => {
          let hrstart = process.hrtime();
          let startTime;

          try {

            await page.addScriptTag({
              path: "jquery-3.6.0.min.js",
            });

            startTime = process.hrtime();
            await page.setContent(htmlContent);
            calculateExecutionTime(startTime, "HTML Content was set in :: %d ms for DocumentName :: %s and CorrelationId :: %s");

            await page.emulateMediaType("print");

            await page.evaluateHandle("document.fonts.ready");

            startTime = process.hrtime();
            const pdf = await page.pdf({
              format: "Letter",
              printBackground: true,
              preferCSSPageSize: true,
            });

            calculateExecutionTime(
              startTime,
              "PDF generated by Puppeteer in :: %d ms for DocumentName :: %s and CorrelationId :: %s",
              documentName,
              correlationId
            );

            calculateExecutionTime(
              hrstart,
              "Total Time taken to generate PDF is :: %d ms for DocumentName :: %s and CorrelationId :: %s",
              documentName,
              correlationId
            );

            return pdf;
          } catch (err) {
            logger.error("Error in PDF Generation");
            logger.error(err);
          }
        });
      }

      /**
       * Method to calculate the time difference for any execution
       *
       * @param {NodeJS.Process} startTime
       * @param {String} message
       */
      function calculateExecutionTime(startTime, message, documentName, correlationId) {
        var precision = 2;
        var timeDifference = (process.hrtime(startTime)[1] / 1000000).toFixed(
          precision
        );
        logger.info(message, timeDifference, documentName, correlationId);
      }

      /**
       * Expose Application
       */
      app.listen(3000, function () {
        logger.info("Application started! Server running on port 3000");
        logger.info("Welcome To Node JS App!");
        console.log("Environment is :: ", process.env.NODE_ENV);
        console.log("Assets path is ::", process.env.ASSETS_ENV_PATH);
      });
    })();
pradeepnelakuditifsmk commented 2 years ago

Hi @jestinosam

Does it still happen? Exchange of responses amongst the requests? If so, how did you fix it? Can you please let me know this? Thanks.