[BUG] Navigation failed due to disconnected browser

FarooqBukhari commented 11 months ago

Environment

chromium Version: 119.0
puppeteer / puppeteer-core Version: 21.5
Node.js Version: 16
Lambda / GCF Runtime: nodejs16

Expected Behavior

I want to generate a pdf

Current Behavior

I am getting Navigation Failed error on .setContent()

Steps to Reproduce

I have a docker based aws lambda. I was previously using @Sparticuz/chrome-aws-lambda which has been archived. Now I can't get this one to work. Help Needed here.

"use strict"; const chromium = require("@sparticuz/chromium"); const puppeteer = require("puppeteer-core"); const PDF_PARSE = require("pdf-parse"); const pdf2img = require('pdf-img-convert'); const imgToPDF = require('image-to-pdf'); // deps installed and packaged const AWS = require("aws-sdk"); const AXIOS = require("axios"); // local file imports const config = require("./config"); //Set up SQS const SQS = new AWS.SQS({ apiVersion: "2012-11-05" }); process.env.FONTCONFIG_PATH = "/usr/share/fonts/truetype"; module.exports.handleResponse = async (event, context) => { // received an Amazon SQS event console.log("Event: ", event); console.log("Context: ", context); console.log("Received records: ", event.Records.length); let browser = null; try { // TODO: pull out browser initialization from loop console.log(chromium.args); browser = await puppeteer.launch({ args: chromium.args, defaultViewport: chromium.defaultViewport, executablePath: await chromium.executablePath(), headless: chromium.headless, ignoreHTTPSErrors: true, }); console.log("browser created"); const s3 = new AWS.S3(); // event can have multiple records for (let record of event.Records) { const body = JSON.parse(record.body); console.log("Processing record: ", body); // This can act as a blueprint of the request object const { bucket, key, fileName, responseUrl, html, wfItemUuid, successWfEvent, failureWfEvent, language, } = body; // const bucket = body["bucket"], key = body["key"], fileName = body["fileName"], // responseUrl = body["responseUrl"], html = body["html"], wfItemUuid = body["wfItemUuid"], // successWfEvent = body["successWfEvent"], failureWfEvent = body["failureWfEvent"]; const response = { wfItemUuid, fileName, wfEvent: failureWfEvent, generationTimestamp: new Date().getTime(), message: null, }; try { let message = null; let page = null; console.log("generating pdf"); page = await browser.newPage(); await page.setContent(html, { waitUntil: ["networkidle0", "load", "domcontentloaded"] }); 
let pdf = await page.pdf(config.getConfig(language)); console.log("Language: " + language); const pdfInfo = await PDF_PARSE(pdf); //numpages if (pdfInfo.numpages > 2) { message = "generated pdf has more than 2 pages"; } else { console.log("flattening image"); try { const pngPages = await pdf2img.convert(pdf, { width: 596*4, //Number in px height: 842*4, // Number in px page_numbers: [1, 2], // A list of pages to render instead of all of them base64: true, scale: 2.0 }); const pages = [ "data:image/png;base64,"+pngPages[0], "data:image/png;base64,"+pngPages[1] ] pdf = imgToPDF(pages, imgToPDF.sizes.A4); } catch (err) { console.log(err); message = "failure in flattening pdf"; } console.log("uploading to s3"); try { const uploadResponse = await s3 .upload({ Body: pdf, Bucket: bucket, Key: key, }) .promise(); console.log(uploadResponse); } catch (err) { console.log(err); message = "failure in uploading the generated pdf to s3"; } } if ("local" == process.env.ENVIRONMENT) { console.log("sending lambda response to", responseUrl); await AXIOS.post( responseUrl, !message ? { ...response, wfEvent: successWfEvent } : { ...response, message } ) .then(function (r) { console.log("lambda response sent", r); }) .catch(function (e) { console.log("lambda response failed", e); }); } else { // call sendSQS message await sendSQS(response, successWfEvent, message); } if (page !== null) { await page.close(); } } catch (error) { console.log("error processing record: ", record, error); } } // for loop ends } catch (error) { console.log("error in function body: ", error); return context.fail(error); } finally { if (browser !== null) { await browser.close(); } } };

amansatija commented 11 months ago

hey i am facing the same issue

puja2718 commented 11 months ago

Hi, I am facing the same issue, any update on this?

amansatija commented 11 months ago

@Sparticuz can you please test this? I am getting this on all versions of chromium (116 to 119) with puppeteer (21.1.0 to 21.5.2) when using a SAM application (with Node versions 16 to 18 respectively as both my local and platform Node.js version). I also tried the -next-0 build with Node.js version 20, and the same thing was returned in all cases: Error: Target frame detached page.navigate

Funny thing is, I have a lambda deployed with the same code with Node version 16, chromium 116 and puppeteer 21.1.0 and it is working fine, and even testing it locally was working fine until around last week or so... Now whenever I test it locally it gives the following error, no matter what I do. I did update the AWS and SAM SDKs to the latest.

the following is my code for pdf handling


const puppeteer = require("puppeteer-core");
const chromium = require("@sparticuz/chromium");
const AWS = require('aws-sdk')
const awsHelper = require("./aws")
const fs = require('fs')
const path = require('path')
const ejs =  require('ejs')
var moment = require('moment');
const converter = require('./tzconverter')
const restclient = require('./restclient')

// const handlebars = require('handlebars')

// Optional: If you'd like to use the legacy headless mode. "new" is the default.
// NOTE(review): configured by plain assignment — presumably a setter exposed by
// @sparticuz/chromium; confirm against the installed version.
chromium.setHeadlessMode = true;

// Optional: If you'd like to disable webgl, true is the default.
// Set to false here, i.e. graphics mode is turned off for this module.
chromium.setGraphicsMode = false;
/**
 * Renders the report template with EJS and prints the resulting HTML to an A4 PDF
 * using headless Chromium.
 *
 * @param {object} event - Invocation event; when `event.isOffline` is truthy a
 *   locally installed Chromium binary is used instead of the packaged one.
 * @param {object} context - Invocation context (not used by the visible code).
 * @param {object} data - Template data; `data.reportType` must be "somereportype"
 *   and `data.reportData._id`, when present, names the output file. The object is
 *   extended in place with `moment`, `converter` and `aws_bucket_url` before rendering.
 * @returns {Promise<Error|undefined>} The caught error when anything fails
 *   (it is returned, not thrown); otherwise undefined.
 */
module.exports.pdf = async (event, context, data) => {
  // Scoped per invocation so a handle that was closed by an earlier invocation
  // can never be closed a second time when this one fails before launching.
  let browser = null
  try {
    const executablePath = event?.isOffline
      ? './node_modules/puppeteer/.local-chromium/mac-674921/chrome-mac/Chromium.app/Contents/MacOS/Chromium'
      : await chromium.executablePath()

    const reportId = data?.reportData?._id || null

    const reportType = data?.reportType
    if (reportType !== "somereportype") {
      // Error() takes a single message; a second positional argument is an
      // options object, so the offending value has to be part of the string.
      throw new Error(`Invalid Report type :: ${reportType}`)
    }
    const templateName = "sometemplate.template.ejs"
    const bucketname = "some_bucket_name"

    console.log("templateName ::::::", templateName)
    console.log("bucketname ::::::", bucketname)

    console.log("Fetching  template file ..... ::::::")
    const file = fs.readFileSync(path.resolve(__dirname, 'reports', templateName), 'utf8')

    // Helpers made available to the template.
    data.moment = moment
    data.converter = converter
    data.aws_bucket_url = process.env.cloudBucketUrlMedia
    const html = ejs.render(file, data)
    console.log(html)

    ///// Generating Pdf ..... \\\\\\\
    console.log("Generating Pdf ..... ::::::")
    browser = await puppeteer.launch({
      args: chromium.args,
      ignoreDefaultArgs: ['--disable-extensions'],
      defaultViewport: chromium.defaultViewport,
      executablePath: executablePath,
      headless: chromium.headless,
    })

    const page = await browser.newPage()
    // page
    // .on('console', message =>
    //   console.log(`${message.type().substr(0, 3).toUpperCase()} ${message.text()}`))
    // .on('pageerror', ({ message }) => console.log(message))
    // .on('response', response =>
    //   console.log(`${response.status()} ${response.url()}`))
    // .on('requestfailed', request =>
    //   console.log(`${request.failure().errorText} ${request.url()}`))

    await page.setContent(html, { waitUntil: ['load', 'domcontentloaded', 'networkidle0'] })
    const pdf = await page.pdf({
      format: 'A4',
      printBackground: true,
      margin: { top: '1cm', right: '1cm', bottom: '1cm', left: '1cm' }
    })

    // NOTE(review): `pdf` and `output_filename` are not consumed by the code
    // visible here — presumably the upload step was trimmed from this snippet.
    let output_filename = `pdftest5.pdf`

    if (reportId) {
      output_filename = "" + reportId + ".pdf"
    }

    // await parquet.parquet();
    // context?.succeed(response)

  } catch (error) {
    console.log(error)
    // return context?.fail(error)
    return error
  } finally {
    if (browser !== null) {
      await browser.close()
    }
  }
}

anevsevra commented 11 months ago

I have the same issue in AWS Lambda, runtime is Node.js 20.x. However, exactly the same code works properly in local environment (Linux x86_64). I figured out that in my case lambda throws an error if HTML string contains <img /> tags. Without them lambda generates PDF without issues. Images, that are put in HTML, are hosted on CDN.

puja2718 commented 11 months ago

Yes, I am facing the same issue when html contains <img /> tags. However, I was able to apply a workaround by using await page.setRequestInterception(true) So, intercept any requests made to external urls and apply your own logic.

For example, this works for me:

// Serve image requests ourselves (fetched with axios) and abort everything else.
await page.setRequestInterception(true);
page.on('request', async (interceptedRequest: any) => {
  const url = interceptedRequest.url();
  const isImage =
    url.endsWith('.png') || url.endsWith('.jpg') || url.endsWith('.jpeg');
  try {
    if (!isImage) {
      await interceptedRequest.abort();
      return;
    }
    const data = await axios.get(url, { responseType: 'arraybuffer' });
    await interceptedRequest.respond({ body: data.data });
  } catch (e) {
    logger.error(e);
    // A failed download must still resolve the request; otherwise the page
    // keeps waiting for it and navigation never settles.
    if (!interceptedRequest.isInterceptResolutionHandled()) {
      await interceptedRequest.abort().catch((abortError: any) => logger.error(abortError));
    }
  }
});

jonathanwsilva commented 11 months ago

This might be the same issue as the other two recent bugs https://github.com/Sparticuz/chromium/issues/196 https://github.com/Sparticuz/chromium/issues/201

Namely, it seems like amazon has pushed build images starting on 12/05 that break this library. Turn on dumpio:true in your launch options and see if you can find any more data in the chromium logs.

Also try the workaround in the other two issues, maybe stipulating an older runtime release in your --invoke-image could sort this out for you, for now.

jennydale commented 10 months ago

We are having this issue too.

The workaround @puja2718 posted Dec 13 worked for us, hooray! Only real modifications I made to that snippet were to check that the start of the url was 'http', because axios couldn't fetch file://whatever/whatever.png images, and to continue rather than abort non-matching requests:

    // Workaround: download remote images with axios and hand them to the page,
    // letting every other request proceed normally.
    await page.setRequestInterception(true)
    page.on('request', async interceptedRequest => {
        // This part may not be needed since this is our only interceptor, but docs talk a lot about it, and it seems like it shouldn't hurt...
        if (interceptedRequest.isInterceptResolutionHandled()) return
        const url = interceptedRequest.url()
        // axios cannot fetch file:// URLs, so only http(s) images are served by us.
        const isRemoteImage =
            url.startsWith('http') &&
            (url.endsWith('.png') ||
                url.endsWith('.jpg') ||
                url.endsWith('.jpeg'))
        try {
            if (!isRemoteImage) {
                await interceptedRequest.continue()
                return
            }
            const data = await axios.get(url, {
                responseType: 'arraybuffer',
            })
            await interceptedRequest.respond({ body: data.data })
        } catch (e) {
            log('ERROR', e)
            // If the download (or the respond/continue call) failed, the request
            // must still be resolved, otherwise the page waits on it forever.
            if (!interceptedRequest.isInterceptResolutionHandled()) {
                await interceptedRequest
                    .abort()
                    .catch(abortError => log('ERROR', abortError))
            }
        }
    })

Still feels pretty icky to have this in the codebase, so hoping the issue gets resolved soon so we can remove this workaround.

UPDATE: A better fix is to bump package versions! Once we bumped puppeteer-core to 21.5.2 and @sparticuz/chromium to 118.0.0 the problem with external images went away!

Sparticuz / chromium