puppeteer / puppeteer

JavaScript API for Chrome and Firefox
https://pptr.dev
Apache License 2.0
88.24k stars 9.07k forks source link

[Bug]: Puppeteer Script Fails in CI Pipeline but Works Locally #12968

Closed AJAY0993 closed 2 weeks ago

AJAY0993 commented 3 weeks ago

Minimal, reproducible example

const fs = require("node:fs")
const path = require("node:path")
const puppeteer = require("puppeteer")

const katas = [{
    "id": "51c8991dee245d7ddf00000e",
    "name": "Reversed Words",
    "slug": "reversed-words",
    "completedLanguages": ["javascript"],
    "completedAt": "2023-08-08T19:09:23.411Z"
  },
  {
    "id": "5769b3802ae6f8e4890009d2",
    "name": "Removing Elements",
    "slug": "removing-elements",
    "completedLanguages": ["javascript"],
    "completedAt": "2023-08-08T18:55:58.495Z"
  }
]

const difficultyToDirMap = {
  "1 kyu": "1-kyu",
  "2 kyu": "2-kyu",
  "3 kyu": "3-kyu",
  "4 kyu": "4-kyu",
  "5 kyu": "5-kyu",
  "6 kyu": "6-kyu",
  "7 kyu": "7-kyu",
  "8 kyu": "8-kyu"
}

const extensions = {
  agda: "agda",
  bf: "b",
  c: "c",
  cmlf: "cmfl",
  clojure: "clj",
  cobol: "cob",
  coffeescript: "coffee",
  commonlisp: "lisp",
  coq: "coq",
  cplusplus: "cpp",
  crystal: "cr",
  csharp: "cs",
  dart: "dart",
  elixir: "ex",
  elm: "elm",
  erlang: "erl",
  factor: "factor",
  forth: "fth",
  fortran: "f",
  fsharp: "fs",
  go: "go",
  groovy: "groovy",
  haskell: "hs",
  haxe: "hx",
  idris: "idr",
  java: "java",
  javascript: "js",
  julia: "jl",
  kotlin: "kt",
  lean: "lean",
  lua: "lua",
  nasm: "nasm",
  nimrod: "nim",
  objective: "m",
  ocaml: "ml",
  pascal: "pas",
  perl: "pl",
  php: "php",
  powershell: "ps1",
  prolog: "pro",
  purescript: "purs",
  python: "py",
  r: "r",
  racket: "rkt",
  ruby: "rb",
  rust: "rs",
  scala: "scala",
  shell: "sh",
  sql: "sql",
  swift: "swift",
  typescript: "ts",
  vb: "vb"
}

function sanitizeFolderName(name) {
  // Define a regex pattern to match invalid characters
  const invalidChars = /[<>:"/\\|?*]+/g

  // Replace invalid characters with an underscore
  return name.replace(invalidChars, "_")
}

const maxRetries = 10000

const scrapKatas = async (katas) => {
  console.log("It failed on line 14")
  const browser = await puppeteer.launch()

  console.log("It failed on line 17")
  const page = await browser.newPage()

  // Navigate to the page
  console.log("It failed on line 20")
  await page.goto("https://www.codewars.com/users/sign_in", {
    waitUntil: "networkidle2"
  })

  console.log("It failed on line 25")
  await page.setViewport({ width: 1080, height: 1024 })

  // Fill in email and password
  console.log("It failed on line 29")
  await page.type("#user_email", process.env.CODEWARS_EMAIL)
  await page.type("#user_password", process.env.CODEWARS_PASSWORD)

  // Click the submit button
  console.log("It failed on line 34")
  await page.click("#new_user button[type=submit]")

  console.log("It failed on line 37")
  await page.waitForNavigation({ waitUntil: "networkidle2" })
  let retryCount = 0
  let waitTime = 1000

  while (retryCount < maxRetries) {
    for (let index in katas) {
      try {
        const kata = katas[index]
        const language = kata["completedLanguages"][0]
        const extension = extensions[language]

        console.log("It failed on line 49")

        await page.goto(
          `https://www.codewars.com/kata/${kata.id}/solutions/${language}/me/newest`,
          {
            waitUntil: "networkidle2"
          }
        )

        const rank = await page.evaluate(() => {
          const rankElement = document.querySelector(".is-white-rank")
          if (rankElement) {
            return rankElement.textContent
          }
          return null
        })

        if (rank) {
          const codeText = await page.evaluate(() => {
            // Locate the element with ID 'solutions_list'
            const solutionsList = document.getElementById("solutions_list")
            if (solutionsList) {
              // Find the first <div> inside 'solutions_list'
              const solutionItem = solutionsList.querySelector("div")
              if (solutionItem) {
                // Extract text from <pre> inside the <div>
                const preTag = solutionItem.querySelector("pre")
                if (preTag) {
                  return preTag.textContent
                }
              }
            }
            return "//null"
          })

          fs.writeFile(
            path.join(
              `${__dirname}/katas/${
                difficultyToDirMap[rank]
              }/${sanitizeFolderName(kata.slug)}/solution.${extension}`
            ),
            codeText,
            "utf-8",
            (err) => {
              if (err) console.log(err)
              else console.log("success")
            }
          )
          console.log("File done", kata.slug, rank)
        }
      } catch (error) {
        console.log(error, `\n Retrying in ${waitTime}`)
        await new Promise((resolve) => setTimeout(resolve, waitTime))
        waitTime *= 2
      }
    }

    break
  } // Close the browser
  await browser.close()
}

module.exports = scrapKatas

Background

My Puppeteer-based Node.js script is consistently failing when run in the CI pipeline, despite working perfectly on local machine. The issue appears to be environment-specific, possibly related to headless mode, missing dependencies, or timing issues in the CI environment.

Error message ->

Error: Execution context was destroyed, most likely because of a navigation. at rewriteError (/home/runner/work/kata-scrapper/kata-scrapper/node_modules/puppeteer-core/lib/cjs/puppeteer/cdp/ExecutionContext.js:452:15) at async #evaluate (/home/runner/work/kata-scrapper/kata-scrapper/node_modules/puppeteer-core/lib/cjs/puppeteer/cdp/ExecutionContext.js:384:60) at async ExecutionContext.evaluateHandle (/home/runner/work/kata-scrapper/kata-scrapper/node_modules/puppeteer-core/lib/cjs/puppeteer/cdp/ExecutionContext.js:325:16) at async IsolatedWorld.evaluateHandle (/home/runner/work/kata-scrapper/kata-scrapper/node_modules/puppeteer-core/lib/cjs/puppeteer/cdp/IsolatedWorld.js:88:16) at async CdpJSHandle.evaluateHandle (/home/runner/work/kata-scrapper/kata-scrapper/node_modules/puppeteer-core/lib/cjs/puppeteer/api/JSHandle.js:150:20) at async CdpElementHandle.evaluateHandle (/home/runner/work/kata-scrapper/kata-scrapper/node_modules/puppeteer-core/lib/cjs/puppeteer/api/ElementHandle.js:344:20) at async CSSQueryHandler.queryOne (/home/runner/work/kata-scrapper/kata-scrapper/node_modules/puppeteer-core/lib/cjs/puppeteer/common/QueryHandler.js:130:59) at async CdpElementHandle.$ (/home/runner/work/kata-scrapper/kata-scrapper/node_modules/puppeteer-core/lib/cjs/puppeteer/api/ElementHandle.js:399:21) at async CdpElementHandle. (/home/runner/work/kata-scrapper/kata-scrapper/node_modules/puppeteer-core/lib/cjs/puppeteer/api/ElementHandle.js:268:32) at async CdpFrame.$ (/home/runner/work/kata-scrapper/kata-scrapper/node_modules/puppeteer-core/lib/cjs/puppeteer/api/Frame.js:377:20) ~

Environment Details: Local: OS: Windows 11 Node.js version: v22.5.1 Puppeteer version: 23.1.0 CI: CI service: Base image: ubuntu@latest Node.js version: 20 Puppeteer version:20

Expectation

The Puppeteer script should perform identically in the CI environment as it does locally, navigating to the Codewars site, logging in, retrieving kata solutions, and saving them to the file system.

Reality

The script fails to execute successfully in the CI pipeline. Failures occur during launch or , page navigation, form submissions, or while waiting for network idle. The CI logs indicate timeouts, missing elements, or unhandled promises.

Puppeteer configuration file (if used)

No response

Puppeteer version

23.1.0

Node version

20

Package manager

npm

Package manager version

10.8.2

Operating system

Linux

OrKoN commented 3 weeks ago

Could you please capture browser logs as described here https://pptr.dev/guides/debugging#print-browser-logs? Do you have all required system dependencies installed https://pptr.dev/guides/system-requirements?

AJAY0993 commented 3 weeks ago

Hello,

The script breaks when it tries to enter the password. I found this by adding a bunch of console.log statements at each step. I've also screen-recorded the process in the CI pipeline.

https://github.com/user-attachments/assets/922115d7-254e-47a1-b99e-fb204fe68f32

Regarding the dependencies, I’ve already installed the following:

- name: Install Chrome dependencies
  run: |
    sudo apt-get update
    sudo apt-get install -y \
      libnss3 \
      lsof \
      libx11-6 \
      libx11-xcb1 \
      libxcb1 \
      libxcomposite1 \
      libxcursor1 \
      libxdamage1 \
      libxi6 \
      libxtst6 \
      libnss3 \
      libcups2 \
      libxrandr2 \
      libasound2 \
      libpangocairo-1.0-0 \
      libpango-1.0-0 \
      libcairo2 \
      libatk1.0-0 \
      libatk-bridge2.0-0 \
      libgbm1 \
      libxshmfence1 \
      libegl1 \
      libxkbcommon0 \
      libdbus-1-3

This is entire logged info of the process

Katas page 0 done
Katas page 1 done
Katas page 2 done
All kata details fetched.
Starting scraping solutions
Starting the scraping process...
Browser launched successfully.
Navigating to Codewars sign-in page...
BROWSER LOG: The value "320px" for key "width" was truncated to its numeric prefix.
BROWSER LOG: [DOM] Input elements should have autocomplete attributes (suggested: "current-password"): (More info: https://goo.gl/9p2vKq) %o
Setting viewport size...
Waiting for email and password input fields...
Email: Set
Password: Set
Typing email
Typed email
Typing password
BROWSER REQUEST FAILED: https://analytics.google.com/g/collect?v=2&tid=G-M3JYSQLS8M&gtm=45je48j0v887093415za200&_p=1724221495726&gcd=13l3l3l3l1l1&npa=0&dma=0&tag_exp=0&cid=1057102533.1724221496&ul=en-us&sr=800x600&uaa=x86&uab=64&uafvl=Chromium%3B127.0.6533.119%7CNot)A%253BBrand%3B99.0.0.0&uamb=0&uam=&uap=Linux&uapv=6.5.0&uaw=0&are=1&pae=1&frm=0&pscdl=&_s=3&dl=https%3A%2F%2Fwww.codewars.com%2Fusers%2Fsign_in&dt=Sign%20in%20%7C%20Codewars&sid=1724221496&sct=1&seg=0&en=user_engagement&_et=2268&tfd=2893 - net::ERR_ABORTED
BROWSER REQUEST FAILED: https://analytics.google.com/g/collect?v=2&tid=G-M3JYSQLS8M&gtm=45je48j0v887093415za200&_p=1724221495726&gcd=13l3l3l3l1l1&npa=0&dma=0&tag_exp=0&cid=1057102533.1724221496&ul=en-us&sr=800x600&uaa=x86&uab=64&uafvl=Chromium%3B127.0.6533.119%7CNot)A%253BBrand%3B99.0.0.0&uamb=0&uam=&uap=Linux&uapv=6.5.0&uaw=0&are=1&pae=1&frm=0&pscdl=&_eu=AEA&_s=2&dl=https%3A%2F%2Fwww.codewars.com%2Fusers%2Fsign_in&dt=Sign%20in%20%7C%20Codewars&sid=1724221496&sct=1&seg=0&en=scroll&epn.percent_scrolled=90&_et=10&tfd=2892 - net::ERR_ABORTED
Error during the login process 💥 Error: Execution context was destroyed, most likely because of a navigation.
    at rewriteError (/home/runner/work/kata-scrapper/kata-scrapper/node_modules/puppeteer-core/lib/cjs/puppeteer/cdp/ExecutionContext.js:452:15)
    at async #evaluate (/home/runner/work/kata-scrapper/kata-scrapper/node_modules/puppeteer-core/lib/cjs/puppeteer/cdp/ExecutionContext.js:384:60)
    at async ExecutionContext.evaluateHandle (/home/runner/work/kata-scrapper/kata-scrapper/node_modules/puppeteer-core/lib/cjs/puppeteer/cdp/ExecutionContext.js:325:16)
    at async IsolatedWorld.evaluateHandle (/home/runner/work/kata-scrapper/kata-scrapper/node_modules/puppeteer-core/lib/cjs/puppeteer/cdp/IsolatedWorld.js:88:16)
    at async CdpJSHandle.evaluateHandle (/home/runner/work/kata-scrapper/kata-scrapper/node_modules/puppeteer-core/lib/cjs/puppeteer/api/JSHandle.js:150:20)
    at async CdpElementHandle.evaluateHandle (/home/runner/work/kata-scrapper/kata-scrapper/node_modules/puppeteer-core/lib/cjs/puppeteer/api/ElementHandle.js:344:20)
    at async CSSQueryHandler.queryOne (/home/runner/work/kata-scrapper/kata-scrapper/node_modules/puppeteer-core/lib/cjs/puppeteer/common/QueryHandler.js:130:59)
    at async CdpElementHandle.$ (/home/runner/work/kata-scrapper/kata-scrapper/node_modules/puppeteer-core/lib/cjs/puppeteer/api/ElementHandle.js:399:21)
    at async CdpElementHandle.<anonymous> (/home/runner/work/kata-scrapper/kata-scrapper/node_modules/puppeteer-core/lib/cjs/puppeteer/api/ElementHandle.js:268:32)
    at async CdpFrame.$ (/home/runner/work/kata-scrapper/kata-scrapper/node_modules/puppeteer-core/lib/cjs/puppeteer/api/Frame.js:377:20)
BROWSER LOG: The value "320px" for key "width" was truncated to its numeric prefix.

Thanks for your help!

OrKoN commented 3 weeks ago

I do not see CdpFrame.$ calls in the script you posted. Which code calls it? it appears to be a symptom of evaluation while the navigation is happening.

AJAY0993 commented 2 weeks ago

Hello orkon, It was a really weird issue the same code was working in another repository. So i just deleted the repo and and created a new one and now it works fine Thanks

OrKoN commented 2 weeks ago

Thanks for clarifying, probably it was some issue with the repository, and some other code caused this error.