TooTallNate / proxy-agents

Node.js HTTP Proxy Agents Monorepo
https://proxy-agents.n8.io
871 stars 229 forks source link

[agent-base] `maxSockets` not being respected #299

Closed lukekarrys closed 2 months ago

lukekarrys commented 3 months ago

When used with the maxSockets option agent-base creates more than that number of open sockets.

This was found in npm which recently moved to using agent-base in v10 (https://github.com/npm/cli/issues/7072). Here's a comment from someone who debugged and found an incompatibility between how Node tracks open sockets and how agent-base calls super.createSocket: https://github.com/npm/cli/issues/7072#issuecomment-2001980218.

I was able to create a more minimal reproduction which I believe backs up that assertion. However, I'm happy to be told I'm holding it wrong too 😄

Reproduction

import * as tls from 'node:tls'
import * as https from 'node:https'
import { spawn } from 'node:child_process'
import timers from 'node:timers/promises'
import { fileURLToPath } from 'node:url'
import { join, dirname } from 'node:path'
import { parseArgs } from 'node:util'
import { Agent as AgentBase } from 'agent-base'
import { HttpsProxyAgent } from 'https-proxy-agent'
const Proxy = fileURLToPath(import.meta.resolve('proxy'))

const ARGV = (() => {
  const args = parseArgs({
    options: {
      maxSockets: {
        type: 'string',
        default: '10',
      },
      requests: {
        type: 'string',
        default: 'Infinity',
      },
      agent: {
        type: 'string',
        multiple: true,
        default: [],
      },
    },
  }).values

  return {
    maxSockets: +args.maxSockets,
    requests: +args.requests,
    agent: new RegExp(
      args.agent
        .map((v) => v.trim().toLowerCase())
        .filter(Boolean)
        .join('|') || '.',
      'i',
    ),
  }
})()

// Run `lsof` in a loop for the current `process.pid` and count the highest
// number of open sockets that we get to. Not a perfect way to count open
// sockets and it requires lsof to be available, but I wanted some way to do
// this without relying on patching any node internals.
const watchOpenSockets = () => {
  let max = 0
  let stopped = false
  let stopCount = 0

  const getOpenSockets = () =>
    new Promise((resolve) => {
      const proc = spawn('lsof', [
        '-a',
        '-i',
        '-n',
        '-p',
        process.pid,
        '-s',
        'TCP:LISTEN,ESTABLISHED',
      ])
      let data = ''
      proc.stdout.on('data', (c) => (data += c.toString()))
      proc.on('close', () => resolve(data.trim().split('\n').slice(1).length))
    })

  const start = async () => {
    while (true) {
      max = Math.max(max, await getOpenSockets())
      if (stopped) {
        return
      }
    }
  }

  const stop = async () => {
    stopped = true
    while (true) {
      const current = await getOpenSockets()
      if (current === 0) {
        return
      }
      if (stopCount > 10) {
        console.log(`Warning: %s sockets left open`, current)
        return
      }
      stopCount++
    }
  }

  start()

  return {
    stop,
    get max() {
      return max
    },
  }
}

const run = async (Agent) => {
  const openSockets = watchOpenSockets()

  const agent = new Agent({
    keepAlive: true,
    maxSockets: ARGV.maxSockets,
    rejectUnauthorized: false,
  })

  const get = (url) =>
    new Promise((resolve, reject) =>
      https
        // `family: 4` is not necessary but I was getting EHOSTUNREACH and ETIMEDOUT
        // and errors for agent-base if I removed it. Could be another symptom of the
        // maxSocket behavior (or a red herring).
        .get(url, { family: 4, agent }, (res) => {
          let data = ''
          res.on('data', (c) => (data += c.toString()))
          res.on('end', () =>
            resolve(url.endsWith('.json') ? JSON.parse(data) : data),
          )
        })
        .on('error', reject),
    )

  const versions = await get(
    `https://nodejs.org/download/release/index.json`,
  ).then((r) => r.map((v) => v.version).slice(0, ARGV.requests))

  // Used to simulate getting many small files without any other form on concurrency controls
  const results = await Promise.all(
    versions.map((v) =>
      get(`https://nodejs.org/download/release/${v}/SHASUMS256.txt`).then(
        (r) => new Blob([r]).size,
      ),
    ),
  )

  agent.destroy()
  await openSockets.stop()

  console.log(`%s max sockets`, openSockets.max)
  console.log(`%s total versions`, versions.length)
  console.log(
    `%s total bytes`,
    results.reduce((acc, c) => acc + c),
  )
}

const main = async () => {
  const proxy = await new Promise((resolve, reject) => {
    // Start proxy server as a child process so it doesn't report as an open
    // socket for this process
    const proc = spawn('node', [
      join(dirname(Proxy), 'bin/proxy.js'),
      '--port',
      '0',
    ])
    proc.stdout.on('data', (c) =>
      resolve({
        port: c.toString().match(/port (\d+)/)[1],
        kill: () => proc.kill(),
      }),
    )
    let err = ''
    proc.stderr.on('data', (c) => (err += c.toString()))
    proc.on('close', (code) =>
      reject(new Error(`Proxy closed with code ${code}\n${err}`)),
    )
  })

  const AGENTS = {
    core: https.Agent,
    'agent-base-sync': class extends AgentBase {
      connect(_, opts) {
        return tls.connect(opts)
      }
    },
    'agent-base-async': class extends AgentBase {
      async connect(_, opts) {
        await timers.setTimeout(100, null)
        return tls.connect(opts)
      }
    },
    'https-proxy-agent': class extends HttpsProxyAgent {
      constructor(opts) {
        super(`http://localhost:${proxy.port}`, opts)
      }
    },
  }

  for (const [name, Agent] of Object.entries(AGENTS)) {
    console.log(name)
    if (ARGV.agent.test(name)) {
      await run(Agent).catch(console.log)
    } else {
      console.log('Skipping')
    }
    console.log('-'.repeat(40))
  }

  proxy.kill()
}

// in case some sockets get left open
main().then(() => process.exit(0))
melroy89 commented 3 months ago

No pressure or anything, but.. I would like to add it has a "Prio 0" label at NPM. Just saying 😆 lol.

Meaning this issue is causing quite some problems on routes/switches/proxies/Jfrog Artifactory/NPM package repository and more. Both for individuals (at home) as well as in businesses alike. After all the huge amount of socket connections isn't scaling very well on consumer hardware or even server hardware.

lukekarrys commented 2 months ago

Updated the reproduction to include sync and async versions of agent-base and https-proxy-agent as well because I wanted to see if that was also susceptible to this behavior since it also subclasses agent-base.

Here's what I get when running it now:

❯ node index.js
core
10 max sockets
727 total versions
1873533 total bytes
----------------------------------------
agent-base-sync
591 max sockets
727 total versions
1873533 total bytes
----------------------------------------
agent-base-async
727 max sockets
727 total versions
1873533 total bytes
----------------------------------------
https-proxy-agent
548 max sockets
727 total versions
1873533 total bytes
----------------------------------------