nodejs / import-in-the-middle

Like `require-in-the-middle`, but for ESM import
https://www.npmjs.com/package/import-in-the-middle
Apache License 2.0
72 stars 27 forks source link

no such file or directory #59

Closed gajus closed 5 months ago

gajus commented 10 months ago

Expected Behavior

getExports is throwing an error:

{
  context: { format: 'module', importAttributes: {} },
  error: Error: ENOENT: no such file or directory, open '/Users/x/Developer/contra/gaia/node_modules/.pnpm/graphql-yoga@5.1.1_graphql@16.8.1/node_modules/graphql-yoga/esm/@graphql-yoga/logger'
      at async open (node:internal/fs/promises:633:25)
      at async readFile (node:internal/fs/promises:1242:14)
      at async getSource (node:internal/modules/esm/load:46:14)
      at async defaultLoad (node:internal/modules/esm/load:137:34)
      at async nextLoad (node:internal/modules/esm/hooks:750:22)
      at async getExports (/Users/x/Developer/contra/gaia/node_modules/.pnpm/import-in-the-middle@1.7.3/node_modules/import-in-the-middle/lib/get-exports.js:68:17)
      at async processModule (/Users/x/Developer/contra/gaia/node_modules/.pnpm/import-in-the-middle@1.7.3/node_modules/import-in-the-middle/hook.js:134:23)
      at async processModule (/Users/x/Developer/contra/gaia/node_modules/.pnpm/import-in-the-middle@1.7.3/node_modules/import-in-the-middle/hook.js:160:20)
      at async getSource (/Users/x/Developer/contra/gaia/node_modules/.pnpm/import-in-the-middle@1.7.3/node_modules/import-in-the-middle/hook.js:269:60)
      at async load (/Users/x/Developer/contra/gaia/node_modules/.pnpm/import-in-the-middle@1.7.3/node_modules/import-in-the-middle/hook.js:334:26) {
    errno: -2,
    code: 'ENOENT',
    syscall: 'open',
    path: '/Users/x/Developer/contra/gaia/node_modules/.pnpm/graphql-yoga@5.1.1_graphql@16.8.1/node_modules/graphql-yoga/esm/@graphql-yoga/logger'
  },
  url: 'file:///Users/x/Developer/contra/gaia/node_modules/.pnpm/graphql-yoga@5.1.1_graphql@16.8.1/node_modules/graphql-yoga/esm/@graphql-yoga/logger'
}

I added the following code to catch the above error.

let parentCtx;

try {
  parentCtx = await parentLoad(url, context)
} catch (error) {
  console.log({
    context,
    error,
    url
  });

  throw error;
}

The same code works if I remove --loader=import-in-the-middle/hook.mjs.

Happy to provide more context.

Specifications

gajus commented 10 months ago

~Hard to tell from import URLs what's happening here, but I wonder if it is related to how pnpm works.~

Confirmed that the above is not due to the way how node modules are linked.

gajus commented 10 months ago

The paths mentioned in the URLs do not actually exist, e.g.

{
  context: { format: 'module', importAttributes: {} },
  error: Error: ENOENT: no such file or directory, open '/Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/@graphql-yoga/subscription'
      at async open (node:internal/fs/promises:633:25)
      at async readFile (node:internal/fs/promises:1242:14)
      at async getSource (node:internal/modules/esm/load:46:14)
      at async defaultLoad (node:internal/modules/esm/load:137:34)
      at async nextLoad (node:internal/modules/esm/hooks:750:22)
      at async getExports (/Users/x/Developer/contra/gaia/node_modules/import-in-the-middle/lib/get-exports.js:66:17)
      at async processModule (/Users/x/Developer/contra/gaia/node_modules/import-in-the-middle/hook.js:134:23)
      at async processModule (/Users/x/Developer/contra/gaia/node_modules/import-in-the-middle/hook.js:160:20)
      at async processModule (/Users/x/Developer/contra/gaia/node_modules/import-in-the-middle/hook.js:160:20)
      at async getSource (/Users/x/Developer/contra/gaia/node_modules/import-in-the-middle/hook.js:269:60) {
    errno: -2,
    code: 'ENOENT',
    syscall: 'open',
    path: '/Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/@graphql-yoga/subscription'
  },
  url: 'file:///Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/@graphql-yoga/subscription'
}

There is no such directory as /Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/@graphql-yoga/subscription

The closest directly that exists is /Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/.

gajus commented 10 months ago

An example of file that triggers this is:

export * from 'styled-components';
//# sourceMappingURL=index.js.map

I can see that when processModule is trying to process it, it derives wrong URL for the module.

I added this line just before processModule:

console.log({
  modFile,
  normalizedModName,
  modUrl,
  modName,
});

which tells us:

{
  modFile: 'styled-components',
  normalizedModName: 'styledComponents',
  modUrl: 'file:///Users/x/Developer/contra/gaia/packages/styled-components/dist/styled-components',
  modName: '17062121655529e604482'
}

/Users/x/Developer/contra/gaia/packages/styled-components/dist/styled-components is wrong. It should be: /Users/x/Developer/contra/gaia/node_modules/styled-components/. But there is no way of knowing that just from analyzing the URLs, which is what this module does.

https://github.com/DataDog/import-in-the-middle/blob/c3c2c52c1915b47994af59d507c59029c1f1fae9/hook.js#L155-L166

I am confused how this works for other people use cases, since this module is clearly used by others.

gajus commented 10 months ago

Another example it fails:

Object.defineProperty(exports, "TimeoutFailure", { enumerable: true, get: function () { return common_1.TimeoutFailure; } });
+__exportStar(require("@temporalio/common/lib/errors"), exports);
__exportStar(require("@temporalio/common/lib/interfaces"), exports);
__exportStar(require("@temporalio/common/lib/workflow-handle"), exports);
{
  exports: [
    '__esModule',
    'LOCAL_TARGET',
    'Connection',
    'TimeoutFailure',
    'TerminatedFailure',
    'TemporalFailure',
    'ServerFailure',
    'defaultPayloadConverter',
    'ChildWorkflowFailure',
    'CancelledFailure',
    'ApplicationFailure',
    'ActivityFailure'
  ],
  reexports: [
    '@temporalio/common/lib/errors',
    '@temporalio/common/lib/interfaces',
    '@temporalio/common/lib/workflow-handle',
    './async-completion-client',
    './client',
    './errors',
    './grpc-retry',
    './interceptors',
    './types',
    './workflow-client',
    './workflow-options',
    './schedule-types',
    './schedule-client',
    './task-queue-client'
  ]
}
Error: Cannot find module '@temporalio/common/lib/errors'
Require stack:
- /Users/x/Developer/contra/import-in-the-middle/lib/get-exports.js
- /Users/x/Developer/contra/import-in-the-middle/hook.js
    at Module._resolveFilename (node:internal/modules/cj

In this case though the file does actually exist.

node ../../node_modules/@temporalio/common/lib/errors.js
gajus commented 10 months ago

An example of file that triggers this is:

export * from 'styled-components';
//# sourceMappingURL=index.js.map

I can see that when processModule is trying to process it, it derives wrong URL for the module.

I added this line just before processModule:

console.log({
  modFile,
  normalizedModName,
  modUrl,
  modName,
});

which tells us:

{
  modFile: 'styled-components',
  normalizedModName: 'styledComponents',
  modUrl: 'file:///Users/x/Developer/contra/gaia/packages/styled-components/dist/styled-components',
  modName: '17062121655529e604482'
}

/Users/x/Developer/contra/gaia/packages/styled-components/dist/styled-components is wrong. It should be: /Users/x/Developer/contra/gaia/node_modules/styled-components/. But there is no way of knowing that just from analyzing the URLs, which is what this module does.

https://github.com/DataDog/import-in-the-middle/blob/c3c2c52c1915b47994af59d507c59029c1f1fae9/hook.js#L155-L166

I am confused how this works for other people use cases, since this module is clearly used by others.

gajus commented 10 months ago

So all of these failures are happening when resolving export * from ... pattern, e.g.

{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@envelop/core/esm/index.js',
  modFile: '@envelop/types',
  modUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@envelop/core/esm/@envelop/types',
  n: '* from @envelop/types'
}
{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/index.js',
  modFile: '@graphql-yoga/logger',
  modUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/@graphql-yoga/logger',
  n: '* from @graphql-yoga/logger'
}
{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@floating-ui/react-dom-interactions/dist/floating-ui.react-dom-interactions.mjs',
  modFile: '@floating-ui/react-dom',
  modUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@floating-ui/react-dom-interactions/dist/@floating-ui/react-dom',
  n: '* from @floating-ui/react-dom'
}
{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/playwright-chromium/index.mjs',
  modFile: 'playwright-core',
  modUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/playwright-chromium/playwright-core',
  n: '* from playwright-core'
}
{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/packages/styled-components/dist/index.js',
  modFile: 'styled-components',
  modUrl: 'file:///Users/x/Developer/contra/gaia/packages/styled-components/dist/styled-components',
  n: '* from styled-components'
}

Tagging @bengl in case you have a better sense of what's breaking.

gajus commented 10 months ago

For what it is worth, if we update the logic to resolve import paths relative to the srcUrl, it resolves to correct modules (?).

-      const data = await processModule({
-        srcUrl: modUrl,
-        context,
-        parentGetSource,
-        ns: `$${modName}`,
-        defaultAs: normalizedModName
-      })
+      let data;
+      
+      try {
+        data = await processModule({
+          srcUrl: modUrl,
+          context,
+          parentGetSource,
+          ns: `$${modName}`,
+          defaultAs: normalizedModName
+        });
+      } catch (error) {
+        console.log({
+          srcUrl,
+          modFile,
+          modUrl,
+          n,
+        });
+      }
+
+      if (!data) {
+        const newModUrl = pathToFileURL(require.resolve(modFile, {
+          paths: [srcUrl]
+        })).toString();
+
+        console.log({
+          newModUrl
+        });
+
+        data = await processModule({
+          srcUrl: newModUrl,
+          context,
+          parentGetSource,
+          ns: `$${modName}`,
+          defaultAs: normalizedModName
+        });
+      }
{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@envelop/core/esm/index.js',
  modFile: '@envelop/types',
  modUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@envelop/core/esm/@envelop/types',
  n: '* from @envelop/types'
}
{
  newModUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@envelop/types/cjs/index.js'
}
{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/index.js',
  modFile: '@graphql-yoga/logger',
  modUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/@graphql-yoga/logger',
  n: '* from @graphql-yoga/logger'
}
{
  newModUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@graphql-yoga/logger/cjs/index.js'
}
{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/subscription.js',
  modFile: '@graphql-yoga/subscription',
  modUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/@graphql-yoga/subscription',
  n: '* from @graphql-yoga/subscription'
}
{
  newModUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@graphql-yoga/subscription/cjs/index.js'
}
{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/subscription.js',
  modFile: '@graphql-yoga/subscription',
  modUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/@graphql-yoga/subscription',
  n: '* from @graphql-yoga/subscription'
}
{
  newModUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@graphql-yoga/subscription/cjs/index.js'
}
{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@floating-ui/react-dom-interactions/dist/floating-ui.react-dom-interactions.mjs',
  modFile: '@floating-ui/react-dom',
  modUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@floating-ui/react-dom-interactions/dist/@floating-ui/react-dom',
  n: '* from @floating-ui/react-dom'
}
{
  newModUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@floating-ui/react-dom/dist/floating-ui.react-dom.umd.js'
}
{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/playwright-chromium/index.mjs',
  modFile: 'playwright-core',
  modUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/playwright-chromium/playwright-core',
  n: '* from playwright-core'
}
{
  newModUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/playwright-core/index.js'
}
{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/packages/styled-components/dist/index.js',
  modFile: 'styled-components',
  modUrl: 'file:///Users/x/Developer/contra/gaia/packages/styled-components/dist/styled-components',
  n: '* from styled-components'
}
{
  newModUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/styled-components/dist/styled-components.cjs.js'
}
gajus commented 10 months ago

Looks like depending on how modules are installed (hoisted or non-hoisted), you will need to try both variants, i.e.

const newModUrl = pathToFileURL(require.resolve(modFile, {
  paths: [srcUrl]
})).toString();

and

const newModUrl = pathToFileURL(require.resolve(modFile)).toString();
gajus commented 10 months ago

This is the patch that we ended up with.

diff --git a/hook.js b/hook.js
index aa22356f370a34cd49ae3d5ea8fd076202e71bf5..074c388edf40f10e3e4274cf38c308371f7ce4fe 100644
--- a/hook.js
+++ b/hook.js
@@ -22,6 +22,48 @@ if (NODE_MAJOR >= 20 || (NODE_MAJOR === 18 && NODE_MINOR >= 19)) {
   getExports = ({ url }) => import(url).then(Object.keys)
 }

+function shouldIgnore (url) {
+  // In short, some packages are not compatible with this hook.
+  // I have attempted to patch this hook to work with every package, but that just sent me down a rabbit hole.
+  // See attempts and related issues here:
+  // * https://github.com/DataDog/import-in-the-middle/issues/60
+  // * https://github.com/DataDog/import-in-the-middle/issues/59
+  //
+  // However, for this hook to be useful, it does not need to work with every package.
+  // It just needs to work with the packages that we want OpenTelemetry to instrument.
+  const patterns = [
+    '@envelop/core',
+    '@floating-ui/react-dom-interactions',
+    'dataloader',
+    'graphql-yoga',
+    // Does not get patched properly.
+    // Produces errors such as:
+    // > TypeError: Cannot read properties of undefined (reading 'sendCommand')
+    // > at instrumentation_1.InstrumentationNodeModuleDefinition.moduleExports [as patch] (/Users/x/Developer/contra/gaia/node_modules/.pnpm/@opentelemetry+instrumentation-ioredis@0.36.1_@opentelemetry+api@1.7.0/node_modules/@opentelemetry/instrumentation-ioredis/build/src/instrumentation.js:40:78)
+    'ioredis',
+    'openai',
+    'playwright-chromium',
+    'styled-components',
+  ];
+
+  for (const pattern of patterns) {
+    if (url === pattern || url.startsWith(pattern + '/') || url.includes('/' + pattern + '/')) {
+      return true;
+    }
+  }
+
+  // If you miss a pattern, then you will get an error along the lines of:
+  //
+  // > Error: ENOENT: no such file or directory, open '/Users/x/Developer/contra/gaia/packages/styled-components/dist/styled-components'
+  //
+  // Usually, it is easy to tell what package is causing the issue by just looking at the path (e.g. 'styled-components' in the above example).
+  // However, other times, it is not so obvious. In those cases, you can uncomment the following line to get everything that was attempted up to the point of failure.
+  //
+  // console.log('url', url)
+
+  return false;
+}
+
 function hasIitm (url) {
   try {
     return new URL(url).searchParams.has('iitm')
@@ -230,6 +272,9 @@ function addIitm (url) {

 function createHook (meta) {
   async function resolve (specifier, context, parentResolve) {
+    if (shouldIgnore(specifier)) {
+      return parentResolve(specifier)
+    }
     const { parentURL = '' } = context
     const newSpecifier = deleteIitm(specifier)
     if (isWin && parentURL.indexOf('file:node') === 0) {
@@ -264,6 +309,9 @@ function createHook (meta) {

   const iitmURL = new URL('lib/register.js', meta.url).toString()
   async function getSource (url, context, parentGetSource) {
+    if (shouldIgnore(url)) {
+      return parentGetSource(url)
+    }
     if (hasIitm(url)) {
       const realUrl = deleteIitm(url)
       const { imports, namespaces, setters: mapSetters } = await processModule({
@@ -330,6 +378,9 @@ register(${JSON.stringify(realUrl)}, _, set, ${JSON.stringify(specifiers.get(rea

   // For Node.js 16.12.0 and higher.
   async function load (url, context, parentLoad) {
+    if (shouldIgnore(url)) {
+      return parentLoad(url)
+    }
     if (hasIitm(url)) {
       const { source } = await getSource(url, context, parentLoad)
       return {
jsumners-nr commented 9 months ago

I think hard coding a bunch of module identifiers is not the correct path forward. We should be able to concoct a replication that we can solve.

timfish commented 5 months ago

This was fixed by #78

timfish commented 5 months ago

This was closed by #78.

Feel free to open another issue if this is not the case!