nodejs / import-in-the-middle

Like `require-in-the-middle`, but for ESM import
https://www.npmjs.com/package/import-in-the-middle
Apache License 2.0
55 stars 22 forks source link

no such file or directory #59

Closed gajus closed 2 weeks ago

gajus commented 5 months ago

Expected Behavior

getExports is throwing an error:

{
  context: { format: 'module', importAttributes: {} },
  error: Error: ENOENT: no such file or directory, open '/Users/x/Developer/contra/gaia/node_modules/.pnpm/graphql-yoga@5.1.1_graphql@16.8.1/node_modules/graphql-yoga/esm/@graphql-yoga/logger'
      at async open (node:internal/fs/promises:633:25)
      at async readFile (node:internal/fs/promises:1242:14)
      at async getSource (node:internal/modules/esm/load:46:14)
      at async defaultLoad (node:internal/modules/esm/load:137:34)
      at async nextLoad (node:internal/modules/esm/hooks:750:22)
      at async getExports (/Users/x/Developer/contra/gaia/node_modules/.pnpm/import-in-the-middle@1.7.3/node_modules/import-in-the-middle/lib/get-exports.js:68:17)
      at async processModule (/Users/x/Developer/contra/gaia/node_modules/.pnpm/import-in-the-middle@1.7.3/node_modules/import-in-the-middle/hook.js:134:23)
      at async processModule (/Users/x/Developer/contra/gaia/node_modules/.pnpm/import-in-the-middle@1.7.3/node_modules/import-in-the-middle/hook.js:160:20)
      at async getSource (/Users/x/Developer/contra/gaia/node_modules/.pnpm/import-in-the-middle@1.7.3/node_modules/import-in-the-middle/hook.js:269:60)
      at async load (/Users/x/Developer/contra/gaia/node_modules/.pnpm/import-in-the-middle@1.7.3/node_modules/import-in-the-middle/hook.js:334:26) {
    errno: -2,
    code: 'ENOENT',
    syscall: 'open',
    path: '/Users/x/Developer/contra/gaia/node_modules/.pnpm/graphql-yoga@5.1.1_graphql@16.8.1/node_modules/graphql-yoga/esm/@graphql-yoga/logger'
  },
  url: 'file:///Users/x/Developer/contra/gaia/node_modules/.pnpm/graphql-yoga@5.1.1_graphql@16.8.1/node_modules/graphql-yoga/esm/@graphql-yoga/logger'
}

I added the following code to catch the above error.

let parentCtx;

try {
  parentCtx = await parentLoad(url, context)
} catch (error) {
  console.log({
    context,
    error,
    url
  });

  throw error;
}

The same code works if I remove --loader=import-in-the-middle/hook.mjs.

Happy to provide more context.

Specifications

gajus commented 5 months ago

~Hard to tell from import URLs what's happening here, but I wonder if it is related to how pnpm works.~

Confirmed that the above is not due to the way how node modules are linked.

gajus commented 5 months ago

The paths mentioned in the URLs do not actually exist, e.g.

{
  context: { format: 'module', importAttributes: {} },
  error: Error: ENOENT: no such file or directory, open '/Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/@graphql-yoga/subscription'
      at async open (node:internal/fs/promises:633:25)
      at async readFile (node:internal/fs/promises:1242:14)
      at async getSource (node:internal/modules/esm/load:46:14)
      at async defaultLoad (node:internal/modules/esm/load:137:34)
      at async nextLoad (node:internal/modules/esm/hooks:750:22)
      at async getExports (/Users/x/Developer/contra/gaia/node_modules/import-in-the-middle/lib/get-exports.js:66:17)
      at async processModule (/Users/x/Developer/contra/gaia/node_modules/import-in-the-middle/hook.js:134:23)
      at async processModule (/Users/x/Developer/contra/gaia/node_modules/import-in-the-middle/hook.js:160:20)
      at async processModule (/Users/x/Developer/contra/gaia/node_modules/import-in-the-middle/hook.js:160:20)
      at async getSource (/Users/x/Developer/contra/gaia/node_modules/import-in-the-middle/hook.js:269:60) {
    errno: -2,
    code: 'ENOENT',
    syscall: 'open',
    path: '/Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/@graphql-yoga/subscription'
  },
  url: 'file:///Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/@graphql-yoga/subscription'
}

There is no such directory as /Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/@graphql-yoga/subscription

The closest directly that exists is /Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/.

gajus commented 5 months ago

An example of file that triggers this is:

export * from 'styled-components';
//# sourceMappingURL=index.js.map

I can see that when processModule is trying to process it, it derives wrong URL for the module.

I added this line just before processModule:

console.log({
  modFile,
  normalizedModName,
  modUrl,
  modName,
});

which tells us:

{
  modFile: 'styled-components',
  normalizedModName: 'styledComponents',
  modUrl: 'file:///Users/x/Developer/contra/gaia/packages/styled-components/dist/styled-components',
  modName: '17062121655529e604482'
}

/Users/x/Developer/contra/gaia/packages/styled-components/dist/styled-components is wrong. It should be: /Users/x/Developer/contra/gaia/node_modules/styled-components/. But there is no way of knowing that just from analyzing the URLs, which is what this module does.

https://github.com/DataDog/import-in-the-middle/blob/c3c2c52c1915b47994af59d507c59029c1f1fae9/hook.js#L155-L166

I am confused how this works for other people use cases, since this module is clearly used by others.

gajus commented 5 months ago

Another example it fails:

Object.defineProperty(exports, "TimeoutFailure", { enumerable: true, get: function () { return common_1.TimeoutFailure; } });
+__exportStar(require("@temporalio/common/lib/errors"), exports);
__exportStar(require("@temporalio/common/lib/interfaces"), exports);
__exportStar(require("@temporalio/common/lib/workflow-handle"), exports);
{
  exports: [
    '__esModule',
    'LOCAL_TARGET',
    'Connection',
    'TimeoutFailure',
    'TerminatedFailure',
    'TemporalFailure',
    'ServerFailure',
    'defaultPayloadConverter',
    'ChildWorkflowFailure',
    'CancelledFailure',
    'ApplicationFailure',
    'ActivityFailure'
  ],
  reexports: [
    '@temporalio/common/lib/errors',
    '@temporalio/common/lib/interfaces',
    '@temporalio/common/lib/workflow-handle',
    './async-completion-client',
    './client',
    './errors',
    './grpc-retry',
    './interceptors',
    './types',
    './workflow-client',
    './workflow-options',
    './schedule-types',
    './schedule-client',
    './task-queue-client'
  ]
}
Error: Cannot find module '@temporalio/common/lib/errors'
Require stack:
- /Users/x/Developer/contra/import-in-the-middle/lib/get-exports.js
- /Users/x/Developer/contra/import-in-the-middle/hook.js
    at Module._resolveFilename (node:internal/modules/cj

In this case though the file does actually exist.

node ../../node_modules/@temporalio/common/lib/errors.js
gajus commented 5 months ago

An example of file that triggers this is:

export * from 'styled-components';
//# sourceMappingURL=index.js.map

I can see that when processModule is trying to process it, it derives wrong URL for the module.

I added this line just before processModule:

console.log({
  modFile,
  normalizedModName,
  modUrl,
  modName,
});

which tells us:

{
  modFile: 'styled-components',
  normalizedModName: 'styledComponents',
  modUrl: 'file:///Users/x/Developer/contra/gaia/packages/styled-components/dist/styled-components',
  modName: '17062121655529e604482'
}

/Users/x/Developer/contra/gaia/packages/styled-components/dist/styled-components is wrong. It should be: /Users/x/Developer/contra/gaia/node_modules/styled-components/. But there is no way of knowing that just from analyzing the URLs, which is what this module does.

https://github.com/DataDog/import-in-the-middle/blob/c3c2c52c1915b47994af59d507c59029c1f1fae9/hook.js#L155-L166

I am confused how this works for other people use cases, since this module is clearly used by others.

gajus commented 5 months ago

So all of these failures are happening when resolving export * from ... pattern, e.g.

{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@envelop/core/esm/index.js',
  modFile: '@envelop/types',
  modUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@envelop/core/esm/@envelop/types',
  n: '* from @envelop/types'
}
{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/index.js',
  modFile: '@graphql-yoga/logger',
  modUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/@graphql-yoga/logger',
  n: '* from @graphql-yoga/logger'
}
{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@floating-ui/react-dom-interactions/dist/floating-ui.react-dom-interactions.mjs',
  modFile: '@floating-ui/react-dom',
  modUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@floating-ui/react-dom-interactions/dist/@floating-ui/react-dom',
  n: '* from @floating-ui/react-dom'
}
{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/playwright-chromium/index.mjs',
  modFile: 'playwright-core',
  modUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/playwright-chromium/playwright-core',
  n: '* from playwright-core'
}
{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/packages/styled-components/dist/index.js',
  modFile: 'styled-components',
  modUrl: 'file:///Users/x/Developer/contra/gaia/packages/styled-components/dist/styled-components',
  n: '* from styled-components'
}

Tagging @bengl in case you have a better sense of what's breaking.

gajus commented 5 months ago

For what it is worth, if we update the logic to resolve import paths relative to the srcUrl, it resolves to correct modules (?).

-      const data = await processModule({
-        srcUrl: modUrl,
-        context,
-        parentGetSource,
-        ns: `$${modName}`,
-        defaultAs: normalizedModName
-      })
+      let data;
+      
+      try {
+        data = await processModule({
+          srcUrl: modUrl,
+          context,
+          parentGetSource,
+          ns: `$${modName}`,
+          defaultAs: normalizedModName
+        });
+      } catch (error) {
+        console.log({
+          srcUrl,
+          modFile,
+          modUrl,
+          n,
+        });
+      }
+
+      if (!data) {
+        const newModUrl = pathToFileURL(require.resolve(modFile, {
+          paths: [srcUrl]
+        })).toString();
+
+        console.log({
+          newModUrl
+        });
+
+        data = await processModule({
+          srcUrl: newModUrl,
+          context,
+          parentGetSource,
+          ns: `$${modName}`,
+          defaultAs: normalizedModName
+        });
+      }
{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@envelop/core/esm/index.js',
  modFile: '@envelop/types',
  modUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@envelop/core/esm/@envelop/types',
  n: '* from @envelop/types'
}
{
  newModUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@envelop/types/cjs/index.js'
}
{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/index.js',
  modFile: '@graphql-yoga/logger',
  modUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/@graphql-yoga/logger',
  n: '* from @graphql-yoga/logger'
}
{
  newModUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@graphql-yoga/logger/cjs/index.js'
}
{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/subscription.js',
  modFile: '@graphql-yoga/subscription',
  modUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/@graphql-yoga/subscription',
  n: '* from @graphql-yoga/subscription'
}
{
  newModUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@graphql-yoga/subscription/cjs/index.js'
}
{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/subscription.js',
  modFile: '@graphql-yoga/subscription',
  modUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/graphql-yoga/esm/@graphql-yoga/subscription',
  n: '* from @graphql-yoga/subscription'
}
{
  newModUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@graphql-yoga/subscription/cjs/index.js'
}
{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@floating-ui/react-dom-interactions/dist/floating-ui.react-dom-interactions.mjs',
  modFile: '@floating-ui/react-dom',
  modUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@floating-ui/react-dom-interactions/dist/@floating-ui/react-dom',
  n: '* from @floating-ui/react-dom'
}
{
  newModUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/@floating-ui/react-dom/dist/floating-ui.react-dom.umd.js'
}
{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/playwright-chromium/index.mjs',
  modFile: 'playwright-core',
  modUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/playwright-chromium/playwright-core',
  n: '* from playwright-core'
}
{
  newModUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/playwright-core/index.js'
}
{
  srcUrl: 'file:///Users/x/Developer/contra/gaia/packages/styled-components/dist/index.js',
  modFile: 'styled-components',
  modUrl: 'file:///Users/x/Developer/contra/gaia/packages/styled-components/dist/styled-components',
  n: '* from styled-components'
}
{
  newModUrl: 'file:///Users/x/Developer/contra/gaia/node_modules/styled-components/dist/styled-components.cjs.js'
}
gajus commented 5 months ago

Looks like depending on how modules are installed (hoisted or non-hoisted), you will need to try both variants, i.e.

const newModUrl = pathToFileURL(require.resolve(modFile, {
  paths: [srcUrl]
})).toString();

and

const newModUrl = pathToFileURL(require.resolve(modFile)).toString();
gajus commented 5 months ago

This is the patch that we ended up with.

diff --git a/hook.js b/hook.js
index aa22356f370a34cd49ae3d5ea8fd076202e71bf5..074c388edf40f10e3e4274cf38c308371f7ce4fe 100644
--- a/hook.js
+++ b/hook.js
@@ -22,6 +22,48 @@ if (NODE_MAJOR >= 20 || (NODE_MAJOR === 18 && NODE_MINOR >= 19)) {
   getExports = ({ url }) => import(url).then(Object.keys)
 }

+function shouldIgnore (url) {
+  // In short, some packages are not compatible with this hook.
+  // I have attempted to patch this hook to work with every package, but that just sent me down a rabbit hole.
+  // See attempts and related issues here:
+  // * https://github.com/DataDog/import-in-the-middle/issues/60
+  // * https://github.com/DataDog/import-in-the-middle/issues/59
+  //
+  // However, for this hook to be useful, it does not need to work with every package.
+  // It just needs to work with the packages that we want OpenTelemetry to instrument.
+  const patterns = [
+    '@envelop/core',
+    '@floating-ui/react-dom-interactions',
+    'dataloader',
+    'graphql-yoga',
+    // Does not get patched properly.
+    // Produces errors such as:
+    // > TypeError: Cannot read properties of undefined (reading 'sendCommand')
+    // > at instrumentation_1.InstrumentationNodeModuleDefinition.moduleExports [as patch] (/Users/x/Developer/contra/gaia/node_modules/.pnpm/@opentelemetry+instrumentation-ioredis@0.36.1_@opentelemetry+api@1.7.0/node_modules/@opentelemetry/instrumentation-ioredis/build/src/instrumentation.js:40:78)
+    'ioredis',
+    'openai',
+    'playwright-chromium',
+    'styled-components',
+  ];
+
+  for (const pattern of patterns) {
+    if (url === pattern || url.startsWith(pattern + '/') || url.includes('/' + pattern + '/')) {
+      return true;
+    }
+  }
+
+  // If you miss a pattern, then you will get an error along the lines of:
+  //
+  // > Error: ENOENT: no such file or directory, open '/Users/x/Developer/contra/gaia/packages/styled-components/dist/styled-components'
+  //
+  // Usually, it is easy to tell what package is causing the issue by just looking at the path (e.g. 'styled-components' in the above example).
+  // However, other times, it is not so obvious. In those cases, you can uncomment the following line to get everything that was attempted up to the point of failure.
+  //
+  // console.log('url', url)
+
+  return false;
+}
+
 function hasIitm (url) {
   try {
     return new URL(url).searchParams.has('iitm')
@@ -230,6 +272,9 @@ function addIitm (url) {

 function createHook (meta) {
   async function resolve (specifier, context, parentResolve) {
+    if (shouldIgnore(specifier)) {
+      return parentResolve(specifier)
+    }
     const { parentURL = '' } = context
     const newSpecifier = deleteIitm(specifier)
     if (isWin && parentURL.indexOf('file:node') === 0) {
@@ -264,6 +309,9 @@ function createHook (meta) {

   const iitmURL = new URL('lib/register.js', meta.url).toString()
   async function getSource (url, context, parentGetSource) {
+    if (shouldIgnore(url)) {
+      return parentGetSource(url)
+    }
     if (hasIitm(url)) {
       const realUrl = deleteIitm(url)
       const { imports, namespaces, setters: mapSetters } = await processModule({
@@ -330,6 +378,9 @@ register(${JSON.stringify(realUrl)}, _, set, ${JSON.stringify(specifiers.get(rea

   // For Node.js 16.12.0 and higher.
   async function load (url, context, parentLoad) {
+    if (shouldIgnore(url)) {
+      return parentLoad(url)
+    }
     if (hasIitm(url)) {
       const { source } = await getSource(url, context, parentLoad)
       return {
jsumners-nr commented 5 months ago

I think hard coding a bunch of module identifiers is not the correct path forward. We should be able to concoct a replication that we can solve.

timfish commented 1 month ago

This was fixed by #78

timfish commented 2 weeks ago

This was closed by #78.

Feel free to open another issue if this is not the case!