tsy77 / blog

78 stars 2 forks source link

Node.js源码-bootstrap_node.js #9

Open tsy77 opened 6 years ago

tsy77 commented 6 years ago

上面文章提到过在src/node.cc中的LoadEnvironment方法会执行internal/bootstrap/loaders.js和internal/bootstrap/node.js,本文就来看看这两个模块做了什么,小伙伴们注意一下,这里会包含威名远扬的vm、模块加载等方面的讲解。

GetBootstrapper

首先我们来看下如何获取到internal/bootstrap/loaders.js和internal/bootstrap/node.js文件内容。

我们注意到上述两个文件内容的形式如下面所示:

(function(){})

那么是如何获取到其中的函数的呢?

void LoadEnvironment(Environment* env) {
    ......
    Local<Function> loaders_bootstrapper =
      GetBootstrapper(env, LoadersBootstrapperSource(env), loaders_name);
   .......
}

static Local<Function> GetBootstrapper(Environment* env, Local<String> source,
    ......
  // Execute the bootstrapper javascript file
  Local<Value> bootstrapper_v = ExecuteString(env, source, script_name);
  ......
 }

原来在GetBootstrapper中先去执行一下文件,得到了其中的函数。下面去执行的时候,就可以直接执行函数了。

internal/bootstrap/loaders.js

internal/bootstrap/loaders.js主要用于native模块的loader。函数输入是process、getBinding、 getLinkedBinding、getInternalBinding;输出是NativeModule构造函数和internalBinding方法。

下面我们来一步步看下代码:

1.初始化process.binding、internalBinding

// Set up process.binding() and process._linkedBinding()
// Both functions below share the single bindingObj cache declared in this
// brace scope.
  {
    // Null-prototype object used as a name -> binding cache.
    const bindingObj = Object.create(null);

    process.binding = function binding(module) {
      // Coerce the requested name to a string before using it as a cache key.
      module = String(module);
      let mod = bindingObj[module];
      // Cache miss: fetch through getBinding(), store the result, and record
      // the load in moduleLoadList.
      if (typeof mod !== 'object') {
        mod = bindingObj[module] = getBinding(module);
        moduleLoadList.push(`Binding ${module}`);
      }
      return mod;
    };

    process._linkedBinding = function _linkedBinding(module) {
      module = String(module);
      let mod = bindingObj[module];
      // Same caching scheme as process.binding(), but resolved through
      // getLinkedBinding() and without a moduleLoadList entry.
      if (typeof mod !== 'object')
        mod = bindingObj[module] = getLinkedBinding(module);
      return mod;
    };
}
// Set up internalBinding() in the closure
  let internalBinding;
  {
    // Separate cache from the one used by process.binding() above.
    const bindingObj = Object.create(null);
    internalBinding = function internalBinding(module) {
      // Note: unlike process.binding(), the name is not passed through
      // String() here.
      let mod = bindingObj[module];
      if (typeof mod !== 'object') {
        mod = bindingObj[module] = getInternalBinding(module);
        moduleLoadList.push(`Internal Binding ${module}`);
      }
      return mod;
    };
  }

process.binding和process._linkedBinding其实调用src/node.cc中的GetBinding和GetLinkedBinding方法。internalBinding调用的是GetInternalBinding方法。

2.引入node_contextify模块

const ContextifyScript = process.binding('contextify').ContextifyScript;

contextify是node中相当重要的一个模块,主要的作用是用来执行js的代码。

ContextifyScript这个js类中,主要挂载RunInContext和RunInThisContext两个方法,后面会做详细介绍。挂载的方法用到了V8中的env->SetProtoMethod来将C++方法挂载到js类的原型上,挂载代码如下所示:

// Registers the "ContextifyScript" class on `target` and records the related
// template and symbol on `env`.
static void Init(Environment* env, Local<Object> target) {
    HandleScope scope(env->isolate());
    Local<String> class_name =
        FIXED_ONE_BYTE_STRING(env->isolate(), "ContextifyScript");

    // Function template whose call handler is New; instances reserve one
    // internal field.
    Local<FunctionTemplate> script_tmpl = env->NewFunctionTemplate(New);
    script_tmpl->InstanceTemplate()->SetInternalFieldCount(1);
    script_tmpl->SetClassName(class_name);
    // Expose the two C++ entry points as prototype methods of the JS class.
    env->SetProtoMethod(script_tmpl, "runInContext", RunInContext);
    env->SetProtoMethod(script_tmpl, "runInThisContext", RunInThisContext);

    // Export the constructor under its class name and keep the template on
    // the environment.
    target->Set(class_name, script_tmpl->GetFunction());
    env->set_script_context_constructor_template(script_tmpl);

    // Create the "script parsing context" symbol, store it on the
    // environment, and export it on `target` as kParsingContext.
    Local<Symbol> parsing_context_symbol =
        Symbol::New(env->isolate(),
                    FIXED_ONE_BYTE_STRING(env->isolate(),
                                          "script parsing context"));
    env->set_vm_parsing_context_symbol(parsing_context_symbol);
    target->Set(env->context(),
                FIXED_ONE_BYTE_STRING(env->isolate(), "kParsingContext"),
                parsing_context_symbol)
        .FromJust();
  }

3.定义NativeModule构造函数

// Set up NativeModule
  // Define the NativeModule constructor.
  // Each instance records its id, a derived filename (`${id}.js`), an
  // initially empty exports object, and the loaded/loading state flags.
  function NativeModule(id) {
    this.filename = `${id}.js`;
    this.id = id;
    this.exports = {};
    this.loaded = false;
    this.loading = false;
  }

  // _source is the map of all native modules (obtained from the 'natives'
  // binding).
  NativeModule._source = getBinding('natives');
  // Cache object stored on the constructor; starts empty.
  NativeModule._cache = {};

  const config = getBinding('config');

NativeModule中主要有id、filename、exports对象等,这也是native模块的数据结构。

NativeModule._source存储的是所有native模块的map,key是模块名称,value是模块的ASCII表示。

4.NativeModule.require

顾名思义,NativeModule.require就是用来引用Native模块的。输入是模块id,输出是模块的exports属性。代码如下:

// Loads a built-in module by id and returns its exports object.
// Throws an Error with code 'ERR_UNKNOWN_BUILTIN_MODULE' when the id is not
// a known built-in.
NativeModule.require = function(id) {
    // The loader itself is served directly from loaderExports.
    if (id === loaderId) {
      return loaderExports;
    }

    // A cached module is returned even while it is still loading, in which
    // case its exports may be only partially populated.
    const cached = NativeModule.getCached(id);
    if (cached && (cached.loaded || cached.loading)) {
      return cached.exports;
    }

    if (!NativeModule.exists(id)) {
      // Model the error off the internal/errors.js model, but
      // do not use that module given that it could actually be
      // the one causing the error if there's a bug in Node.js
      // eslint-disable-next-line no-restricted-syntax
      const err = new Error(`No such built-in module: ${id}`);
      err.code = 'ERR_UNKNOWN_BUILTIN_MODULE';
      err.name = 'Error [ERR_UNKNOWN_BUILTIN_MODULE]';
      throw err;
    }

    moduleLoadList.push(`NativeModule ${id}`);

    const nativeModule = new NativeModule(id);

    // cache() is called before compile(), so the module is cached before its
    // source is executed.
    nativeModule.cache();
    nativeModule.compile();

    return nativeModule.exports;
  };

这里做了下面几件事:

1.判断是否在缓存中,是则直接取出cached.exports。NativeModule的静态属性cache中存储缓存
2.判断是否存在该模块,不存在则抛出错误。exists方法依赖NativeModule._source。
3.创建nativeModule实例
4.缓存
5.编译nativeModule。

编译执行(nativeModule.compile)

上述步骤最为关键的是nativeModule.compile(),下面我们来介绍一下。

// Fetches this module's source, wraps it, compiles it with ContextifyScript
// and runs the resulting function to populate this.exports.
NativeModule.prototype.compile = function() {
    let source = NativeModule.getSource(this.id);
    source = NativeModule.wrap(source);

    this.loading = true;

    try {
      const script = new ContextifyScript(source, this.filename);
      // Arguments: timeout, displayErrors, breakOnSigint
      // Returns function (exports, require, module, process) ...
      const fn = script.runInThisContext(-1, true, false);
      // Modules under internal/deps/ receive requireForDeps instead of the
      // regular require.
      const requireFn = this.id.startsWith('internal/deps/') ?
        NativeModule.requireForDeps :
        NativeModule.require;
      // Execute it; the results are placed directly on this.exports.
      fn(this.exports, requireFn, this, process);

      this.loaded = true;
    } finally {
      // Clear the loading flag whether or not execution threw.
      this.loading = false;
    }
  };

这里做了如下几件事:

1.获取源码
2.包装
3.创建ContextifyScript实例script
4.调用script.runInThisContext返回函数fn
5.执行fn,输入是nativeModule的exports属性、require方法、此nativeModule实例和process。

这里重点介绍一下ContextifyScript,因为我们熟知的大名鼎鼎的vm最终也是基于此来实现的。

创建ContextifyScript实例script也就是new ContextifyScript(source, this.filename),实际会调用ContextifyScript类的静态方法New,其实就是实例化了class ContextifyScript

script.runInThisContext调用的是class ContextifyScript的静态方法RunInThisContext,代码如下:

// JS-facing entry point for script.runInThisContext(timeout, displayErrors,
// breakOnSigint). Validates the three arguments and delegates to EvalMachine.
static void RunInThisContext(const FunctionCallbackInfo<Value>& args) {
    Environment* env = Environment::GetCurrent(args);

    // Exactly three arguments are required: number, boolean, boolean.
    CHECK_EQ(args.Length(), 3);

    CHECK(args[0]->IsNumber());
    int64_t timeout = args[0]->IntegerValue(env->context()).FromJust();

    CHECK(args[1]->IsBoolean());
    bool display_errors = args[1]->IsTrue();

    CHECK(args[2]->IsBoolean());
    bool break_on_sigint = args[2]->IsTrue();

    // Do the eval within this context
    EvalMachine(env, timeout, display_errors, break_on_sigint, args);
  }

上述代码主要检查了参数,调用了静态方法EvalMachine,执行的js代码的关键也在于EvalMachine,我们下面来看一下。

static bool EvalMachine(Environment* env,
                          const int64_t timeout,
                          const bool display_errors,
                          const bool break_on_sigint,
                          const FunctionCallbackInfo<Value>& args) {
    if (!ContextifyScript::InstanceOf(env, args.Holder())) {
      env->ThrowTypeError(
          "Script methods can only be called on script instances.");
      return false;
    }
    // 获取Local<Script>实例script
    TryCatch try_catch(env->isolate());
    ContextifyScript* wrapped_script;
    ASSIGN_OR_RETURN_UNWRAP(&wrapped_script, args.Holder(), false);
    Local<UnboundScript> unbound_script =
        PersistentToLocal(env->isolate(), wrapped_script->script_);
    Local<Script> script = unbound_script->BindToCurrentContext();

    // 执行
    MaybeLocal<Value> result;
    bool timed_out = false;
    bool received_signal = false;
    if (break_on_sigint && timeout != -1) {
      Watchdog wd(env->isolate(), timeout, &timed_out);
      SigintWatchdog swd(env->isolate(), &received_signal);
      result = script->Run(env->context());
    } else if (break_on_sigint) {
      SigintWatchdog swd(env->isolate(), &received_signal);
      result = script->Run(env->context());
    } else if (timeout != -1) {
      Watchdog wd(env->isolate(), timeout, &timed_out);
      result = script->Run(env->context());
    } else {
      result = script->Run(env->context());
    }

    ......

    args.GetReturnValue().Set(result.ToLocalChecked());
    return true;
  }

这里主要做了两件事:

1.获取Local<Script>对象script
2.调用script->Run执行(某些情况下需要watchdog)

watchdog用来监控执行超时,SigintWatchdog用来监听信号。我们下面来看一下watchdog如何实现?

// Creates a private uv loop with an async handle and a one-shot timer of
// `ms` milliseconds, then starts a thread that runs Watchdog::Run.
Watchdog::Watchdog(v8::Isolate* isolate, uint64_t ms, bool* timed_out)
    : isolate_(isolate), timed_out_(timed_out) {

  int rc;
  loop_ = new uv_loop_t;
  CHECK(loop_);
  rc = uv_loop_init(loop_);
  if (rc != 0) {
    FatalError("node::Watchdog::Watchdog()",
               "Failed to initialize uv loop.");
  }

  // Async handle used for cross-thread communication; ~Watchdog() signals it.
  rc = uv_async_init(loop_, &async_, &Watchdog::Async);
  CHECK_EQ(0, rc);

  // Start the timer (repeat argument is 0).
  rc = uv_timer_init(loop_, &timer_);
  CHECK_EQ(0, rc);

  rc = uv_timer_start(&timer_, &Watchdog::Timer, ms, 0);
  CHECK_EQ(0, rc);

  // Run the loop on a dedicated thread.
  rc = uv_thread_create(&thread_, &Watchdog::Run, this);
  CHECK_EQ(0, rc);
}

// Signals the async handle, joins the watchdog thread, then closes the
// remaining handle and tears down the loop.
Watchdog::~Watchdog() {
  // Triggers Watchdog::Async on the loop, which calls uv_stop().
  uv_async_send(&async_);
  uv_thread_join(&thread_);

  uv_close(reinterpret_cast<uv_handle_t*>(&async_), nullptr);

  // Clean up the loop.
  // UV_RUN_DEFAULT so that libuv has a chance to clean up.
  uv_run(loop_, UV_RUN_DEFAULT);

  // Release the resources behind the loop_ pointer.
  int rc = uv_loop_close(loop_);
  CHECK_EQ(0, rc);
  delete loop_;
  loop_ = nullptr;
}

// Thread entry point: runs the watchdog's loop, then closes the timer handle.
// `arg` is the Watchdog instance passed to uv_thread_create().
void Watchdog::Run(void* arg) {
  Watchdog* wd = static_cast<Watchdog*>(arg);

  // UV_RUN_DEFAULT the loop will be stopped either by the async or the
  // timer handle.
  // UV_RUN_DEFAULT: the default run mode; the loop keeps iterating until its
  // reference count (ref) drops to 0.
  uv_run(wd->loop_, UV_RUN_DEFAULT);

  // Loop ref count reaches zero when both handles are closed.
  // Close the timer handle on this side and let ~Watchdog() close async_
  uv_close(reinterpret_cast<uv_handle_t*>(&wd->timer_), nullptr);
}

// Async-handle callback: recovers the owning Watchdog from its async_ member
// and stops that watchdog's loop.
void Watchdog::Async(uv_async_t* async) {
  Watchdog* w = ContainerOf(&Watchdog::async_, async);
  uv_stop(w->loop_);
}

node中的watchdog是利用创建一个新的线程来实现的。这里有一个需要铺垫的点是uv_run的第二个参数代表事件循环模式,UV_RUN_DEFAULT是默认的循环模式,将会不断重复这个循环,直到"循环引用计数器(ref)"减为0。

超时的流程大致如下,在新线程里面,首先执行uv_run把event loop跑起来,当timer到时后,执行uv_stop将事件循环终止,接着uv_close执行,关闭了timer handle,这时循环的ref还剩async一个,接着watchdog被析构,给主线程发送信号,主线程接收到信号后w->isolate()->TerminateExecution(),最后清理了event loop。

internal/bootstrap/node.js

internal/bootstrap/node.js在internal/bootstrap/loaders.js的基础上,做了一系列初始化操作,最终利用CJS模块查找、执行用户的代码。下面将从逻辑流程上展开说明,后面也会重点介绍CJSModule(模块加载也在此)。

流程

1.初始化

setupProcessObject();

    // do this good and early, since it handles errors.
    setupProcessFatal();

    // 初始化map遍历器等
    setupV8();
    // 国际化
    setupProcessICUVersions();

    //为global挂载Symbol.toStringTag、buffer等属性
    setupGlobalVariables();

    const _process = NativeModule.require('internal/process');
    _process.setupConfig(NativeModule._source);
    // 信号监听相关
    _process.setupSignalHandlers();
    _process.setupUncaughtExceptionCapture(exceptionHandlerState);
    // 初始化warning等模块
    NativeModule.require('internal/process/warning').setup();
    NativeModule.require('internal/process/next_tick').setup();
    NativeModule.require('internal/process/stdio').setup();
    NativeModule.require('internal/process/methods').setup();

    const perf = process.binding('performance');
    const {
      NODE_PERFORMANCE_MILESTONE_BOOTSTRAP_COMPLETE,
      NODE_PERFORMANCE_MILESTONE_THIRD_PARTY_MAIN_START,
      NODE_PERFORMANCE_MILESTONE_THIRD_PARTY_MAIN_END,
      NODE_PERFORMANCE_MILESTONE_CLUSTER_SETUP_START,
      NODE_PERFORMANCE_MILESTONE_CLUSTER_SETUP_END,
      NODE_PERFORMANCE_MILESTONE_MODULE_LOAD_START,
      NODE_PERFORMANCE_MILESTONE_MODULE_LOAD_END,
      NODE_PERFORMANCE_MILESTONE_PRELOAD_MODULE_LOAD_START,
      NODE_PERFORMANCE_MILESTONE_PRELOAD_MODULE_LOAD_END
    } = perf.constants;

    _process.setup_hrtime();
    _process.setup_performance();
    _process.setup_cpuUsage();
    // 调用isolate->GetHeapStatistics(&v8_heap_stats);
    _process.setupMemoryUsage();
    _process.setupKillAndExit();
    if (global.__coverage__)
      NativeModule.require('internal/process/write-coverage').setup();

    NativeModule.require('internal/trace_events_async_hooks').setup();
    NativeModule.require('internal/inspector_async_hook').setup();

    _process.setupChannel();
    _process.setupRawDebug();

    const browserGlobals = !process._noBrowserGlobals;
    if (browserGlobals) {
      setupGlobalTimeouts();
      setupGlobalConsole();
      setupGlobalURL();
    }

    // Ensure setURLConstructor() is called before the native
    // URL::ToObject() method is used.
    NativeModule.require('internal/url');

    // On OpenBSD process.execPath will be relative unless we
    // get the full path before process.execPath is used.
    if (process.platform === 'openbsd') {
      const { realpathSync } = NativeModule.require('fs');
      process.execPath = realpathSync.native(process.execPath);
    }

    Object.defineProperty(process, 'argv0', {
      enumerable: true,
      configurable: false,
      value: process.argv[0]
    });
    process.argv[0] = process.execPath;

    // Handle `--debug*` deprecation and invalidation
    if (process._invalidDebug) {
      process.emitWarning(
        '`node --debug` and `node --debug-brk` are invalid. ' +
        'Please use `node --inspect` or `node --inspect-brk` instead.',
        'DeprecationWarning', 'DEP0062', startup, true);
      process.exit(9);
    } else if (process._deprecatedDebugBrk) {
      process.emitWarning(
        '`node --inspect --debug-brk` is deprecated. ' +
        'Please use `node --inspect-brk` instead.',
        'DeprecationWarning', 'DEP0062', startup, true);
    }

    if (process.binding('config').experimentalModules ||
        process.binding('config').experimentalVMModules) {
      if (process.binding('config').experimentalModules) {
        process.emitWarning(
          'The ESM module loader is experimental.',
          'ExperimentalWarning', undefined);
      }
      NativeModule.require('internal/process/esm_loader').setup();
    }

    // 废弃方法说明
    {
      // Install legacy getters on the `util` binding for typechecking.
      // TODO(addaleax): Turn into a full runtime deprecation.
      const { pendingDeprecation } = process.binding('config');
      const { deprecate } = NativeModule.require('internal/util');
      const utilBinding = process.binding('util');
      const types = internalBinding('types');
      for (const name of [
        'isArrayBuffer', 'isArrayBufferView', 'isAsyncFunction',
        'isDataView', 'isDate', 'isExternal', 'isMap', 'isMapIterator',
        'isNativeError', 'isPromise', 'isRegExp', 'isSet', 'isSetIterator',
        'isTypedArray', 'isUint8Array', 'isAnyArrayBuffer'
      ]) {
        utilBinding[name] = pendingDeprecation ?
          deprecate(types[name],
                    'Accessing native typechecking bindings of Node ' +
                    'directly is deprecated. ' +
                    `Please use \`util.types.${name}\` instead.`,
                    'DEP0103') :
          types[name];
      }
    }

这里主要做的初始化有如下几点:

1.初始化process的set_push_values_to_array_function方法
2.初始化V8,map遍历器等
3.为global挂载Symbol.toStringTag、buffer等属性
4.安装信号处理函数
5.初始化warning等模块
6.废弃方法说明

这里说明一下setupV8方法,因为它调用了咱们前面提到的v8 builtin模块。代码如下:

// Warms up the iterator/weak-collection preview helpers from internal/v8,
// then turns --allow_natives_syntax back off unless the user asked for it.
function setupV8() {
    // Warm up the map and set iterator preview functions.  V8 compiles
    // functions lazily (unless --nolazy is set) so we need to do this
    // before we turn off --allow_natives_syntax again.
    const v8 = NativeModule.require('internal/v8');
    // Initialize the map iterator preview and related helpers.
    v8.previewMapIterator(new Map().entries());
    v8.previewSetIterator(new Set().entries());
    v8.previewWeakMap(new WeakMap(), 1);
    v8.previewWeakSet(new WeakSet(), 1);
    // Disable --allow_natives_syntax again unless it was explicitly
    // specified on the command line.
    // From this point on --allow_natives_syntax is disabled.
    const re = /^--allow[-_]natives[-_]syntax$/;
    if (!process.execArgv.some((s) => re.test(s)))
      process.binding('v8').setFlagsFromString('--noallow_natives_syntax');
  }

其中v8.previewMapIterator就用到了v8 builtin模块:

// Clone the provided Map Iterator.
function previewMapIterator(it) {
  // V8 built-in function; when called from JS its name is prefixed with %.
  // Such functions are normally called from V8's internal code; calling them
  // from user JS requires running with the --allow-natives-syntax flag.
  return %MapIteratorClone(it);
}

2.执行

执行阶段的代码如下:

// There is user code to be run
      // 有用户代码要执行
      // If this is a worker in cluster mode, start up the communication
      // channel. This needs to be done before any user code gets executed
      // (including preload modules).
      // 如果在集群模式下有worder,需要先初始化
      if (process.argv[1] && process.env.NODE_UNIQUE_ID) {
        perf.markMilestone(NODE_PERFORMANCE_MILESTONE_CLUSTER_SETUP_START);
        const cluster = NativeModule.require('cluster');
        // 实例化worker
        // 监听disconnect,newconn等
        cluster._setupWorker();
        perf.markMilestone(NODE_PERFORMANCE_MILESTONE_CLUSTER_SETUP_END);
        // Make sure it's not accidentally inherited by child processes.
        delete process.env.NODE_UNIQUE_ID;
      }

      if (process._eval != null && !process._forceRepl) {
        perf.markMilestone(NODE_PERFORMANCE_MILESTONE_MODULE_LOAD_START);
        perf.markMilestone(NODE_PERFORMANCE_MILESTONE_MODULE_LOAD_END);
        // User passed '-e' or '--eval' arguments to Node without '-i' or
        // '--interactive'

        perf.markMilestone(
          NODE_PERFORMANCE_MILESTONE_PRELOAD_MODULE_LOAD_START);
        preloadModules();
        perf.markMilestone(NODE_PERFORMANCE_MILESTONE_PRELOAD_MODULE_LOAD_END);

        const {
          addBuiltinLibsToObject
        } = NativeModule.require('internal/modules/cjs/helpers');
        // 为global加上'assert', 'async_hooks', 'buffer'等属性
        addBuiltinLibsToObject(global);
        evalScript('[eval]');
      } else if (process.argv[1] && process.argv[1] !== '-') {
        perf.markMilestone(NODE_PERFORMANCE_MILESTONE_MODULE_LOAD_START);
        // make process.argv[1] into a full path
        const path = NativeModule.require('path');
        process.argv[1] = path.resolve(process.argv[1]);

        const CJSModule = NativeModule.require('internal/modules/cjs/loader');

        perf.markMilestone(NODE_PERFORMANCE_MILESTONE_MODULE_LOAD_END);
        perf.markMilestone(
          NODE_PERFORMANCE_MILESTONE_PRELOAD_MODULE_LOAD_START);
        preloadModules();
        perf.markMilestone(
          NODE_PERFORMANCE_MILESTONE_PRELOAD_MODULE_LOAD_END);
        // check if user passed `-c` or `--check` arguments to Node.
        if (process._syntax_check_only != null) {
          const fs = NativeModule.require('fs');
          // read the source
          // 查找文件
          const filename = CJSModule._resolveFilename(process.argv[1]);
          const source = fs.readFileSync(filename, 'utf-8');
          // 检测语法,去掉shebang、BOM等
          checkScriptSyntax(source, filename);
          process.exit(0);
        }
        CJSModule.runMain();
      } else {
        perf.markMilestone(NODE_PERFORMANCE_MILESTONE_MODULE_LOAD_START);
        perf.markMilestone(NODE_PERFORMANCE_MILESTONE_MODULE_LOAD_END);
        perf.markMilestone(
          NODE_PERFORMANCE_MILESTONE_PRELOAD_MODULE_LOAD_START);
        preloadModules();
        perf.markMilestone(
          NODE_PERFORMANCE_MILESTONE_PRELOAD_MODULE_LOAD_END);
        // If -i or --interactive were passed, or stdin is a TTY.
        if (process._forceRepl || NativeModule.require('tty').isatty(0)) {
          // REPL
          const cliRepl = NativeModule.require('internal/repl');
          cliRepl.createInternalRepl(process.env, function(err, repl) {
            if (err) {
              throw err;
            }
            repl.on('exit', function() {
              if (repl._flushing) {
                repl.pause();
                return repl.once('flushHistory', function() {
                  process.exit();
                });
              }
              process.exit();
            });
          });

          if (process._eval != null) {
            // User passed '-e' or '--eval'
            evalScript('[eval]');
          }
        } else {
          // Read all of stdin - execute it.
          process.stdin.setEncoding('utf8');

          let code = '';
          process.stdin.on('data', function(d) {
            code += d;
          });

          process.stdin.on('end', function() {
            if (process._syntax_check_only != null) {
              checkScriptSyntax(code, '[stdin]');
            } else {
              process._eval = code;
              evalScript('[stdin]');
            }
          });
        }
      }

主要做了如下几件事:

1.如果在集群模式下有worker,需要先初始化。主要是实例化一个worker,监听disconnect,newconn等消息
2.预加载用户指定的模块(process._preload_modules)
3.利用CJSModule._resolveFilename查找文件真实路径
4.检测语法,去掉shebang、BOM等
5.执行

CJSModule

1.模块查找

模块查找主要依赖CJSModule._resolveFilename方法,输入为想要引入的模块,输出为模块的真实路径。其代码如下所示:

// Resolves `request` to a filename.
// Returns `request` unchanged when it names a non-internal native module;
// otherwise builds the list of lookup paths and searches it with
// Module._findPath. Throws an Error with code 'MODULE_NOT_FOUND' when
// nothing is found.
Module._resolveFilename = function(request, parent, isMain, options) {
  if (NativeModule.nonInternalExists(request)) {
    return request;
  }

  var paths;

  // Caller supplied explicit search roots via options.paths.
  if (typeof options === 'object' && options !== null &&
      Array.isArray(options.paths)) {
    // Throwaway parent whose `paths` is reassigned for each root below.
    const fakeParent = new Module('', null);

    paths = [];

    for (var i = 0; i < options.paths.length; i++) {
      const path = options.paths[i];
      // node_modules directories derived from this root path.
      fakeParent.paths = Module._nodeModulePaths(path);
      const lookupPaths = Module._resolveLookupPaths(request, fakeParent, true);

      // Add the root itself, then each lookup path, skipping duplicates.
      if (!paths.includes(path))
        paths.push(path);

      for (var j = 0; j < lookupPaths.length; j++) {
        if (!paths.includes(lookupPaths[j]))
          paths.push(lookupPaths[j]);
      }
    }
  } else {
    paths = Module._resolveLookupPaths(request, parent, true);
  }

  // look up the filename first, since that's the cache key.
  var filename = Module._findPath(request, paths, isMain);
  if (!filename) {
    // eslint-disable-next-line no-restricted-syntax
    var err = new Error(`Cannot find module '${request}'`);
    err.code = 'MODULE_NOT_FOUND';
    throw err;
  }
  return filename;
};

上述代码首先利用Module._resolveLookupPaths罗列出所有要查找的路径,再利用Module._findPath在其中查找对应模块。

Module._resolveLookupPaths

Module._resolveLookupPaths代码如下所示:

// 'index.' character codes
// 获取所有的查找路径
// 返回一个数组,第一项为模块名称即request,第二项返回一个可能包含这个模块的文件夹路径数组
var indexChars = [ 105, 110, 100, 101, 120, 46 ];
var indexLen = indexChars.length;
Module._resolveLookupPaths = function(request, parent, newReturn) {
  // 不在lib/internal的native模块
  if (NativeModule.nonInternalExists(request)) {
    debug('looking for %j in []', request);
    return (newReturn ? null : [request, []]);
  }

  // 不以'..'、'./'开头的模块,即require('moduleA')
  // Check for relative path
  if (request.length < 2 ||
      request.charCodeAt(0) !== CHAR_DOT ||
      (request.charCodeAt(1) !== CHAR_DOT &&
       request.charCodeAt(1) !== CHAR_FORWARD_SLASH)) {
    //0:"/Users/tsy/.node_modules"
    //1:"/Users/tsy/.node_libraries"
    //2:"/Users/tsy/.nvm/versions/node/v8.2.1/lib/node"
    var paths = modulePaths;
    if (parent) {
      if (!parent.paths)
        paths = parent.paths = [];
      else
        /**
         * 0:"/Users/tsy/devspace/mis/server/third-party/node_modules"
            1:"/Users/tsy/devspace/mis/server/node_modules"
            2:"/Users/tsy/devspace/mis/node_modules"
            3:"/Users/tsy/devspace/node_modules"
            4:"/Users/tsy/node_modules"
            5:"/Users/node_modules"
            6:"/node_modules"
         */
        paths = parent.paths.concat(paths);
    }

    // Maintain backwards compat with certain broken uses of require('.')
    // by putting the module's directory in front of the lookup paths.
    // require('.')
    if (request === '.') {
      if (parent && parent.filename) {
        paths.unshift(path.dirname(parent.filename));
      } else {
        paths.unshift(path.resolve(request));
      }
    }

    debug('looking for %j in %j', request, paths);
    return (newReturn ? (paths.length > 0 ? paths : null) : [request, paths]);
  }

  // with --eval, parent.id is not set and parent.filename is null
  if (!parent || !parent.id || !parent.filename) {
    // make require('./path/to/foo') work - normally the path is taken
    // from realpath(__filename) but with eval there is no filename
    var mainPaths = ['.'].concat(Module._nodeModulePaths('.'), modulePaths);

    debug('looking for %j in %j', request, mainPaths);
    return (newReturn ? mainPaths : [request, mainPaths]);
  }

  ......

  return (newReturn ? parentDir : [id, parentDir]);
};

上面注释已经写的比较详细了,其中有几点要注意:

modulePaths根据环境变量HOME和NODE_PATH得到的路径,比如我本地得到的路径是:

0:"/Users/tsy/.node_modules"
1:"/Users/tsy/.node_libraries"
2:"/Users/tsy/.nvm/versions/node/v8.2.1/lib/node"

parent.paths是在resolveFilename调用该函数时传递下来的参数,表示从现有目录到根目录下的所有node_modules目录,获取的代码如下:

fakeParent.paths = Module._nodeModulePaths(path);

// Builds the list of candidate `node_modules` directories for `from`, one
// per path prefix ending at a '/', with '/node_modules' appended last.
Module._nodeModulePaths = function(from) {
    // guarantee that 'from' is absolute.
    from = path.resolve(from);
    // Return early not only to avoid unnecessary work, but to *avoid* returning
    // an array of two items for a root: [ '//node_modules', '/node_modules' ]
    if (from === '/')
      return ['/node_modules'];

    // note: this approach *only* works when the path is guaranteed
    // to be absolute.  Doing a fully-edge-case-correct path.split
    // that works on both Windows and Posix is non-trivial.
    const paths = [];
    // p counts how many characters of the current segment have matched
    // nmChars so far; -1 means the segment has already mismatched.
    // NOTE(review): nmChars/nmLen are defined outside this excerpt -
    // presumably the char codes of 'node_modules' in reverse; confirm.
    var p = 0;
    // Index just past the end of the prefix to emit at the next separator.
    var last = from.length;
    // Scan the path from its last character back to the first.
    for (var i = from.length - 1; i >= 0; --i) {
      const code = from.charCodeAt(i);
      if (code === CHAR_FORWARD_SLASH) {
        // Emit a candidate for this prefix unless p reached nmLen for the
        // segment just scanned.
        if (p !== nmLen)
          paths.push(from.slice(0, last) + '/node_modules');
        last = i;
        p = 0;
      } else if (p !== -1) {
        if (nmChars[p] === code) {
          ++p;
        } else {
          p = -1;
        }
      }
    }

    // Append /node_modules to handle root paths.
    paths.push('/node_modules');

    return paths;
  };

Module._nodeModulePaths获取从起始目录遍历,每一层都加上node_modules。

Module._findPath

Module._findPath方法是在上面列出的查找目录中找到对应的模块,代码如下:

// File lookup.
// From the code below, the priority when resolving a concrete file is:
// 1. the exact file
// 2. the file with an extension appended
// 3. package.json main
// 4. index with an extension appended
// Candidate paths are the current folder, the Node.js system folders and the
// folders under node_modules; the first match found in the order above is
// returned immediately.
var warned = false;
Module._findPath = function(request, paths, isMain) {
  // Absolute requests are resolved on their own; otherwise an empty path
  // list means there is nothing to search.
  if (path.isAbsolute(request)) {
    paths = [''];
  } else if (!paths || paths.length === 0) {
    return false;
  }

  // Results are cached per (request, paths) combination.
  var cacheKey = request + '\x00' +
                (paths.length === 1 ? paths[0] : paths.join('\x00'));
  var entry = Module._pathCache[cacheKey];
  if (entry)
    return entry;

  var exts;
  // A request ending in '/', or one that is '.' / '..' or ends in '/.' or
  // '/..', skips the file checks and is only tried as a directory.
  var trailingSlash = request.length > 0 &&
    request.charCodeAt(request.length - 1) === CHAR_FORWARD_SLASH;
  if (!trailingSlash) {
    trailingSlash = /(?:^|\/)\.?\.$/.test(request);
  }

  // For each path
  // Walk the candidate directories one by one.
  for (var i = 0; i < paths.length; i++) {
    // Don't search further if path doesn't exist
    const curPath = paths[i];
    if (curPath && stat(curPath) < 1) continue;
    var basePath = path.resolve(curPath, request);
    var filename;

    var rc = stat(basePath);
    if (!trailingSlash) {
      // Try the exact path first.
      if (rc === 0) {  // File.
        if (preserveSymlinks && !isMain) {
          filename = path.resolve(basePath);
        } else {
          filename = toRealPath(basePath);
        }
      }

      // Then try with each extension appended.
      if (!filename) {
        // try it with each of the extensions
        if (exts === undefined)
          exts = Object.keys(Module._extensions);
        filename = tryExtensions(basePath, exts, isMain);
      }
    }

    if (!filename && rc === 1) {  // Directory.
      // try it with each of the extensions at "index"
      if (exts === undefined)
        exts = Object.keys(Module._extensions);
      // Look for the main entry in package.json.
      filename = tryPackage(basePath, exts, isMain);
      if (!filename) {
        filename = tryExtensions(path.resolve(basePath, 'index'), exts, isMain);
      }
    }

    if (filename) {
      // Warn once if '.' resolved outside the module dir
      if (request === '.' && i > 0) {
        if (!warned) {
          warned = true;
          process.emitWarning(
            'warning: require(\'.\') resolved outside the package ' +
            'directory. This functionality is deprecated and will be removed ' +
            'soon.',
            'DeprecationWarning', 'DEP0019');
        }
      }

      Module._pathCache[cacheKey] = filename;
      return filename;
    }
  }
  return false;
};

这里将遍历所有目录,在相应目录中再查找对应模块;在每个查找目录中,查找模块也会有一定的优先级:

1. 具体文件
2. 加上后缀
3. package.json main
4. index加上后缀

2.执行

执行的过程是调用CJSModule.runMain(),在其中调用Module._load(),Module._load代码如下:

// Loads the module named by `request` and returns its exports.
// Order of operations: resolve the filename, return a cached module if one
// exists, hand non-internal native modules to NativeModule.require, and
// otherwise create, cache and load a new Module.
Module._load = function(request, parent, isMain) {
  if (parent) {
    debug('Module._load REQUEST %s parent: %s', request, parent.id);
  }

  // With experimentalModules set, the main module is imported through the
  // ESM loader and this function returns undefined.
  if (experimentalModules && isMain) {
    asyncESM.loaderPromise.then((loader) => {
      return loader.import(getURLFromFilePath(request).pathname);
    })
    .catch((e) => {
      decorateErrorStack(e);
      console.error(e);
      process.exit(1);
    });
    return;
  }

  // Resolve the file path.
  var filename = Module._resolveFilename(request, parent, isMain);

  // Check whether the module is already cached.
  var cachedModule = Module._cache[filename];
  if (cachedModule) {
    updateChildren(parent, cachedModule, true);
    return cachedModule.exports;
  }

  if (NativeModule.nonInternalExists(filename)) {
    debug('load native module %s', request);
    return NativeModule.require(filename);
  }

  // Don't call updateChildren(), Module constructor already does.
  var module = new Module(filename, parent);

  // The main module is exposed as process.mainModule and gets the id '.'.
  if (isMain) {
    process.mainModule = module;
    module.id = '.';
  }

  // The module is placed in the cache before it is loaded.
  Module._cache[filename] = module;

  tryModuleLoad(module, filename);

  return module.exports;
};

过程跟NativeModule.require类似:

1.获取文件路径
2.检查缓存
3.实例化CJSModule并写入缓存
4.执行

执行的过程最终调用了CJSModule._compile方法,而CJSModule._compile最终调用的是vm.runInThisContext。

// create wrapper function
// wrap
var wrapper = Module.wrap(content);

var compiledWrapper = vm.runInThisContext(wrapper, {
filename: filename,
lineOffset: 0,
displayErrors: true
});

vm.runInThisContext其实就是调用了本文上面描述的ContextifyScript的runInThisContext,简要代码如下:

class Script extends ContextifyScript {
......

runInThisContext(options) {
    const { breakOnSigint, args } = getRunInContextArgs(options);
    if (breakOnSigint && process.listenerCount('SIGINT') > 0) {
      return sigintHandlersWrap(super.runInThisContext, this, args);
    } else {
      return super.runInThisContext(...args);
    }
  }

......
}

总结

本文主要从lib/internal/bootstrap中的loaders.js和node.js入手,讲述了具体执行js代码的过程,其中还加入了相关的模块查找、vm、contextify等方面的东西。