tsy77 / blog

78 stars 2 forks source link

Node.js源码-一个node程序是如何运行的 #7

Open tsy77 opened 6 years ago

tsy77 commented 6 years ago

本文从node入口出发,一步一步的阅读源码,直到运行结束。

node入口

node的入口是node/src/node_main.cc文件,main函数代码如下:

// Entry point: optionally ignores SIGPIPE (shared-library POSIX builds), reads
// AT_SECURE from the ELF auxiliary vector on Linux, disables stdio buffering,
// and then delegates to node::Start().
//
// argc/argv are forwarded unchanged to node::Start(); its return value becomes
// the process exit code.
int main(int argc, char *argv[]) {
#if defined(__POSIX__) && defined(NODE_SHARED_MODE)
  // In node::PlatformInit(), we squash all signal handlers for non-shared lib
  // build. In order to run test cases against shared lib build, we also need
  // to do the same thing for shared lib build here, but only for SIGPIPE for
  // now. If node::PlatformInit() is moved to here, then this section could be
  // removed.
  // Author's note: once one end of a socket has been closed, a further write()
  // by this process makes the OS send it SIGPIPE, whose default action is to
  // terminate the process. Installing SIG_IGN as the handler ignores the
  // signal instead.
  {
    struct sigaction act;
    memset(&act, 0, sizeof(act));
    act.sa_handler = SIG_IGN;
    sigaction(SIGPIPE, &act, nullptr);
  }
#endif

#if defined(__linux__)
  // Advance past the NULL entry that terminates environ; the memory right
  // after it is then read as an array of Elf_auxv_t (the auxiliary vector).
  char** envp = environ;
  while (*envp++ != nullptr) {}
  Elf_auxv_t* auxv = reinterpret_cast<Elf_auxv_t*>(envp);
  // Scan until AT_NULL and record the AT_SECURE entry, if one is present.
  for (; auxv->a_type != AT_NULL; auxv++) {
    if (auxv->a_type == AT_SECURE) {
      node::linux_at_secure = auxv->a_un.a_val;
      break;
    }
  }
#endif
  // Disable stdio buffering, it interacts poorly with printf()
  // calls elsewhere in the program (e.g., any logging from V8.)
  setvbuf(stdout, nullptr, _IONBF, 0);
  setvbuf(stderr, nullptr, _IONBF, 0);
  return node::Start(argc, argv);
}

这里主要做了三件事:

1.屏蔽SIGPIPE信号(具体可看代码注释)
2.在Linux下,越过环境变量数组environ末尾的NULL,把其后的内存当作辅助向量(Elf_auxv_t数组,即动态链接器所需的辅助信息)来遍历,找到AT_SECURE项后把它的值赋给node::linux_at_secure
3.node::Start(argc, argv)

node::Start执行流程

node::Start代码如下:

int Start(int argc, char** argv) {
  atexit([] () { uv_tty_reset_mode(); });
  PlatformInit();
  performance::performance_node_start = PERFORMANCE_NOW();

  CHECK_GT(argc, 0);

  // Hack around with the argv pointer. Used for process.title = "blah".
  argv = uv_setup_args(argc, argv);

  // This needs to run *before* V8::Initialize().  The const_cast is not
  // optional, in case you're wondering.
  int exec_argc;
  const char** exec_argv;
  Init(&argc, const_cast<const char**>(argv), &exec_argc, &exec_argv);

#if HAVE_OPENSSL
  {
    std::string extra_ca_certs;
    if (SafeGetenv("NODE_EXTRA_CA_CERTS", &extra_ca_certs))
      crypto::UseExtraCaCerts(extra_ca_certs);
  }
#ifdef NODE_FIPS_MODE
  // In the case of FIPS builds we should make sure
  // the random source is properly initialized first.
  OPENSSL_init();
#endif  // NODE_FIPS_MODE
  // V8 on Windows doesn't have a good source of entropy. Seed it from
  // OpenSSL's pool.
  V8::SetEntropySource(crypto::EntropySource);
#endif  // HAVE_OPENSSL

  v8_platform.Initialize(v8_thread_pool_size);
  // Enable tracing when argv has --trace-events-enabled.
  v8_platform.StartTracingAgent();
  V8::Initialize();
  performance::performance_v8_start = PERFORMANCE_NOW();
  v8_initialized = true;
  const int exit_code =
      Start(uv_default_loop(), argc, argv, exec_argc, exec_argv);
  v8_platform.StopTracingAgent();
  v8_initialized = false;
  V8::Dispose();

  // uv_run cannot be called from the time before the beforeExit callback
  // runs until the program exits unless the event loop has any referenced
  // handles after beforeExit terminates. This prevents unrefed timers
  // that happen to terminate during shutdown from being run unsafely.
  // Since uv_run cannot be called, uv_async handles held by the platform
  // will never be fully cleaned up.
  v8_platform.Dispose();

  delete[] exec_argv;
  exec_argv = nullptr;

  return exit_code;
}

1.PlatformInit

// One-time, platform-specific process setup done before anything else in
// node::Start(): signal mask and dispositions, validity of fds 0-2, and the
// open-file limit on POSIX; validity of fds 0-2 on Windows.
inline void PlatformInit() {
#ifdef __POSIX__
#if HAVE_INSPECTOR
  // sigset_t describes a set of signals (author's note: one bit per signal;
  // the exact representation is implementation-defined).
  sigset_t sigmask;
  sigemptyset(&sigmask);
  sigaddset(&sigmask, SIGUSR1);
  // SIG_SETMASK replaces the calling thread's blocked-signal mask with
  // sigmask. The set holds only SIGUSR1, so SIGUSR1 is the one signal blocked
  // here; every other signal is left unblocked.
  const int err = pthread_sigmask(SIG_SETMASK, &sigmask, nullptr);
#endif  // HAVE_INSPECTOR

  // Make sure file descriptors 0-2 are valid before we start logging anything.
  for (int fd = STDIN_FILENO; fd <= STDERR_FILENO; fd += 1) {
    struct stat ignored;
    if (fstat(fd, &ignored) == 0)
      continue;
    // Anything but EBADF means something is seriously wrong.  We don't
    // have to special-case EINTR, fstat() is not interruptible.
    if (errno != EBADF)
      ABORT();
    // The descriptor is closed: open /dev/null and require that it lands on
    // exactly this fd number.
    if (fd != open("/dev/null", O_RDWR))
      ABORT();
  }

#if HAVE_INSPECTOR
  // The pthread_sigmask() result is only checked here, after fds 0-2 have
  // been validated above.
  CHECK_EQ(err, 0);
#endif  // HAVE_INSPECTOR

#ifndef NODE_SHARED_MODE
  // Restore signal dispositions, the parent process may have changed them.
  struct sigaction act;
  memset(&act, 0, sizeof(act));

  // The hard-coded upper limit is because NSIG is not very reliable; on Linux,
  // it evaluates to 32, 34 or 64, depending on whether RT signals are enabled.
  // Counting up to SIGRTMIN doesn't work for the same reason.
  // As in main(), SIGPIPE is set to be ignored; every other signal (except
  // SIGKILL and SIGSTOP, which are skipped) is reset to SIG_DFL.
  // Author's note: unlike pthread_sigmask(), which affects only the calling
  // thread's mask, a handler installed with signal()/sigaction() changes the
  // handler for all threads of the process.
  for (unsigned nr = 1; nr < kMaxSignal; nr += 1) {
    if (nr == SIGKILL || nr == SIGSTOP)
      continue;
    act.sa_handler = (nr == SIGPIPE) ? SIG_IGN : SIG_DFL;
    CHECK_EQ(0, sigaction(nr, &act, nullptr));
  }
#endif  // !NODE_SHARED_MODE

  RegisterSignalHandler(SIGINT, SignalExit, true);
  RegisterSignalHandler(SIGTERM, SignalExit, true);

  // Raise the open file descriptor limit.
  // (i.e. the number of files this process is allowed to have open)
  struct rlimit lim;
  if (getrlimit(RLIMIT_NOFILE, &lim) == 0 && lim.rlim_cur != lim.rlim_max) {
    // Do a binary search for the limit.
    rlim_t min = lim.rlim_cur;
    rlim_t max = 1 << 20;
    // But if there's a defined upper bound, don't search, just set it.
    if (lim.rlim_max != RLIM_INFINITY) {
      min = lim.rlim_max;
      max = lim.rlim_max;
    }
    // Each iteration tries the midpoint: a failing setrlimit() lowers the
    // upper bound, a successful one raises the lower bound. With min == max
    // the loop body runs exactly once.
    do {
      lim.rlim_cur = min + (max - min) / 2;
      if (setrlimit(RLIMIT_NOFILE, &lim)) {
        max = lim.rlim_cur;
      } else {
        min = lim.rlim_cur;
      }
    } while (min + 1 < max);
  }
#endif  // __POSIX__
#ifdef _WIN32
  // Windows counterpart of the fd 0-2 check above: a descriptor with an
  // invalid or unknown-type handle is closed and reopened on "nul".
  for (int fd = 0; fd <= 2; ++fd) {
    auto handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
    if (handle == INVALID_HANDLE_VALUE ||
        GetFileType(handle) == FILE_TYPE_UNKNOWN) {
      // Ignore _close result. If it fails or not depends on used Windows
      // version. We will just check _open result.
      _close(fd);
      if (fd != _open("nul", _O_RDWR))
        ABORT();
    }
  }
#endif  // _WIN32
}

主要以下几件事:

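1.利用pthread_sigmask把当前线程的信号屏蔽集设置为只包含SIGUSR1,也就是只阻塞SIGUSR1这一个信号,其余信号不被阻塞(仅在HAVE_INSPECTOR时执行)
2.逐个检查文件描述符0~2(STDIN_FILENO到STDERR_FILENO,即标准输入、标准输出、标准错误)是否可用,若不可用(EBADF)则打开/dev/null来占住该描述符,以备后面去打log
3.对非共享库构建做信号处理:把除SIGKILL、SIGSTOP之外的信号处理方式恢复为默认(SIG_DFL),只有SIGPIPE被设置为忽略(SIG_IGN);忽略SIGPIPE这一点跟上述node_main中对共享库做的操作一样
4.通过RegisterSignalHandler为信号SIGINT、SIGTERM注册处理函数SignalExit,即收到这两个信号时退出进程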
5.提高进程打开文件数量

下面我将挑一些重点的点来讲解。

pthread_sigmask sigaction

pthread_sigmask用来设置线程的信号屏蔽集,注意这里是线程自己的;sigaction用来安装信号的处理函数,这里操作的是进程级别的设置,进程中所有线程会共享这个处理函数。也就是说线程可以有自己的信号屏蔽集,但是处理函数是进程中所有线程共享的。

提高进程打开文件描述符数量

依据上述代码,我们发现其使用的是setrlimit方法:当rlim_max不是RLIM_INFINITY(即有明确的上限)时,直接把rlim_cur设置为rlim_max;当rlim_max是RLIM_INFINITY时,在lim.rlim_cur到2的20次方(1 << 20)之间做二分查找,逐步逼近setrlimit能够成功设置的最大值。

2.uv_setup_args(argc, argv)

其实就是复制一份argv,返回new_argv,给process.title用。

3.Init

// Process-wide initialization that must run before V8::Initialize():
// registers the built-in modules, reads configuration from environment
// variables (including NODE_OPTIONS), processes the command line, optionally
// initializes ICU, and finally marks node as initialized.
//
// argc/argv:            the command line; passed on to ProcessArgv().
// exec_argc/exec_argv:  out-parameters filled in by ProcessArgv().
void Init(int* argc,
          const char** argv,
          int* exec_argc,
          const char*** exec_argv) {
  // Initialize prog_start_time to get relative uptime.
  prog_start_time = static_cast<double>(uv_now(uv_default_loop()));

  // Register built-in modules
  RegisterBuiltinModules();

  // Make inherited handles noninheritable.
  uv_disable_stdio_inheritance();

#if defined(NODE_V8_OPTIONS)
  // Should come before the call to V8::SetFlagsFromCommandLine()
  // so the user can disable a flag --foo at run-time by passing
  // --no_foo from the command line.
  // Author's note: this sets the V8 command-line flags baked in at build time.
  V8::SetFlagsFromString(NODE_V8_OPTIONS, sizeof(NODE_V8_OPTIONS) - 1);
#endif

  // Author's note: the following blocks read assorted settings from
  // environment variables.
  {
    std::string text;
    config_pending_deprecation =
        SafeGetenv("NODE_PENDING_DEPRECATION", &text) && text[0] == '1';
  }

  // Allow for environment set preserving symlinks.
  {
    std::string text;
    config_preserve_symlinks =
        SafeGetenv("NODE_PRESERVE_SYMLINKS", &text) && text[0] == '1';
  }

  // Only consulted when config_warning_file is still empty at this point.
  if (config_warning_file.empty())
    SafeGetenv("NODE_REDIRECT_WARNINGS", &config_warning_file);

#if HAVE_OPENSSL
  if (openssl_config.empty())
    SafeGetenv("OPENSSL_CONF", &openssl_config);
#endif

#if !defined(NODE_WITHOUT_NODE_OPTIONS)
  std::string node_options;
  if (SafeGetenv("NODE_OPTIONS", &node_options)) {
    // Smallest tokens are 2-chars (a not space and a space), plus 2 extra
    // pointers, for the prepended executable name, and appended NULL pointer.
    size_t max_len = 2 + (node_options.length() + 1) / 2;
    const char** argv_from_env = new const char*[max_len];
    int argc_from_env = 0;
    // [0] is expected to be the program name, fill it in from the real argv.
    argv_from_env[argc_from_env++] = argv[0];

    // strtok() writes into its input, so tokenize a private copy; the tokens
    // stored in argv_from_env point into this copy until it is freed below.
    char* cstr = strdup(node_options.c_str());
    char* initptr = cstr;
    char* token;
    while ((token = strtok(initptr, " "))) {  // NOLINT(runtime/threadsafe_fn)
      initptr = nullptr;
      argv_from_env[argc_from_env++] = token;
    }
    argv_from_env[argc_from_env] = nullptr;
    int exec_argc_;
    const char** exec_argv_ = nullptr;
    ProcessArgv(&argc_from_env, argv_from_env, &exec_argc_, &exec_argv_, true);
    delete[] exec_argv_;
    delete[] argv_from_env;
    free(cstr);
  }
#endif

  // Author's note: extracts the node and V8 arguments from the real command
  // line.
  ProcessArgv(argc, argv, exec_argc, exec_argv);

#if defined(NODE_HAVE_I18N_SUPPORT)
  // If the parameter isn't given, use the env variable.
  if (icu_data_dir.empty())
    SafeGetenv("NODE_ICU_DATA", &icu_data_dir);
  // Initialize ICU.
  // If icu_data_dir is empty here, it will load the 'minimal' data.
  if (!i18n::InitializeICUDirectory(icu_data_dir)) {
    fprintf(stderr,
            "%s: could not initialize ICU "
            "(check NODE_ICU_DATA or --icu-data-dir parameters)\n",
            argv[0]);
    exit(9);
  }
#endif

  // Needed for access to V8 intrinsics.  Disabled again during bootstrapping,
  // see lib/internal/bootstrap/node.js.
  // Author's note: this flag lets JavaScript code call V8's built-in
  // functions; such calls are written with a leading '%', as shown later in
  // the article.
  const char allow_natives_syntax[] = "--allow_natives_syntax";
  V8::SetFlagsFromString(allow_natives_syntax,
                         sizeof(allow_natives_syntax) - 1);

  // We should set node_is_initialized here instead of in node::Start,
  // otherwise embedders using node::Init to initialize everything will not be
  // able to set it and native modules will not load for them.
  node_is_initialized = true;
}

Init方法主要做了以下几件事:

1.注册内置模块
2.disable掉继承过来的文件描述符
3.设置v8虚拟机启动的命令行标志
4.利用SafeGetenv(),从环境变量中获取各种参数
5.获取node和v8的运行参数exec_argv
6.设置v8标志--allow_natives_syntax

还是挑几个重点讲解一下

RegisterBuiltinModules

注册内置模块,也就是src里的.cc文件。

void RegisterBuiltinModules() {
#define V(modname) _register_##modname();
  NODE_BUILTIN_MODULES(V)
#undef V
}

RegisterBuiltinModules做了两件事:

1.宏定义V
2.调用NODE_BUILTIN_MODULES

NODE_BUILTIN_MODULES也是一个宏定义,定义如下:

#define NODE_BUILTIN_MODULES(V)                                               \
  NODE_BUILTIN_STANDARD_MODULES(V)                                            \
  NODE_BUILTIN_OPENSSL_MODULES(V)                                             \
  NODE_BUILTIN_ICU_MODULES(V)

NODE_BUILTIN_STANDARD_MODULES定义如下:

#define NODE_BUILTIN_STANDARD_MODULES(V)                                      \
    V(async_wrap)                                                             \
    V(buffer)                                                                 \
    V(cares_wrap)                                                             \
    ......

也就是注册每个模块,其实调用了_register_##modname()。

_register_##modname()定义如下:

static node::node_module _module = {                                        \
    NODE_MODULE_VERSION,                                                      \
    flags,                                                                    \
    nullptr,                                                                  \
    __FILE__,                                                                 \
    nullptr,                                                                  \
    (node::addon_context_register_func) (regfunc),                            \
    NODE_STRINGIFY(modname),                                                  \
    priv,                                                                     \
    nullptr                                                                   \
  };                                                                          \
  void _register_ ## modname() {                                              \
    node_module_register(&_module);                                           \
  }

node_module_register定义在src/node.cc中,源码如下:

extern "C" void node_module_register(void* m) {
  struct node_module* mp = reinterpret_cast<struct node_module*>(m);

  if (mp->nm_flags & NM_F_BUILTIN) {
    mp->nm_link = modlist_builtin;
    modlist_builtin = mp;
  } else if (mp->nm_flags & NM_F_INTERNAL) {
    mp->nm_link = modlist_internal;
    modlist_internal = mp;
  } else if (!node_is_initialized) {
    // "Linked" modules are included as part of the node project.
    // Like builtins they are registered *before* node::Init runs.
    mp->nm_flags = NM_F_LINKED;
    mp->nm_link = modlist_linked;
    modlist_linked = mp;
  } else {
    modpending = mp;
  }
}

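其实就是根据nm_flags把上面定义的module挂到对应的链表上:带NM_F_BUILTIN标志的内置模块被加到modlist_builtin链表的头部;带NM_F_INTERNAL标志的加到modlist_internal;其余模块如果在node初始化完成之前注册,则标记为NM_F_LINKED并加到modlist_linked,否则记录到modpending。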

uv_disable_stdio_inheritance

void uv_disable_stdio_inheritance(void) {
  int fd;

  /* Set the CLOEXEC flag on all open descriptors. Unconditionally try the
   * first 16 file descriptors. After that, bail out after the first error.
   */
  for (fd = 0; ; fd++)
    if (uv__cloexec(fd, 1) && fd > 15)
      break;
}

其实就是给文件描述符设置close-on-exec(CLOEXEC)标志,使其在子进程调用exec时被自动关闭。这里多说几句,为什么要这样呢?原因在于fork子进程时,父进程的文件描述符及堆栈信息会被复制到子进程;但当子进程调用exec加载新程序后,原有执行栈被重置,保存这些文件描述符的变量也就不见了,新程序将无法找到并关闭对应的文件描述符。CLOEXEC就是为了解决这个问题的:在子进程exec时,自动关闭这些文件描述符。

--allow_natives_syntax

V8通过设置--allow_natives_syntax来允许用户的代码调用v8的内置函数,但调用时要以%开头。

4.判断OPENSSL

#if HAVE_OPENSSL
  {
    std::string extra_ca_certs;
    if (SafeGetenv("NODE_EXTRA_CA_CERTS", &extra_ca_certs))
      crypto::UseExtraCaCerts(extra_ca_certs);
  }

这里在编译时启用了OpenSSL(HAVE_OPENSSL)的情况下,读取环境变量NODE_EXTRA_CA_CERTS;如果设置了该变量,就把它的值交给crypto::UseExtraCaCerts,用于加载额外的CA证书。

5.v8_platform.Initialize

void Initialize(int thread_pool_size) {
    tracing_agent_.reset(new tracing::Agent(trace_file_pattern));
    platform_ = new NodePlatform(thread_pool_size,
        tracing_agent_->GetTracingController());
    V8::InitializePlatform(platform_);
    tracing::TraceEventHelper::SetTracingController(
        tracing_agent_->GetTracingController());
  }

主要创建了tracing agent和NodePlatform(传入线程池大小thread_pool_size),并通过V8::InitializePlatform把该platform注册给V8。

6.V8::Initialize();

这里是v8的初始化,定义在src/deps/v8/src/v8.cc中,

bool V8::Initialize() {
  InitializeOncePerProcess();
  return true;
}

InitializeOncePerProcess做了什么呢?

void V8::InitializeOncePerProcess() {
  base::CallOnce(&init_once, &InitializeOncePerProcessImpl);
}

CallOnce

CallOnce顾名思义就是只调用一次,其通过判断init_once是否为ONCE_STATE_DONE来判断是否曾经调用过。

inline void CallOnce(OnceType* once, NoArgFunction init_func) {
  if (Acquire_Load(once) != ONCE_STATE_DONE) {
    CallOnceImpl(once, init_func);
  }
}

其中Acquire_Load为原子性的获取once的值,CallOnceImpl则在其中修改once值,并且执行init_func。

下面我们看下Acquire_Load的定义:

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  return __atomic_load_n(ptr, __ATOMIC_ACQUIRE);
}

__atomic_load_n即为原子性的加载ptr指针所指向的内存所存储的变量。

CallOnceImpl代码如下:

if (state == ONCE_STATE_UNINITIALIZED) {
    // We are the first thread to call this function, so we have to call the
    // function.
    init_func();
    Release_Store(once, ONCE_STATE_DONE);

主要做了两件事:

1.执行init_func
2.原子性的设置once的值,表明在该进程中,已经执行过了。

InitializeOncePerProcessImpl

// Body of V8's once-per-process initialization (invoked through
// base::CallOnce): normalizes flags, initializes the OS layer, then runs the
// per-process setup of the individual V8 subsystems in the order below.
void V8::InitializeOncePerProcessImpl() {
  FlagList::EnforceFlagImplications();

  if (FLAG_predictable && FLAG_random_seed == 0) {
    // Avoid random seeds in predictable mode.
    FLAG_random_seed = 12347;
  }

  // --stress-compaction switches on the related GC stress flags.
  if (FLAG_stress_compaction) {
    FLAG_force_marking_deque_overflows = true;
    FLAG_gc_global = true;
    FLAG_max_semi_space_size = 1;
  }

  base::OS::Initialize(FLAG_hard_abort, FLAG_gc_fake_mmap);

  if (FLAG_random_seed) SetRandomMmapSeed(FLAG_random_seed);

  // Author's note: thread-related setup — creates the thread-local storage
  // keys and thread_data_table_ (see Isolate::InitializeOncePerProcess).
  Isolate::InitializeOncePerProcess();

#if defined(USE_SIMULATOR)
  Simulator::InitializeOncePerProcess();
#endif
  sampler::Sampler::SetUp();
  CpuFeatures::Probe(false);
  ElementsAccessor::InitializeOncePerProcess();
  ExternalReference::SetUp();
  Bootstrapper::InitializeOncePerProcess();
}

这里主要做了两件事:

1.操作系统相关的初始化
2.初始化线程,创建TLS,thread_table_data等

Isolate::InitializeOncePerProcess

// Creates the process-wide thread-local storage keys used by Isolate and
// allocates the global thread data table, all while holding
// thread_data_table_mutex_.
void Isolate::InitializeOncePerProcess() {
  // Author's note: the guard manages the mutex much like a smart pointer
  // manages memory — it is a stack object, so the lock is given up when the
  // guard is destroyed at the end of this function.
  // A lock guard is an object that manages a mutex object by keeping it always locked.
  base::LockGuard<base::Mutex> lock_guard(thread_data_table_mutex_.Pointer());
  // The table must not exist yet; this function is meant to run only once.
  CHECK_NULL(thread_data_table_);
  // Thread-local storage key. Presumably backed by pthread_key_create() on
  // POSIX and TlsAlloc() on Windows — confirm against V8's platform layer.
  isolate_key_ = base::Thread::CreateThreadLocalKey();
#if DEBUG
  base::Relaxed_Store(&isolate_key_created_, 1);
#endif
  thread_id_key_ = base::Thread::CreateThreadLocalKey();
  per_isolate_thread_data_key_ = base::Thread::CreateThreadLocalKey();
  // Author's note: ThreadDataTable is a linked list (its definition is not
  // shown in this excerpt).
  thread_data_table_ = new Isolate::ThreadDataTable();
}

主要做了三件事:

1.加互斥锁
2.利用base::Thread::CreateThreadLocalKey申请线程本地存储(TLS)的key:isolate_key_、thread_id_key_、per_isolate_thread_data_key_
3.创建thread_data_table_链表

7.Start(uv_default_loop(), argc, argv, exec_argc, exec_argv)

// Creates a V8 isolate, publishes it in the global node_isolate, runs node
// inside it via the Start(isolate, isolate_data, ...) overload, then
// unpublishes and disposes the isolate.
//
// event_loop:           libuv loop handed to IsolateData.
// argc/argv:            command line, forwarded to the inner Start().
// exec_argc/exec_argv:  exec arguments, forwarded to the inner Start().
// Returns the inner Start()'s exit code, or 12 if the isolate could not be
// created.
inline int Start(uv_loop_t* event_loop,
                 int argc, const char* const* argv,
                 int exec_argc, const char* const* exec_argv) {
  Isolate::CreateParams params;
  // Allocator handed to V8 for array buffers. Author's note: this is why
  // Buffer memory in node does not come out of V8's own heap but is allocated
  // directly from the process heap.
  ArrayBufferAllocator allocator;
  params.array_buffer_allocator = &allocator;
#ifdef NODE_ENABLE_VTUNE_PROFILING
  params.code_event_handler = vTune::GetVtuneCodeEventHandler();
#endif

  Isolate* const isolate = Isolate::New(params);
  if (isolate == nullptr)
    return 12;  // Signal internal error.

  // Register node's callbacks on the isolate; OnMessage is installed as a
  // message listener.
  isolate->AddMessageListener(OnMessage);
  isolate->SetAbortOnUncaughtExceptionCallback(ShouldAbortOnUncaughtException);
  isolate->SetMicrotasksPolicy(v8::MicrotasksPolicy::kExplicit);
  isolate->SetFatalErrorHandler(OnFatalError);
  isolate->SetAllowWasmCodeGenerationCallback(AllowWasmCodeGenerationCallback);

  {
    // Scoped lock on node_isolate_mutex while the global node_isolate pointer
    // is published; it must still be null at this point.
    Mutex::ScopedLock scoped_lock(node_isolate_mutex);
    CHECK_EQ(node_isolate, nullptr);
    node_isolate = isolate;
  }

  int exit_code;
  {
    // Author's note: take the isolate's lock because an isolate is not
    // thread-safe.
    Locker locker(isolate);
    Isolate::Scope isolate_scope(isolate);
    HandleScope handle_scope(isolate);
    IsolateData isolate_data(
        isolate,
        event_loop,
        v8_platform.Platform(),
        allocator.zero_fill_field());
    if (track_heap_objects) {
      isolate->GetHeapProfiler()->StartTrackingHeapObjects(true);
    }
    exit_code = Start(isolate, &isolate_data, argc, argv, exec_argc, exec_argv);
  }

  {
    // Unpublish the isolate under the same mutex before disposing it.
    Mutex::ScopedLock scoped_lock(node_isolate_mutex);
    CHECK_EQ(node_isolate, isolate);
    node_isolate = nullptr;
  }

  isolate->Dispose();

  return exit_code;
}

这里主要做了如下几件事:

1.初始化isolate的params,这里需要注意的是array_buffer_allocator,设置这个分配器是为了分配buffer时使用,node中buffer不会占用V8的内存,而是直接从堆中申请,这也是buffer不受v8内存限制的原因
2.创建Isolate
3.给Isolate添加监听回调
4.Start(isolate, &isolate_data, argc, argv, exec_argc, exec_argv)

array_buffer_allocator

ArrayBufferAllocator::Allocate其实就是调用了realloc,在原来基础上将pointer所指向的内存大小增加到full_size。

allocated = realloc(pointer, full_size);

Isolate添加监听回调

以AddMessageListener为例,其实最终调用的是Isolate::AddMessageListenerWithErrorLevel,代码如下:

bool Isolate::AddMessageListenerWithErrorLevel(MessageCallback that,
                                               int message_levels,
                                               Local<Value> data) {
  i::Isolate* isolate = reinterpret_cast<i::Isolate*>(this);
  ENTER_V8_NO_SCRIPT_NO_EXCEPTION(isolate);
  i::HandleScope scope(isolate);
  i::Handle<i::TemplateList> list = isolate->factory()->message_listeners();
  i::Handle<i::FixedArray> listener = isolate->factory()->NewFixedArray(3);
  i::Handle<i::Foreign> foreign =
      isolate->factory()->NewForeign(FUNCTION_ADDR(that));
  listener->set(0, *foreign);
  listener->set(1, data.IsEmpty() ? isolate->heap()->undefined_value()
                                  : *Utils::OpenHandle(*data));
  listener->set(2, i::Smi::FromInt(message_levels));
  list = i::TemplateList::Add(isolate, list, listener);
  isolate->heap()->SetMessageListeners(*list);
  return true;
}

其实就是把回调函数(that)、data和message_levels打包成一个长度为3的listener,追加到message_listeners列表中,再通过isolate->heap()->SetMessageListeners把新的列表保存回去。

8.Start(isolate, &isolate_data, argc, argv, exec_argc, exec_argv)

// Innermost Start() overload: creates a context and an Environment for the
// given isolate, starts the inspector, and bootstraps node through
// LoadEnvironment(). The rest of the function is elided in this excerpt.
inline int Start(Isolate* isolate, IsolateData* isolate_data,
                 int argc, const char* const* argv,
                 int exec_argc, const char* const* exec_argv) {
  HandleScope handle_scope(isolate);
  Local<Context> context = NewContext(isolate);
  Context::Scope context_scope(context);
  Environment env(isolate_data, context, v8_platform.GetTracingAgent());
  // Author's note: initializes the uv handles and the process object.
  env.Start(argc, argv, exec_argc, exec_argv, v8_is_profiling);

  // The first argument after the program name, if any, is passed to the
  // inspector as `path`.
  const char* path = argc > 1 ? argv[1] : nullptr;
  StartInspector(&env, path, debug_options);

  if (debug_options.inspector_enabled() && !v8_platform.InspectorStarted(&env))
    return 12;  // Signal internal error.

  env.set_abort_on_uncaught_exception(abort_on_uncaught_exception);

  if (no_force_async_hooks_checks) {
    env.async_hooks()->no_force_checks();
  }

  {
    // Bootstrap runs with async id 1 pushed (trigger id 0) and popped again
    // afterwards.
    Environment::AsyncCallbackScope callback_scope(&env);
    env.async_hooks()->push_async_ids(1, 0);
    LoadEnvironment(&env);
    env.async_hooks()->pop_async_id(1);
  }

  ......
}

这里主要做了如下几件事:

1.调用env.Start()来初始化uv handle、process
2.LoadEnvironment()

LoadEnvironment()

// Bootstraps the JavaScript side of node for `env`: compiles the two
// bootstrap scripts, exposes `global` and the binding-loader functions, runs
// the loaders bootstrapper, and then runs the node bootstrapper with the
// loaders' result. Returns early if either bootstrapper fails.
void LoadEnvironment(Environment* env) {
  HandleScope handle_scope(env->isolate());

  TryCatch try_catch(env->isolate());
  // Disable verbose mode to stop FatalException() handler from trying
  // to handle the exception. Errors this early in the start-up phase
  // are not safe to ignore.
  try_catch.SetVerbose(false);

  // The bootstrapper scripts are lib/internal/bootstrap/loaders.js and
  // lib/internal/bootstrap/node.js, each included as a static C string
  // defined in node_javascript.h, generated in node_javascript.cc by
  // node_js2c.
  Local<String> loaders_name =
      FIXED_ONE_BYTE_STRING(env->isolate(), "internal/bootstrap/loaders.js");
  // Author's note: LoadersBootstrapperSource returns the source of loaders.js
  // that node_js2c embedded as character codes at build time.
  Local<Function> loaders_bootstrapper =
      GetBootstrapper(env, LoadersBootstrapperSource(env), loaders_name);
  Local<String> node_name =
      FIXED_ONE_BYTE_STRING(env->isolate(), "internal/bootstrap/node.js");
  Local<Function> node_bootstrapper =
      GetBootstrapper(env, NodeBootstrapperSource(env), node_name);

  // Add a reference to the global object
  Local<Object> global = env->context()->Global();

#if defined HAVE_DTRACE || defined HAVE_ETW
  InitDTrace(env, global);
#endif

#if defined HAVE_PERFCTR
  InitPerfCounters(env, global);
#endif

  // Enable handling of uncaught exceptions
  // (FatalException(), break on uncaught exception in debugger)
  //
  // This is not strictly necessary since it's almost impossible
  // to attach the debugger fast enough to break on exception
  // thrown during process startup.
  try_catch.SetVerbose(true);

  env->SetMethod(env->process_object(), "_rawDebug", RawDebug);

  // Expose the global object as a property on itself
  // (Allows you to set stuff on `global` from anywhere in JavaScript.)
  global->Set(FIXED_ONE_BYTE_STRING(env->isolate(), "global"), global);

  // Create binding loaders
  v8::Local<v8::Function> get_binding_fn =
      env->NewFunctionTemplate(GetBinding)->GetFunction(env->context())
          .ToLocalChecked();

  v8::Local<v8::Function> get_linked_binding_fn =
      env->NewFunctionTemplate(GetLinkedBinding)->GetFunction(env->context())
          .ToLocalChecked();

  v8::Local<v8::Function> get_internal_binding_fn =
      env->NewFunctionTemplate(GetInternalBinding)->GetFunction(env->context())
          .ToLocalChecked();

  // Arguments for the loaders bootstrapper: the process object followed by
  // the three binding-loader functions created above.
  Local<Value> loaders_bootstrapper_args[] = {
    env->process_object(),
    get_binding_fn,
    get_linked_binding_fn,
    get_internal_binding_fn
  };

  // Bootstrap internal loaders
  Local<Value> bootstrapped_loaders;
  if (!ExecuteBootstrapper(env, loaders_bootstrapper,
                           arraysize(loaders_bootstrapper_args),
                           loaders_bootstrapper_args,
                           &bootstrapped_loaders)) {
    return;
  }

  // Bootstrap Node.js
  Local<Value> bootstrapped_node;
  // Author's note: bootstrapped_loaders holds what loaders_bootstrapper
  // returned, i.e. { internalBinding, NativeModule }.
  Local<Value> node_bootstrapper_args[] = {
    env->process_object(),
    bootstrapped_loaders
  };
  if (!ExecuteBootstrapper(env, node_bootstrapper,
                           arraysize(node_bootstrapper_args),
                           node_bootstrapper_args,
                           &bootstrapped_node)) {
    return;
  }
}

这里主要做了以下几件事:

1.从node_javascript.cc中获取node.js、loaders.js的ascII源码,这里的node_javascript.cc在[上一篇文章](https://github.com/tsy77/blog/issues/6)中有过简单介绍,通过js2c.py将./lib中所有js文件的ascII码存入node_javascript.cc中。
2.创建v8::Local<v8::Function> get_binding_fn、get_linked_binding_fn、get_internal_binding_fn
3.执行loaders.js和node.js。在node.js中,运行了我们想要执行的js文件。

node_js2c

下面便是node_javascript.cc中的一部分:

static const uint8_t raw_internal_bootstrap_loaders_key[] = { 105,110,116,101,114,110,97,108,47,98,111,111,116,115,116,114,97,112,47,108,
111,97,100,101,114,115 };
static struct : public v8::String::ExternalOneByteStringResource {
  const char* data() const override {
    return reinterpret_cast<const char*>(raw_internal_bootstrap_loaders_key);
  }
  size_t length() const override { return arraysize(raw_internal_bootstrap_loaders_key); }
  void Dispose() override { /* Default calls `delete this`. */ }
  v8::Local<v8::String> ToStringChecked(v8::Isolate* isolate) {
    return v8::String::NewExternalOneByte(isolate, this).ToLocalChecked();
  }
} internal_bootstrap_loaders_key;

static const uint8_t raw_internal_bootstrap_loaders_value[] = { 47,47,32,84,104,105,115,32,102,105,108,101,32,99,114,101,97,116,101,115,
32,116,104,101,32,105,110,116,101,114,110,97,108,32,109,111,100,117,108,101,
32,38,32,98,105,110,100,105,110,103,32,108,111,97,100,101,114,115,32,117,
115,101,100,32,98,121,32,98,117,105,108,116,45,105,110,10,47,47,32,109,
111,100,117,108,101,115,46,32,73,110,32,99,111,110,116,114,97,115,116,44,
32,117,115,101,114,32,108,97,110,100,32,109,111,100,117,108,101,115,32,97,
114,101,32,108,111,97,100,101,100, };
static struct : public v8::String::ExternalOneByteStringResource {
  const char* data() const override {
    return reinterpret_cast<const char*>(raw_internal_bootstrap_loaders_value);
  }
  size_t length() const override { return arraysize(raw_internal_bootstrap_loaders_value); }
  void Dispose() override { /* Default calls `delete this`. */ }
  v8::Local<v8::String> ToStringChecked(v8::Isolate* isolate) {
    return v8::String::NewExternalOneByte(isolate, this).ToLocalChecked();
  }
} internal_bootstrap_loaders_value;

我们可以看到两个数组和两个struct,其中raw_internal_bootstrap_loaders_key和raw_internal_bootstrap_loaders_value分别记录bootstrap_loaders的key和value(文件内容),两个结构体internal_bootstrap_loaders_key和internal_bootstrap_loaders_value均有方法ToStringChecked,而ToStringChecked其实会去找data()方法,也就是说internal_bootstrap_loaders_value.ToStringChecked()便会返回对应的ascII码。

node_javascript.cc又是如何产生的呢?

{
      'target_name': 'node_js2c',
      'type': 'none',
      'toolsets': ['host'],
      'actions': [
        {
          'action_name': 'node_js2c',
          'process_outputs_as_sources': 1,
          'inputs': [
            '<@(library_files)',
            './config.gypi',
            'tools/check_macros.py'
          ],
          'outputs': [
            '<(SHARED_INTERMEDIATE_DIR)/node_javascript.cc',
          ],
          'conditions': [
            [ 'node_use_dtrace=="false" and node_use_etw=="false"', {
              'inputs': [ 'src/notrace_macros.py' ]
            }],
            [ 'node_use_perfctr=="false"', {
              'inputs': [ 'src/noperfctr_macros.py' ]
            }],
            [ 'node_debug_lib=="false"', {
              'inputs': [ 'tools/nodcheck_macros.py' ]
            }],
            [ 'node_debug_lib=="true"', {
              'inputs': [ 'tools/dcheck_macros.py' ]
            }]
          ],
          'action': [
            'python',
            'tools/js2c.py',
            '<@(_outputs)',
            '<@(_inputs)',
          ],
        },
      ],

我们看到在node.gyp中定义了action,其实就是调用了python tools/js2c.py,这个后面文章再来介绍吧,这里先简单提一下。

GetBinding

getBinding又是干什么的呢?

static void GetBinding(const FunctionCallbackInfo<Value>& args) {
  Environment* env = Environment::GetCurrent(args);

  CHECK(args[0]->IsString());

  Local<String> module = args[0].As<String>();
  node::Utf8Value module_v(env->isolate(), module);

  node_module* mod = get_builtin_module(*module_v);
  Local<Object> exports;
  if (mod != nullptr) {
    exports = InitModule(env, mod, module);
  } else if (!strcmp(*module_v, "constants")) {
    exports = Object::New(env->isolate());
    CHECK(exports->SetPrototype(env->context(),
                                Null(env->isolate())).FromJust());
    DefineConstants(env->isolate(), exports);
  } else if (!strcmp(*module_v, "natives")) {
    exports = Object::New(env->isolate());
    DefineJavaScript(env, exports);
  } else {
    return ThrowIfNoSuchModule(env, *module_v);
  }

  args.GetReturnValue().Set(exports);
}

我们不难发现,逻辑上有三个分叉:

1. get_builtin_module,获取builtin模块,如果获取到了(是builtin模块),exports = InitModule(env, mod, module);
2.如果模块名是constants,DefineConstants
3.如果模块名是natives,DefineJavaScript;以上都不匹配时,则调用ThrowIfNoSuchModule报错

get_builtin_module又是怎么做的呢?

node_module* get_builtin_module(const char* name) {
  return FindModule(modlist_builtin, name, NM_F_BUILTIN);
}

inline struct node_module* FindModule(struct node_module* list,
                                      const char* name,
                                      int flag) {
  struct node_module* mp;

  for (mp = list; mp != nullptr; mp = mp->nm_link) {
    if (strcmp(mp->nm_modname, name) == 0)
      break;
  }

  CHECK(mp == nullptr || (mp->nm_flags & flag) != 0);
  return mp;
}

很简单,就是从modlist_builtin里面遍历,上述的Init函数中调用RegisterBuiltinModules将所有的内置模块加入到链表modlist_builtin中。

InitModule其实就是执行了module::Initialize(),以async_wrap为例:

oid AsyncWrap::Initialize(Local<Object> target,
                           Local<Value> unused,
                           Local<Context> context) {
  Environment* env = Environment::GetCurrent(context);
  Isolate* isolate = env->isolate();
  HandleScope scope(isolate);

  env->BeforeExit(DestroyAsyncIdsCallback, env);

  env->SetMethod(target, "setupHooks", SetupHooks);
  env->SetMethod(target, "pushAsyncIds", PushAsyncIds);
  env->SetMethod(target, "popAsyncIds", PopAsyncIds);
  env->SetMethod(target, "queueDestroyAsyncId", QueueDestroyAsyncId);
  env->SetMethod(target, "enablePromiseHook", EnablePromiseHook);
  env->SetMethod(target, "disablePromiseHook", DisablePromiseHook);
  env->SetMethod(target, "registerDestroyHook", RegisterDestroyHook);

  ......

  env->set_async_hooks_init_function(Local<Function>());
  env->set_async_hooks_before_function(Local<Function>());
  env->set_async_hooks_after_function(Local<Function>());
  env->set_async_hooks_destroy_function(Local<Function>());
  env->set_async_hooks_promise_resolve_function(Local<Function>());
  env->set_async_hooks_binding(target);
}

上述async_wrap中可以看到其实就是在exports上挂载各种方法,然后初始化。

DefineJavaScript干了什么呢?

CHECK(target->Set(env->context(),
                  internal_bootstrap_loaders_key.ToStringChecked(env->isolate()),
                  internal_bootstrap_loaders_value.ToStringChecked(env->isolate())).FromJust());

我们看到其实就是将node_javascript.cc中的模块以key/value的形式挂载到exports,这里可以注意下上面提到的ToStringChecked。

ExecuteBootstrapper

这里就是执行internal/bootstrap/loaders.js和internal/bootstrap/node.js,这里先简单讲下,后面会做详细介绍。其最主要的逻辑如下:

if (process._syntax_check_only != null) {
          const fs = NativeModule.require('fs');
          // read the source
          const filename = CJSModule._resolveFilename(process.argv[1]);
          const source = fs.readFileSync(filename, 'utf-8');
          checkScriptSyntax(source, filename);
          process.exit(0);
        }
        CJSModule.runMain();

如果只需要做语法检查(process._syntax_check_only不为空),则读取源码、检查语法后直接退出;否则调用CJSModule.runMain()执行入口脚本。

9.资源释放

v8_platform.StopTracingAgent();
  v8_initialized = false;
  V8::Dispose();

  // uv_run cannot be called from the time before the beforeExit callback
  // runs until the program exits unless the event loop has any referenced
  // handles after beforeExit terminates. This prevents unrefed timers
  // that happen to terminate during shutdown from being run unsafely.
  // Since uv_run cannot be called, uv_async handles held by the platform
  // will never be fully cleaned up.
  v8_platform.Dispose();

  delete[] exec_argv;
  exec_argv = nullptr;

  return exit_code;

这里把v8_platform、exec_argv等资源释放,此次运行结束。

总结

本次主要沿着node::Start函数的逻辑,将运行一个node程序完整的流程呈现给大家,后面会对其中涉及的一些点以及一些模块进行分别介绍。