raszi / node-tmp

Temporary file and directory creator for node.js
MIT License
737 stars 93 forks source link

Duplicate delete in `_rmdirRecursiveSync` #62

Closed voltrevo closed 9 years ago

voltrevo commented 9 years ago

The iterative solution to _rmdirRecursiveSync introduced a bug:

function _rmdirRecursiveSync(root) {
  var dirs = [root];

  do {
    var
      dir = dirs.pop(),
      canRemove = true,
      files = fs.readdirSync(dir);

    for (var i = 0, length = files.length; i < length; i++) {
      var
        file = path.join(dir, files[i]),
        stat = fs.lstatSync(file); // lstat so we don't recurse into symlinked directories

      if (stat.isDirectory()) {                                                                                                                      
        canRemove = false;

        // ------ Added Commentary ------
        // Re-adding the directory here so it is deleted later. However, we keep doing
        // this for every sub-directory. This ultimately leads to a duplicate delete and
        // fs throws an exception.
        dirs.push(dir);

        dirs.push(file);
      } else {
        fs.unlinkSync(file);
      }  
    }  

    if (canRemove) {
      fs.rmdirSync(dir);
    }  
  } while (dirs.length !== 0);
}

You can see this happen with the following structure:

$ find removeDir
removeDir
removeDir/bar
removeDir/bar/baz.txt
removeDir/foo
removeDir/foo/baz.txt

To see this happen with minimal effort, I placed _rmdirRecursiveSync in a standalone script:

'use strict';                                                                                                                                        

var fs = require('fs');
var path = require('path');

function _rmdirRecursiveSync(root) {
  var dirs = [root];

  do {
    var
      dir = dirs.pop(),
      canRemove = true,
      files = fs.readdirSync(dir);

    for (var i = 0, length = files.length; i < length; i++) {
      var
        file = path.join(dir, files[i]),
        stat = fs.lstatSync(file); // lstat so we don't recurse into symlinked directories

      if (stat.isDirectory()) {
        canRemove = false;
        dirs.push(dir);
        dirs.push(file);
      } else {
        fs.unlinkSync(file);
      }  
    }  

    if (canRemove) {
      fs.rmdirSync(dir);
    }  
  } while (dirs.length !== 0);
}

_rmdirRecursiveSync('./removeDir');

Which for me (OSX Yosemite, node 0.12.2) results in this error:

fs.js:761
  return binding.readdir(pathModule._makeLong(path));
                 ^
Error: ENOENT, no such file or directory 'removeDir/bar'
    at Error (native)
    at Object.fs.readdirSync (fs.js:761:18)
    at _rmdirRecursiveSync (/Users/andrew/workspaces/repo-checker/tmpRmdirIssue/broken/script.js:13:18)
    at Object.<anonymous> (/Users/andrew/workspaces/repo-checker/tmpRmdirIssue/broken/script.js:35:1)
    at Module._compile (module.js:460:26)
    at Object.Module._extensions..js (module.js:478:10)
    at Module.load (module.js:355:32)
    at Function.Module._load (module.js:310:12)
    at Function.Module.runMain (module.js:501:10)
    at startup (node.js:129:16)

This can be avoided with a check that makes sure that the directory is re-added only once. Here is an equivalent script:

'use strict';                                                                                                                                        

var fs = require('fs');
var path = require('path');

function _rmdirRecursiveSync(root) {
  var dirs = [root];

  do {
    var
      dir = dirs.pop(),
      deferred = false,
      files = fs.readdirSync(dir);

    for (var i = 0, length = files.length; i < length; i++) {
      var
        file = path.join(dir, files[i]),
        stat = fs.lstatSync(file); // lstat so we don't recurse into symlinked directories

      if (stat.isDirectory()) {
        if (!deferred) {
          deferred = true;
          dirs.push(dir);
        }  
        dirs.push(file);
      } else {
        fs.unlinkSync(file);
      }  
    }  

    if (!deferred) {
      fs.rmdirSync(dir);
    }  
  } while (dirs.length !== 0);
}

_rmdirRecursiveSync('./removeDir');

I don't really understand how the original recursive solution was blowing the stack. Shouldn't the recursion have only been as deep as the directory structure? To blow the stack you'd need a crazy deep directory structure and that's probably the real issue.

In any case, I'll submit a PR to make this iterative fix.

voltrevo commented 9 years ago

Issue that introduced the iterative solution: https://github.com/raszi/node-tmp/issues/17

silkentrance commented 9 years ago

@voltrevo Nice find! The other day i was also stumbling across the line where it added the dir and the file to the stack, not realizing that the dir should be pushed once only for the remainder of subdirectories, yet it had a certain smell to it :grin:. And yes, I am the culprit who devised this bugger.

voltrevo commented 9 years ago

@silkentrance Cheers man. Just saw this now and had a look at the existing tests to see how it would fit in. I don't have time to add it now but hopefully I can write something up later today (I'm in Australia/UTC+10).

AaronJan commented 9 years ago

Also made a PR here

Sorry for the duplicate PR, I just need this can be fixed.

If you like, I can write a test for this, need a couple of days.

raszi commented 9 years ago

Fixed with #63