rjeczalik / notify

File system event notification library on steroids.
MIT License
902 stars 128 forks source link

Missing file events #174

Open matarc-zz opened 5 years ago

matarc-zz commented 5 years ago

Hello, First of all thanks for all the work on this library, it looks very promising!

I tried making a test program that creates many files in nested directories while a watch was set on the top level directory. My goal here was to see if notify would capture every single events.

From the results, it seems that a few events are getting dropped depending on the OS.

go version 1.11.4

This piece of code can be run using :

go run nested.go -depth 10

This will create a top folder that contains 10 nested folders, each having 1000 folders. 10 * 1000 files will be created at random places within that tree with a write on each of them and removed afterwards. I apologise for the long piece of code :

package main

import (
    "flag"
    "fmt"
    "io/ioutil"
    "log"
    "math/rand"
    "os"
    "path"
    "time"

    "github.com/rjeczalik/notify"
)

func main() {
    log.SetFlags(log.LstdFlags | log.Lshortfile)
    depth := flag.Int("depth", 10000, "`Number` of nested folders to create")
    keep := flag.Bool("keep", false, "Set if you want to keep the files and folders generated")
    flag.Parse()
    tmpDir, err := ioutil.TempDir("", "notify")
    if err != nil {
        log.Fatal(err)
    }
    if !*keep {
        defer os.RemoveAll(tmpDir)
    }
    log.Println(tmpDir)
    numberOfCreate := *depth * 1000
    log.Printf("Generating %d folders...", numberOfCreate+1)
    generateNestedFolders(tmpDir, *depth)
    log.Println("Done generating folders...")
    eventCh := make(chan notify.EventInfo, 1000000)
    log.Println("Watching directories")
    err = notify.Watch(path.Join(tmpDir, "..."), eventCh, notify.All)
    if err != nil {
        log.Fatal(err)
    }
    defer notify.Stop(eventCh)
    log.Println("Watch successful")
    resCh := make(chan int, 3)
    go func() {
        mapFile := make(map[string]bool)
        fileCreated := 0
        log.Println("Creating files...")
        for i := 0; i < numberOfCreate; i++ {
            filePath := createRandomFile(tmpDir, *depth)
            if filePath == "" {
                continue
            }
            mapFile[filePath] = true
            fileCreated++
        }
        if !*keep {
            log.Println("Removing files...")
            for filePath := range mapFile {
                err := os.Remove(filePath)
                if err != nil {
                    log.Fatal(err)
                }
            }
        }
        log.Printf("%d files were created", fileCreated)
        log.Println("Sending results...")
        resCh <- fileCreated
        log.Println("Results sent")
    }()
    log.Println("Listening on file events")
    fileCreated := 0
    fileWritten := 0
    fileRemoved := 0
    result := 0
    mapWrite := make(map[string]bool)
    reportTimer := time.NewTimer(time.Second * 5)
    timer := time.NewTimer(time.Second * 10)
    timer.Stop()
    done := false
    for {
        select {
        case event := <-eventCh:
            switch event.Event() {
            case notify.Write:
                if mapWrite[event.Path()] {
                    continue
                }
                fileWritten++
                mapWrite[event.Path()] = true
            case notify.Create:
                fileCreated++
            case notify.Remove:
                fileRemoved++
            }
        case result = <-resCh:
            log.Printf("Result received : '%d'", result)
            done = true
            timer.Reset(time.Second * 10)
        case <-reportTimer.C:
            log.Printf("Created '%d', Written '%d', Removed '%d'", fileCreated, fileWritten, fileRemoved)
            reportTimer.Reset(time.Second * 5)
            continue
        case <-timer.C:
            log.Printf("Haven't received any events in 10 seconds, checking results '%d'", result)
            if fileCreated != result {
                log.Printf("Expected %d file created, got %d", result, fileCreated)
            } else {
                log.Println("All files create event were caught")
            }
            if fileWritten != result {
                log.Printf("Expected %d file written, got %d", result, fileWritten)
            } else {
                log.Println("All files written event were caught")
            }
            if *keep {
                log.Println("keep flag is set, skipping removed events")
                return
            }
            if fileRemoved != result {
                log.Printf("Expected %d file removed, got %d", result, fileRemoved)
            } else {
                log.Println("All files removed event were caught")
            }
            return
        }
        if done {
            if !timer.Stop() {
                <-timer.C
            }
            timer.Reset(time.Second * 10)
        }
    }
}

func createRandomFile(folder string, depth int) string {
    depth = rand.Int() % depth
    for i := 0; i < depth; i++ {
        folder = path.Join(folder, "f")
    }
    for i := 0; i < 5; i++ {
        file, err := ioutil.TempFile(folder, "")
        if err != nil {
            log.Printf("Couldn't create file, retrying in 1s : '%s'", err)
            time.Sleep(time.Second)
            continue

        }
        defer file.Close()
        _, err = file.WriteString("hello")
        if err != nil {
            log.Fatal(err)
        }
        file.Sync()
        return file.Name()
    }
    log.Println("Couldn't create file after 5 retries")
    return ""
}

func generateNestedFolders(folder string, depth int) {
    for i := 0; i < depth; i++ {
        folder = generateFolder(folder, i)
    }
}

func generateFolder(folder string, i int) string {
    for j := 1; j < 1000; j++ {
        folderPath := path.Join(folder, fmt.Sprintf("f%d", j))
        err := os.Mkdir(folderPath, 0755)
        if err != nil {
            log.Fatalf("'%s', generating %dth folder", err, i)
        }
    }
    folder = path.Join(folder, "f")
    err := os.Mkdir(folder, 0755)
    if err != nil {
        log.Fatalf("'%s', generating %dth folder", err, i)
    }
    return folder
}

I tried running it on macOS and on a Windows 10 VM and here are the results I get (numbers vary) : macOS 10.4.2

2019/02/10 07:17:11 nested.go:104: Expected 10000 file created, got 9368
2019/02/10 07:17:11 nested.go:109: Expected 10000 file written, got 9360
2019/02/10 07:17:11 nested.go:120: All files removed event were caught

Windows 10 VM

2019/02/10 07:18:53 nested.go:106: All files create event were caught
2019/02/10 07:18:53 nested.go:111: All files written event were caught
2019/02/10 07:18:53 nested.go:118: Expected 10000 file removed, got 9348

On macOS I seem to be able to reliably catch all remove Events, but write & create seem to fluctuate (I ran that code many times) On Windows Create and Write seem to work reliably, but remove are dropped.

Am I doing something wrong with the library or is that a bug ? Originally I only had a 1000 buffered event channel and that seemed to make me drop a few create and write events on windows, so I increased it to 1 000 000 (which seems a bit much) and it seems to behave a bit better.

There is a -keep flag option that allows you to keep the files and folders generated if you'd like to check the tree.

Hope this makes sense to you :)

matarc-zz commented 5 years ago

I did some more digging and found the problem in macOS. Basically the FSEvents API is sending a kFSEventStreamEventFlagMustScanSubDirs and it's being ignored in watcher_fsevents.go

const failure = uint32(FSEventsMustScanSubDirs | FSEventsUserDropped | FSEventsKernelDropped)
//....
                if ev[i].Flags&failure != 0 {                                                                          
                        // TODO(rjeczalik): missing error handling                                                     
                        continue                                                                                       
                }  

I think that at the very least we should send that Event to the User if he subscribed to it so that he knows he needs to rescan the folder in question (and its sub folders), looking at the code it's not very hard to do and I'll probably submit a patch fairly soon for it.

Ideally this wouldn't happen, but that would mean finding the bottleneck and hopefully being able to remove it, I would have to do a lot more digging for that.

Any ideas ?