llimllib / git-ls

The Unlicense
4 stars 0 forks source link

test if go-git is faster than git cli #15

Open llimllib opened 5 months ago

llimllib commented 5 months ago

https://github.com/go-git/go-git

I'm not sure whether it would be worth it, but we could test whether using go-git lets us combine operations and speed them up.

llimllib commented 5 months ago

My first cut at a test program to find the most recent commit for each file was tremendously slower than git log:

package main

import (
    "fmt"
    "log"
    "os"
    "strings"

    git "github.com/go-git/go-git/v5"
    "github.com/go-git/go-git/v5/plumbing/object"
    "github.com/go-git/go-git/v5/plumbing/storer"
)

// hasTo terminates the program through log.Fatalf when e is non-nil and
// does nothing otherwise. It is meant for calls that only return an error.
func hasTo(e error) {
    if e == nil {
        return
    }
    log.Fatalf("%v", e)
}

// must unwraps a (value, error) pair: it returns a when e is nil and
// terminates the program through log.Fatalf when e is non-nil.
func must[T any](a T, e error) T {
    if e == nil {
        return a
    }
    log.Fatalf("%v", e)
    // Only reached if log.Fatalf returns, which it does not; the compiler
    // still requires a final return statement.
    return a
}

// Diff holds a pair of integer counters.
//
// NOTE(review): plus and minus are presumably the number of added and
// removed lines; nothing in the visible code reads or writes them, so
// confirm against the real callers.
type Diff struct {
    plus, minus int
}

// File gathers the information kept about one directory entry.
//
// In the code visible here only entry, isDir and isExe are ever populated;
// the remaining fields are declared but not yet filled in.
type File struct {
    // entry is the directory entry this record was built from.
    entry os.DirEntry

    // NOTE(review): status, diffSum and diffStat presumably describe the
    // working-tree state and uncommitted changes of the file — confirm.
    status  string
    diffSum *Diff

    // NOTE(review): diffStat aside, these presumably describe the most
    // recent commit touching the file (who, which commit, when, and its
    // message) — confirm once they are populated.
    diffStat, author, authorEmail, hash, lastModified, message string

    // isDir mirrors entry.IsDir(); isExe is set for non-directories that
    // have at least one execute permission bit.
    isDir, isExe bool
}

// main changes into the directory named by the first command-line argument,
// records its entries, and walks the repository history from HEAD, printing
// the most recent commit found for each entry.
//
// One line is printed per resolved entry:
//
//     +<additions> -<deletions> <changed path> <commit hash> <first line of message>
//
// The walk stops as soon as every entry has been resolved. Entries that are
// never touched by any commit (untracked or ignored files, for example) keep
// the walk going until the history is exhausted.
//
// NOTE(review): the repository is opened at "." and the changed paths are
// compared against the names in this directory, so this assumes the argument
// is the repository root — confirm.
func main() {
    // Without this guard a missing argument panics with an index-out-of-range
    // error instead of telling the user what to do.
    if len(os.Args) < 2 {
        log.Fatal("usage: pass the repository directory as the first argument")
    }
    hasTo(os.Chdir(os.Args[1]))

    // pending is the set of directory entries that still need a commit.
    pending := make(map[string]struct{})
    files := make(map[string]*File)
    for _, file := range must(os.ReadDir(".")) {
        name := file.Name()
        stat := must(os.Stat(name))
        files[name] = &File{
            entry: file,
            isDir: file.IsDir(),
            isExe: !file.IsDir() && stat.Mode()&0111 != 0,
        }
        // The .git directory is never part of a commit, so waiting for it
        // would make the early stop below unreachable.
        if name != ".git" {
            pending[name] = struct{}{}
        }
    }

    r := must(git.PlainOpen("."))
    ref := must(r.Head())

    // Retrieve the commit history starting at HEAD.
    // Possibly using PathFilter would work? But I'm not sure if I can break
    // out of the loop once I've found the most recent commits for all my
    // files?
    cIter := must(r.Log(&git.LogOptions{From: ref.Hash()}))

    // c.Files() is of no use here: it lists _every_ file in the tree at the
    // time of the commit, not just the ones the commit changed, so the
    // per-commit stats are used instead.
    hasTo(cIter.ForEach(func(c *object.Commit) error {
        stats, err := c.Stats()
        if err != nil {
            // Hand the error back to ForEach; hasTo reports it.
            return fmt.Errorf("computing stats for commit %s: %w", c.Hash, err)
        }

        subject, _, _ := strings.Cut(c.Message, "\n")
        for _, fstat := range stats {
            // Stat names are slash-separated paths, while pending holds bare
            // entry names. Matching on the first path component lets a change
            // to "dir/file" resolve the entry "dir"; for top-level files the
            // component is the whole name, exactly as before.
            top, _, _ := strings.Cut(fstat.Name, "/")
            if _, ok := pending[top]; !ok {
                continue
            }
            fmt.Printf("+%d -%d %s %s %s\n", fstat.Addition, fstat.Deletion, fstat.Name, c.Hash, subject)
            delete(pending, top)
        }

        if len(pending) == 0 {
            return storer.ErrStop
        }
        return nil
    }))
}

It took almost 5 minutes to complete.

llimllib commented 5 months ago

Scott Chacon notes git commit-graph in this talk:

The commit-graph file is a supplemental data structure that accelerates commit graph walks.

I think this may be very similar to the index I mentioned in #5.

I wonder if I could do this for the user, and if it would speed up git-ls?

llimllib commented 5 months ago

This article explains in detail what's happening: git log uses simplified history mode, while my code above uses full history mode:

The --full-history mode changes from the simplified history mode by walking every commit in the history, regardless of treesame parents on merge commits. A merge commit is marked as interesting if there is at least one parent that is different at the path.