Open bzz opened 5 years ago
In its current state, a simplistic version of this is possible by hard-coding log statements, as in the following diff:
diff --git a/common.go b/common.go
index 949db71..d4a6c57 100644
--- a/common.go
+++ b/common.go
@@ -3,11 +3,14 @@ package enry
import (
"bufio"
"bytes"
+ "log"
"path/filepath"
"strings"
"gopkg.in/src-d/enry.v1/data"
"gopkg.in/src-d/enry.v1/regex"
+
+ "github.com/sanity-io/litter"
)
// OtherLanguage is used as a zero value when a function can not return a specific language.
@@ -118,6 +121,7 @@ func GetLanguageBySpecificClassifier(content []byte, candidates []string, classi
// At least one of arguments should be set. If content is missing, language detection will be based on the filename.
// The function won't read the file, given an empty content.
func GetLanguages(filename string, content []byte) []string {
+ log.Printf("file:%s\n", filename)
if IsBinary(content) {
return nil
}
@@ -126,6 +130,8 @@ func GetLanguages(filename string, content []byte) []string {
candidates := []string{}
for _, strategy := range DefaultStrategies {
languages = strategy(filename, content, candidates)
+ log.Printf("\tstrategy:%s, langs:%q\n", litter.Sdump(strategy), languages)
+
if len(languages) == 1 {
return languages
}
diff --git a/data/heuristics.go b/data/heuristics.go
index dc3663d..c894985 100644
--- a/data/heuristics.go
+++ b/data/heuristics.go
@@ -1,6 +1,11 @@
package data
-import "regexp"
+import (
+ "log"
+ "regexp"
+
+ "github.com/sanity-io/litter"
+)
type (
Heuristics []Matcher
@@ -20,7 +25,10 @@ type (
func (h *Heuristics) Match(data []byte) []string {
var matchedLangs []string
+ litter.Config.Compact = true
+
for _, matcher := range *h {
+ log.Printf("matcher:%s\n", litter.Sdump(matcher))
if matcher.Match(data) {
for _, langOrAlias := range matcher.(Rule).GetLanguages() {
lang, ok := LanguagesByAlias(langOrAlias)
@@ -31,6 +39,7 @@ func (h *Heuristics) Match(data []byte) []string {
}
matchedLangs = append(matchedLangs, lang)
}
+ log.Printf("\t\tlangs:%q\n", matchedLangs)
break
}
}
but the idea is instead to provide an API with simple instrumentation for all strategies, which can be used in tests to achieve similar results.
To assist debugging in dev mode, it would be nice to have some visibility into the decision-making logic that Enry uses at runtime.
Problem: after getting a final prediction, e.g. through
enry.GetLanguage()
it's very hard to tell how that prediction was reached. Such introspection would simplify maintenance and reduce the time needed to debug mispredictions in case of sync-ups with Linguist, etc.
Linguist does have a simple protocol for
Linguist.instrumenter
that serves this need and is very generic, with the ability to be deployed and enabled in production, etc. Something simpler, similar to a
LocalInstrumenter
(details below) that is propagated to every Strategy would work for Enry in Go development mode and is the subject of this issue.