From 78eee0cf7e3f5caeac5755da8cd6bf2390c8e13c Mon Sep 17 00:00:00 2001 From: Alexander Bezzubov Date: Sat, 28 Mar 2020 20:22:01 +0100 Subject: [PATCH] generator: flag to debug building of bayesian classifier It seems that reading the ./samples/ directory from Linguist picks up a different number of files from the filesystem on different OSes. This change adds an ENRY_DEBUG environment variable that, when set, prints debug output about the token statistics calculated from the samples. TestPlan: - ENRY_DEBUG=1 go test -v ./internal/code-generator/generator \ -run Test_GeneratorTestSuite -testify.m TestGenerationFiles Signed-off-by: Alexander Bezzubov --- internal/code-generator/generator/samplesfreq.go | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/internal/code-generator/generator/samplesfreq.go b/internal/code-generator/generator/samplesfreq.go index 3dd2142..ca695b7 100644 --- a/internal/code-generator/generator/samplesfreq.go +++ b/internal/code-generator/generator/samplesfreq.go @@ -7,6 +7,7 @@ import ( "io/ioutil" "log" "math" + "os" "path/filepath" "sort" "strconv" @@ -31,6 +32,21 @@ func Frequencies(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit st return err } + if _, ok := os.LookupEnv("ENRY_DEBUG"); ok { + log.Printf("Total samples: %d\n", freqs.LanguageTotal) + log.Printf("Total tokens: %d\n", freqs.TokensTotal) + + keys := make([]string, 0, len(freqs.Languages)) + for k := range freqs.Languages { + keys = append(keys, k) + } + sort.Strings(keys) + + for _, k := range keys { + fmt.Printf(" %s: %d\n", k, freqs.Languages[k]) + } + } + buf := &bytes.Buffer{} if err := executeFrequenciesTemplate(buf, freqs, tmplPath, tmplName, commit); err != nil { return err