generator: flag to debug building of bayesian classifier

It seems that reading ./samples/ from Linguist consumes
a different number of files from filesystem on different OSes.

This change adds ENRY_DEBUG env var to print some debug output
about calculations of token stats from  samples.

TestPlan:
 - ENRY_DEBUG=1 go test -v ./internal/code-generator/generator \
	-run Test_GeneratorTestSuite -testify.m TestGenerationFiles

Signed-off-by: Alexander Bezzubov <bzz@apache.org>
This commit is contained in:
Alexander Bezzubov 2020-03-28 20:22:01 +01:00
parent b78e4423f0
commit 78eee0cf7e
No known key found for this signature in database
GPG Key ID: 8039F5787EFCD05D

View File

@ -7,6 +7,7 @@ import (
"io/ioutil" "io/ioutil"
"log" "log"
"math" "math"
"os"
"path/filepath" "path/filepath"
"sort" "sort"
"strconv" "strconv"
@ -31,6 +32,21 @@ func Frequencies(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit st
return err return err
} }
if _, ok := os.LookupEnv("ENRY_DEBUG"); ok {
log.Printf("Total samples: %d\n", freqs.LanguageTotal)
log.Printf("Total tokens: %d\n", freqs.TokensTotal)
keys := make([]string, 0, len(freqs.Languages))
for k := range freqs.Languages {
keys = append(keys, k)
}
sort.Strings(keys)
for _, k := range keys {
fmt.Printf(" %s: %d\n", k, freqs.Languages[k])
}
}
buf := &bytes.Buffer{} buf := &bytes.Buffer{}
if err := executeFrequenciesTemplate(buf, freqs, tmplPath, tmplName, commit); err != nil { if err := executeFrequenciesTemplate(buf, freqs, tmplPath, tmplName, commit); err != nil {
return err return err