mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-05-24 08:18:52 -03:00
generator: flag to debug building of bayesian classifier
It seems that reading ./samples/ from Linguist picks up a different number of files from the filesystem on different OSes.

This change adds an ENRY_DEBUG env var that prints debug output about the token statistics computed from the samples.

Test plan:
- ENRY_DEBUG=1 go test -v ./internal/code-generator/generator \
    -run Test_GeneratorTestSuite -testify.m TestGenerationFiles

Signed-off-by: Alexander Bezzubov <bzz@apache.org>
This commit is contained in:
parent
b78e4423f0
commit
78eee0cf7e
@ -7,6 +7,7 @@ import (
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strconv"
|
||||
@ -31,6 +32,21 @@ func Frequencies(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit st
|
||||
return err
|
||||
}
|
||||
|
||||
if _, ok := os.LookupEnv("ENRY_DEBUG"); ok {
|
||||
log.Printf("Total samples: %d\n", freqs.LanguageTotal)
|
||||
log.Printf("Total tokens: %d\n", freqs.TokensTotal)
|
||||
|
||||
keys := make([]string, 0, len(freqs.Languages))
|
||||
for k := range freqs.Languages {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
|
||||
for _, k := range keys {
|
||||
fmt.Printf(" %s: %d\n", k, freqs.Languages[k])
|
||||
}
|
||||
}
|
||||
|
||||
buf := &bytes.Buffer{}
|
||||
if err := executeFrequenciesTemplate(buf, freqs, tmplPath, tmplName, commit); err != nil {
|
||||
return err
|
||||
|
Loading…
x
Reference in New Issue
Block a user