mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-05-24 08:18:52 -03:00
generator: flag to debug building of bayesian classifier
It seems that reading ./samples/ from Linguist consumes a different number of files from filesystem on different OSes. This change adds ENRY_DEBUG env var to print some debug output about calculations of token stats from samples. TestPlan: - ENRY_DEBUG=1 go test -v ./internal/code-generator/generator \ -run Test_GeneratorTestSuite -testify.m TestGenerationFiles Signed-off-by: Alexander Bezzubov <bzz@apache.org>
This commit is contained in:
parent
b78e4423f0
commit
78eee0cf7e
@ -7,6 +7,7 @@ import (
|
|||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"log"
|
"log"
|
||||||
"math"
|
"math"
|
||||||
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"sort"
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
@ -31,6 +32,21 @@ func Frequencies(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit st
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if _, ok := os.LookupEnv("ENRY_DEBUG"); ok {
|
||||||
|
log.Printf("Total samples: %d\n", freqs.LanguageTotal)
|
||||||
|
log.Printf("Total tokens: %d\n", freqs.TokensTotal)
|
||||||
|
|
||||||
|
keys := make([]string, 0, len(freqs.Languages))
|
||||||
|
for k := range freqs.Languages {
|
||||||
|
keys = append(keys, k)
|
||||||
|
}
|
||||||
|
sort.Strings(keys)
|
||||||
|
|
||||||
|
for _, k := range keys {
|
||||||
|
fmt.Printf(" %s: %d\n", k, freqs.Languages[k])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
buf := &bytes.Buffer{}
|
buf := &bytes.Buffer{}
|
||||||
if err := executeFrequenciesTemplate(buf, freqs, tmplPath, tmplName, commit); err != nil {
|
if err := executeFrequenciesTemplate(buf, freqs, tmplPath, tmplName, commit); err != nil {
|
||||||
return err
|
return err
|
||||||
|
Loading…
x
Reference in New Issue
Block a user