optimize line counting and limit the maximum file size to read; fixes #101

Signed-off-by: Denys Smirnov <denys@sourced.tech>
This commit is contained in:
Denys Smirnov 2018-04-28 17:20:10 +03:00 committed by Denys Smirnov
parent c72d3c4af2
commit 6712d4219f
2 changed files with 106 additions and 52 deletions

View File

@ -6,6 +6,7 @@ import (
"encoding/json" "encoding/json"
"flag" "flag"
"fmt" "fmt"
"io"
"io/ioutil" "io/ioutil"
"log" "log"
"os" "os"
@ -30,8 +31,9 @@ func main() {
showVersion := flag.Bool("version", false, "Show the enry version information") showVersion := flag.Bool("version", false, "Show the enry version information")
onlyProg := flag.Bool("prog", false, "Only show programming file types in output") onlyProg := flag.Bool("prog", false, "Only show programming file types in output")
countMode := flag.String("mode", "file", "the method used to count file size. Available options are: file, line and byte") countMode := flag.String("mode", "file", "the method used to count file size. Available options are: file, line and byte")
limitKB := flag.Int64("limit", 16*1024, "Analyse first N KB of the file (-1 means no limit)")
flag.Parse() flag.Parse()
limit := (*limitKB) * 1024
if *showVersion { if *showVersion {
fmt.Println(version) fmt.Println(version)
@ -49,7 +51,7 @@ func main() {
} }
if fileInfo.Mode().IsRegular() { if fileInfo.Mode().IsRegular() {
err = printFileAnalysis(root) err = printFileAnalysis(root, limit)
if err != nil { if err != nil {
fmt.Println(err) fmt.Println(err)
} }
@ -97,7 +99,7 @@ func main() {
language, ok := enry.GetLanguageByExtension(path) language, ok := enry.GetLanguageByExtension(path)
if !ok { if !ok {
if language, ok = enry.GetLanguageByFilename(path); !ok { if language, ok = enry.GetLanguageByFilename(path); !ok {
content, err := ioutil.ReadFile(path) content, err := readFile(path, limit)
if err != nil { if err != nil {
log.Println(err) log.Println(err)
return nil return nil
@ -123,21 +125,21 @@ func main() {
log.Fatal(err) log.Fatal(err)
} }
var buff bytes.Buffer var buf bytes.Buffer
switch { switch {
case *jsonFlag && !*breakdownFlag: case *jsonFlag && !*breakdownFlag:
printJson(out, &buff) printJson(out, &buf)
case *jsonFlag && *breakdownFlag: case *jsonFlag && *breakdownFlag:
printBreakDown(out, &buff) printBreakDown(out, &buf)
case *breakdownFlag: case *breakdownFlag:
printPercents(out, &buff, *countMode) printPercents(out, &buf, *countMode)
buff.WriteByte('\n') buf.WriteByte('\n')
printBreakDown(out, &buff) printBreakDown(out, &buf)
default: default:
printPercents(out, &buff, *countMode) printPercents(out, &buf, *countMode)
} }
fmt.Print(buff.String()) fmt.Print(buf.String())
} }
func usage() { func usage() {
@ -165,10 +167,8 @@ func printBreakDown(out map[string][]string, buff *bytes.Buffer) {
} }
} }
func printJson(out map[string][]string, buff *bytes.Buffer) { func printJson(out map[string][]string, buf *bytes.Buffer) {
data, _ := json.Marshal(out) json.NewEncoder(buf).Encode(out)
buff.Write(data)
buff.WriteByte('\n')
} }
// filelistError represents a failed operation that took place across multiple files. // filelistError represents a failed operation that took place across multiple files.
@ -193,10 +193,12 @@ func printPercents(fSummary map[string][]string, buff *bytes.Buffer, mode string
} }
// Reduce the list of files to a quantity of file type. // Reduce the list of files to a quantity of file type.
var total float64 var (
fileValues := make(map[string]float64) total float64
keys := []string{} keys []string
var unreadableFiles filelistError unreadableFiles filelistError
fileValues = make(map[string]float64)
)
for fType, files := range fSummary { for fType, files := range fSummary {
val, err := reducer(files) val, err := reducer(files)
if err != nil { if err != nil {
@ -231,12 +233,7 @@ func lineCountValues(files []string) (float64, filelistError) {
var filesErr filelistError var filesErr filelistError
var t float64 var t float64
for _, fName := range files { for _, fName := range files {
content, err := ioutil.ReadFile(fName) l, _ := getLines(fName, nil)
if err != nil {
filesErr = append(filesErr, fName)
continue
}
l, _ := getLines(content)
t += float64(l) t += float64(l)
} }
return t, filesErr return t, filesErr
@ -262,16 +259,26 @@ func byteCountValues(files []string) (float64, filelistError) {
return t, filesErr return t, filesErr
} }
func printFileAnalysis(fName string) error { func printFileAnalysis(file string, limit int64) error {
content, err := ioutil.ReadFile(fName) data, err := readFile(file, limit)
if err != nil { if err != nil {
return err return err
} }
totalLines, nonBlank := getLines(content) isSample := limit > 0 && len(data) == int(limit)
fileType := getFileType(fName, content)
language := enry.GetLanguage(fName, content) full := data
mimeType := enry.GetMimeType(fName, language) if isSample {
// communicate to getLines that we don't have full contents
full = nil
}
totalLines, nonBlank := getLines(file, full)
// functions below can work on a sample
fileType := getFileType(file, data)
language := enry.GetLanguage(file, data)
mimeType := enry.GetMimeType(file, language)
fmt.Printf( fmt.Printf(
`%s: %d lines (%d sloc) `%s: %d lines (%d sloc)
@ -279,30 +286,75 @@ func printFileAnalysis(fName string) error {
mime_type: %s mime_type: %s
language: %s language: %s
`, `,
filepath.Base(fName), totalLines, nonBlank, fileType, mimeType, language, filepath.Base(file), totalLines, nonBlank, fileType, mimeType, language,
) )
return nil return nil
} }
func getLines(b []byte) (total int, nonBlank int) { func readFile(path string, limit int64) ([]byte, error) {
scanner := bufio.NewScanner(bytes.NewReader(b)) if limit <= 0 {
lineCt := 0 return ioutil.ReadFile(path)
blankCt := 0 }
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
st, err := f.Stat()
if err != nil {
return nil, err
}
size := st.Size()
if limit > 0 && size > limit {
size = limit
}
buf := bytes.NewBuffer(nil)
buf.Grow(int(size))
_, err = io.Copy(buf, io.LimitReader(f, limit))
return buf.Bytes(), err
}
for scanner.Scan() { func getLines(file string, content []byte) (total, blank int) {
lineCt++ var r io.Reader
line := bytes.TrimSpace(scanner.Bytes()) if content != nil {
if len(line) == 0 { r = bytes.NewReader(content)
blankCt++ } else {
// file not loaded to memory - stream it
f, err := os.Open(file)
if err != nil {
fmt.Println(err)
return
}
defer f.Close()
r = f
}
br := bufio.NewReader(r)
lastBlank := true
empty := true
for {
data, prefix, err := br.ReadLine()
if err == io.EOF {
break
} else if err != nil {
fmt.Println(err)
break
}
if prefix {
continue
}
empty = false
total++
lastBlank = len(data) == 0
if lastBlank {
blank++
} }
} }
// Scanner doesn't catch the case of last byte newline. if !empty && lastBlank {
if len(b) > 0 && b[len(b)-1] == '\n' { total++
lineCt++ blank++
blankCt++
} }
nonBlank := total - blank
return lineCt, lineCt - blankCt return total, nonBlank
} }
func getFileType(file string, content []byte) string { func getFileType(file string, content []byte) string {

View File

@ -27,10 +27,12 @@ func TestGetLines(t *testing.T) {
} }
for i, test := range tests { for i, test := range tests {
gotTotal, gotNonBlank := getLines([]byte(test.content)) t.Run("", func(t *testing.T) {
if gotTotal != test.wantTotal || gotNonBlank != test.wantNonBlank { gotTotal, gotNonBlank := getLines("", []byte(test.content))
t.Errorf("wrong line counts obtained for test case #%d:\n %7s, %7s\nGOT: %7d, %7d\nWANT: %7d, %7d\n", i, "TOTAL", "NON_BLANK", if gotTotal != test.wantTotal || gotNonBlank != test.wantNonBlank {
gotTotal, gotNonBlank, test.wantTotal, test.wantNonBlank) t.Errorf("wrong line counts obtained for test case #%d:\n %7s, %7s\nGOT: %7d, %7d\nWANT: %7d, %7d\n", i, "TOTAL", "NON_BLANK",
} gotTotal, gotNonBlank, test.wantTotal, test.wantNonBlank)
}
})
} }
} }