mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-05-23 16:40:08 -03:00
optimize line count and limit maximal file size to read; fixes #101
Signed-off-by: Denys Smirnov <denys@sourced.tech>
This commit is contained in:
parent
c72d3c4af2
commit
6712d4219f
146
cmd/enry/main.go
146
cmd/enry/main.go
@ -6,6 +6,7 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
@ -30,8 +31,9 @@ func main() {
|
|||||||
showVersion := flag.Bool("version", false, "Show the enry version information")
|
showVersion := flag.Bool("version", false, "Show the enry version information")
|
||||||
onlyProg := flag.Bool("prog", false, "Only show programming file types in output")
|
onlyProg := flag.Bool("prog", false, "Only show programming file types in output")
|
||||||
countMode := flag.String("mode", "file", "the method used to count file size. Available options are: file, line and byte")
|
countMode := flag.String("mode", "file", "the method used to count file size. Available options are: file, line and byte")
|
||||||
|
limitKB := flag.Int64("limit", 16*1024, "Analyse first N KB of the file (-1 means no limit)")
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
limit := (*limitKB) * 1024
|
||||||
|
|
||||||
if *showVersion {
|
if *showVersion {
|
||||||
fmt.Println(version)
|
fmt.Println(version)
|
||||||
@ -49,7 +51,7 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if fileInfo.Mode().IsRegular() {
|
if fileInfo.Mode().IsRegular() {
|
||||||
err = printFileAnalysis(root)
|
err = printFileAnalysis(root, limit)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println(err)
|
fmt.Println(err)
|
||||||
}
|
}
|
||||||
@ -97,7 +99,7 @@ func main() {
|
|||||||
language, ok := enry.GetLanguageByExtension(path)
|
language, ok := enry.GetLanguageByExtension(path)
|
||||||
if !ok {
|
if !ok {
|
||||||
if language, ok = enry.GetLanguageByFilename(path); !ok {
|
if language, ok = enry.GetLanguageByFilename(path); !ok {
|
||||||
content, err := ioutil.ReadFile(path)
|
content, err := readFile(path, limit)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Println(err)
|
log.Println(err)
|
||||||
return nil
|
return nil
|
||||||
@ -123,21 +125,21 @@ func main() {
|
|||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
var buff bytes.Buffer
|
var buf bytes.Buffer
|
||||||
switch {
|
switch {
|
||||||
case *jsonFlag && !*breakdownFlag:
|
case *jsonFlag && !*breakdownFlag:
|
||||||
printJson(out, &buff)
|
printJson(out, &buf)
|
||||||
case *jsonFlag && *breakdownFlag:
|
case *jsonFlag && *breakdownFlag:
|
||||||
printBreakDown(out, &buff)
|
printBreakDown(out, &buf)
|
||||||
case *breakdownFlag:
|
case *breakdownFlag:
|
||||||
printPercents(out, &buff, *countMode)
|
printPercents(out, &buf, *countMode)
|
||||||
buff.WriteByte('\n')
|
buf.WriteByte('\n')
|
||||||
printBreakDown(out, &buff)
|
printBreakDown(out, &buf)
|
||||||
default:
|
default:
|
||||||
printPercents(out, &buff, *countMode)
|
printPercents(out, &buf, *countMode)
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Print(buff.String())
|
fmt.Print(buf.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
func usage() {
|
func usage() {
|
||||||
@ -165,10 +167,8 @@ func printBreakDown(out map[string][]string, buff *bytes.Buffer) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func printJson(out map[string][]string, buff *bytes.Buffer) {
|
func printJson(out map[string][]string, buf *bytes.Buffer) {
|
||||||
data, _ := json.Marshal(out)
|
json.NewEncoder(buf).Encode(out)
|
||||||
buff.Write(data)
|
|
||||||
buff.WriteByte('\n')
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// filelistError represents a failed operation that took place across multiple files.
|
// filelistError represents a failed operation that took place across multiple files.
|
||||||
@ -193,10 +193,12 @@ func printPercents(fSummary map[string][]string, buff *bytes.Buffer, mode string
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Reduce the list of files to a quantity of file type.
|
// Reduce the list of files to a quantity of file type.
|
||||||
var total float64
|
var (
|
||||||
fileValues := make(map[string]float64)
|
total float64
|
||||||
keys := []string{}
|
keys []string
|
||||||
var unreadableFiles filelistError
|
unreadableFiles filelistError
|
||||||
|
fileValues = make(map[string]float64)
|
||||||
|
)
|
||||||
for fType, files := range fSummary {
|
for fType, files := range fSummary {
|
||||||
val, err := reducer(files)
|
val, err := reducer(files)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -231,12 +233,7 @@ func lineCountValues(files []string) (float64, filelistError) {
|
|||||||
var filesErr filelistError
|
var filesErr filelistError
|
||||||
var t float64
|
var t float64
|
||||||
for _, fName := range files {
|
for _, fName := range files {
|
||||||
content, err := ioutil.ReadFile(fName)
|
l, _ := getLines(fName, nil)
|
||||||
if err != nil {
|
|
||||||
filesErr = append(filesErr, fName)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
l, _ := getLines(content)
|
|
||||||
t += float64(l)
|
t += float64(l)
|
||||||
}
|
}
|
||||||
return t, filesErr
|
return t, filesErr
|
||||||
@ -262,16 +259,26 @@ func byteCountValues(files []string) (float64, filelistError) {
|
|||||||
return t, filesErr
|
return t, filesErr
|
||||||
}
|
}
|
||||||
|
|
||||||
func printFileAnalysis(fName string) error {
|
func printFileAnalysis(file string, limit int64) error {
|
||||||
content, err := ioutil.ReadFile(fName)
|
data, err := readFile(file, limit)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
totalLines, nonBlank := getLines(content)
|
isSample := limit > 0 && len(data) == int(limit)
|
||||||
fileType := getFileType(fName, content)
|
|
||||||
language := enry.GetLanguage(fName, content)
|
full := data
|
||||||
mimeType := enry.GetMimeType(fName, language)
|
if isSample {
|
||||||
|
// communicate to getLines that we don't have full contents
|
||||||
|
full = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
totalLines, nonBlank := getLines(file, full)
|
||||||
|
|
||||||
|
// functions below can work on a sample
|
||||||
|
fileType := getFileType(file, data)
|
||||||
|
language := enry.GetLanguage(file, data)
|
||||||
|
mimeType := enry.GetMimeType(file, language)
|
||||||
|
|
||||||
fmt.Printf(
|
fmt.Printf(
|
||||||
`%s: %d lines (%d sloc)
|
`%s: %d lines (%d sloc)
|
||||||
@ -279,30 +286,75 @@ func printFileAnalysis(fName string) error {
|
|||||||
mime_type: %s
|
mime_type: %s
|
||||||
language: %s
|
language: %s
|
||||||
`,
|
`,
|
||||||
filepath.Base(fName), totalLines, nonBlank, fileType, mimeType, language,
|
filepath.Base(file), totalLines, nonBlank, fileType, mimeType, language,
|
||||||
)
|
)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getLines(b []byte) (total int, nonBlank int) {
|
func readFile(path string, limit int64) ([]byte, error) {
|
||||||
scanner := bufio.NewScanner(bytes.NewReader(b))
|
if limit <= 0 {
|
||||||
lineCt := 0
|
return ioutil.ReadFile(path)
|
||||||
blankCt := 0
|
|
||||||
|
|
||||||
for scanner.Scan() {
|
|
||||||
lineCt++
|
|
||||||
line := bytes.TrimSpace(scanner.Bytes())
|
|
||||||
if len(line) == 0 {
|
|
||||||
blankCt++
|
|
||||||
}
|
}
|
||||||
|
f, err := os.Open(path)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
}
|
}
|
||||||
// Scanner doesn't catch the case of last byte newline.
|
defer f.Close()
|
||||||
if len(b) > 0 && b[len(b)-1] == '\n' {
|
st, err := f.Stat()
|
||||||
lineCt++
|
if err != nil {
|
||||||
blankCt++
|
return nil, err
|
||||||
|
}
|
||||||
|
size := st.Size()
|
||||||
|
if limit > 0 && size > limit {
|
||||||
|
size = limit
|
||||||
|
}
|
||||||
|
buf := bytes.NewBuffer(nil)
|
||||||
|
buf.Grow(int(size))
|
||||||
|
_, err = io.Copy(buf, io.LimitReader(f, limit))
|
||||||
|
return buf.Bytes(), err
|
||||||
}
|
}
|
||||||
|
|
||||||
return lineCt, lineCt - blankCt
|
func getLines(file string, content []byte) (total, blank int) {
|
||||||
|
var r io.Reader
|
||||||
|
if content != nil {
|
||||||
|
r = bytes.NewReader(content)
|
||||||
|
} else {
|
||||||
|
// file not loaded to memory - stream it
|
||||||
|
f, err := os.Open(file)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
r = f
|
||||||
|
}
|
||||||
|
br := bufio.NewReader(r)
|
||||||
|
lastBlank := true
|
||||||
|
empty := true
|
||||||
|
for {
|
||||||
|
data, prefix, err := br.ReadLine()
|
||||||
|
if err == io.EOF {
|
||||||
|
break
|
||||||
|
} else if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if prefix {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
empty = false
|
||||||
|
total++
|
||||||
|
lastBlank = len(data) == 0
|
||||||
|
if lastBlank {
|
||||||
|
blank++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !empty && lastBlank {
|
||||||
|
total++
|
||||||
|
blank++
|
||||||
|
}
|
||||||
|
nonBlank := total - blank
|
||||||
|
return total, nonBlank
|
||||||
}
|
}
|
||||||
|
|
||||||
func getFileType(file string, content []byte) string {
|
func getFileType(file string, content []byte) string {
|
||||||
|
@ -27,10 +27,12 @@ func TestGetLines(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for i, test := range tests {
|
for i, test := range tests {
|
||||||
gotTotal, gotNonBlank := getLines([]byte(test.content))
|
t.Run("", func(t *testing.T) {
|
||||||
|
gotTotal, gotNonBlank := getLines("", []byte(test.content))
|
||||||
if gotTotal != test.wantTotal || gotNonBlank != test.wantNonBlank {
|
if gotTotal != test.wantTotal || gotNonBlank != test.wantNonBlank {
|
||||||
t.Errorf("wrong line counts obtained for test case #%d:\n %7s, %7s\nGOT: %7d, %7d\nWANT: %7d, %7d\n", i, "TOTAL", "NON_BLANK",
|
t.Errorf("wrong line counts obtained for test case #%d:\n %7s, %7s\nGOT: %7d, %7d\nWANT: %7d, %7d\n", i, "TOTAL", "NON_BLANK",
|
||||||
gotTotal, gotNonBlank, test.wantTotal, test.wantNonBlank)
|
gotTotal, gotNonBlank, test.wantTotal, test.wantNonBlank)
|
||||||
}
|
}
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user