Merge pull request #7 from go-enry/win-support

Code generator Win support
This commit is contained in:
Alexander 2020-03-29 23:31:10 +02:00 committed by GitHub
commit fa1c6f39b5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 111 additions and 69 deletions

View File

@ -13,10 +13,16 @@ jobs:
with:
go-version: ${{ matrix.go-version }}
- name: Set git on win to use LF
run: |
git config --global core.autocrlf false
git config --global core.eol lf
- name: Checkout code
uses: actions/checkout@v2
- name: Test
run: go test ./...
env:
ENRY_DEBUG: 1
test-oniguruma:
strategy:
matrix:

View File

@ -7,9 +7,11 @@ import (
"io/ioutil"
"log"
"math"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"text/template"
"github.com/go-enry/go-enry/v2/internal/tokenizer"
@ -31,6 +33,21 @@ func Frequencies(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit st
return err
}
if _, ok := os.LookupEnv("ENRY_DEBUG"); ok {
log.Printf("Total samples: %d\n", freqs.LanguageTotal)
log.Printf("Total tokens: %d\n", freqs.TokensTotal)
keys := make([]string, 0, len(freqs.Languages))
for k := range freqs.Languages {
keys = append(keys, k)
}
sort.Strings(keys)
for _, k := range keys {
fmt.Printf(" %s: %d\n", k, freqs.Languages[k])
}
}
buf := &bytes.Buffer{}
if err := executeFrequenciesTemplate(buf, freqs, tmplPath, tmplName, commit); err != nil {
return err
@ -91,49 +108,46 @@ func getFrequencies(samplesDir string) (*samplesFrequencies, error) {
}, nil
}
// readSamples collects ./samples/ filenames from the Linguist codebase, skipping symlinks.
func readSamples(samplesLangDir string) ([]string, error) {
const samplesLangFilesDir = "filenames"
sampleFiles, err := ioutil.ReadDir(samplesLangDir)
if err != nil {
return nil, err
}
const specialSubDir = "filenames"
var samples []string
for _, sampleFile := range sampleFiles {
filename := filepath.Join(samplesLangDir, sampleFile.Name())
if sampleFile.Mode().IsRegular() {
samples = append(samples, filename)
continue
}
if sampleFile.IsDir() && sampleFile.Name() == samplesLangFilesDir {
subSamples, err := readSubSamples(filename)
err := filepath.Walk(samplesLangDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return nil, err
fmt.Printf("failure accessing a path %q: %v\n", path, err)
return err
}
samples = append(samples, subSamples...)
if info.IsDir() {
switch info.Name() {
case filepath.Base(samplesLangDir):
return nil
case specialSubDir:
return nil
default:
return filepath.SkipDir
}
}
// skip git file symlinks on win and *nix
if isKnownSymlinkInLinguist(path) || !info.Mode().IsRegular() {
return nil
}
samples = append(samples, path)
return nil
})
return samples, nil
return samples, err
}
func readSubSamples(path string) ([]string, error) {
subSamples := []string{}
entries, err := ioutil.ReadDir(path)
if err != nil {
return nil, err
}
for _, entry := range entries {
if entry.Mode().IsRegular() {
subSamples = append(subSamples, filepath.Join(path, entry.Name()))
}
}
return subSamples, nil
// isKnownSymlinkInLinguist reports whether path ends with one of the sample
// files that are known to be symlinks in the Linguist repository.
//
// On Windows, there is no symlink support in Git [1] and those become regular text files,
// so we have to skip these files manually, maintaining a list here :/
// 1. https://github.com/git-for-windows/git/wiki/Symbolic-Links
//
// The list can be refreshed with:
//
//	$ find -L .linguist/samples -xtype l
//
// A known entry matches only when it covers whole trailing path components:
// it must either be the entire path or be preceded by a path separator.
// A sibling directory whose name merely ends the same way
// (e.g. "RMarkdown/symlink.md" vs "Markdown/symlink.md") is therefore not
// treated as a symlink.
func isKnownSymlinkInLinguist(path string) bool {
	knownSymlinks := [...]string{
		filepath.Join("Ant Build System", "filenames", "build.xml"),
		filepath.Join("Markdown", "symlink.md"),
	}
	for _, known := range knownSymlinks {
		if !strings.HasSuffix(path, known) {
			continue
		}
		// Reject partial directory-name matches: the suffix has to begin
		// at the start of the path or right after a separator.
		prefix := path[:len(path)-len(known)]
		if prefix == "" || os.IsPathSeparator(prefix[len(prefix)-1]) {
			return true
		}
	}
	return false
}
func getTokens(samples []string) ([]string, error) {

View File

@ -9,7 +9,10 @@ import (
"strings"
"testing"
"github.com/go-enry/go-enry/v2/internal/tokenizer"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
)
@ -302,7 +305,23 @@ func (s *GeneratorTestSuite) TestGenerationFiles() {
expected := normalizeSpaces(string(gold))
actual := normalizeSpaces(string(out))
assert.Equal(s.T(), expected, actual, "Test %s", test.name)
if expected != actual {
s.T().Logf("%s generated is different from %q", test.name, test.wantOut)
s.T().Logf("Expected %q", expected[:400])
s.T().Logf("Actual %q", actual[:400])
}
}
}
// TestTokenizerOnATS reads one ATS sample from the suite's Linguist checkout,
// tokenizes it and asserts the number of tokens produced. The expected count
// is the one obtained when the sample uses LF line endings.
func (s *GeneratorTestSuite) TestTokenizerOnATS() {
	const suspiciousSample = "samples/ATS/csv_parse.hats"

	samplePath := filepath.Join(s.tmpLinguist, suspiciousSample)
	raw, readErr := ioutil.ReadFile(samplePath)
	// Abort right away when the sample is unreadable: the count below
	// would be meaningless.
	require.NoError(s.T(), readErr)

	got := len(tokenizer.Tokenize(raw))
	assert.Equal(s.T(), 381, got, "Number of tokens using LF as line endings")
}
// normalizeSpaces returns a copy of str with whitespaces normalized.

View File

@ -3,81 +3,84 @@ package main
import (
"io/ioutil"
"log"
"path/filepath"
"github.com/go-enry/go-enry/v2/internal/code-generator/generator"
)
const (
// languages info file
languagesYAML = ".linguist/lib/linguist/languages.yml"
var (
// directories
samplesDir = filepath.Join(".linguist", "samples")
libDir = filepath.Join(".linguist", "lib", "linguist")
assetsDir = filepath.Join("internal", "code-generator", "assets")
// linguist's samples directory
samplesDir = ".linguist/samples"
// languages info file
languagesYAML = filepath.Join(libDir, "languages.yml")
// extension.go generation
extensionsFile = "data/extension.go"
extensionsTmplPath = "internal/code-generator/assets/extension.go.tmpl"
extensionsFile = filepath.Join("data", "extension.go")
extensionsTmplPath = filepath.Join(assetsDir, "extension.go.tmpl")
extensionsTmpl = "extension.go.tmpl"
// content.go generation
heuristicsYAML = ".linguist/lib/linguist/heuristics.yml"
contentFile = "data/content.go"
contentTmplPath = "internal/code-generator/assets/content.go.tmpl"
heuristicsYAML = filepath.Join(libDir, "heuristics.yml")
contentFile = filepath.Join("data", "content.go")
contentTmplPath = filepath.Join(assetsDir, "content.go.tmpl")
contentTmpl = "content.go.tmpl"
// vendor.go generation
vendorYAML = ".linguist/lib/linguist/vendor.yml"
vendorFile = "data/vendor.go"
vendorTmplPath = "internal/code-generator/assets/vendor.go.tmpl"
vendorYAML = filepath.Join(libDir, "vendor.yml")
vendorFile = filepath.Join("data", "vendor.go")
vendorTmplPath = filepath.Join(assetsDir, "vendor.go.tmpl")
vendorTmpl = "vendor.go.tmpl"
// documentation.go generation
documentationYAML = ".linguist/lib/linguist/documentation.yml"
documentationFile = "data/documentation.go"
documentationTmplPath = "internal/code-generator/assets/documentation.go.tmpl"
documentationYAML = filepath.Join(libDir, "documentation.yml")
documentationFile = filepath.Join("data", "documentation.go")
documentationTmplPath = filepath.Join(assetsDir, "documentation.go.tmpl")
documentationTmpl = "documentation.go.tmpl"
// type.go generation
typeFile = "data/type.go"
typeTmplPath = "internal/code-generator/assets/type.go.tmpl"
typeFile = filepath.Join("data", "type.go")
typeTmplPath = filepath.Join(assetsDir, "type.go.tmpl")
typeTmpl = "type.go.tmpl"
// interpreter.go generation
interpretersFile = "data/interpreter.go"
interpretersTmplPath = "internal/code-generator/assets/interpreter.go.tmpl"
interpretersFile = filepath.Join("data", "interpreter.go")
interpretersTmplPath = filepath.Join(assetsDir, "interpreter.go.tmpl")
interpretersTmpl = "interpreter.go.tmpl"
// filename.go generation
filenamesFile = "data/filename.go"
filenamesTmplPath = "internal/code-generator/assets/filename.go.tmpl"
filenamesFile = filepath.Join("data", "filename.go")
filenamesTmplPath = filepath.Join(assetsDir, "filename.go.tmpl")
filenamesTmpl = "filename.go.tmpl"
// alias.go generation
aliasesFile = "data/alias.go"
aliasesTmplPath = "internal/code-generator/assets/alias.go.tmpl"
aliasesFile = filepath.Join("data", "alias.go")
aliasesTmplPath = filepath.Join(assetsDir, "alias.go.tmpl")
aliasesTmpl = "alias.go.tmpl"
// frequencies.go generation
frequenciesFile = "data/frequencies.go"
frequenciesTmplPath = "internal/code-generator/assets/frequencies.go.tmpl"
frequenciesFile = filepath.Join("data", "frequencies.go")
frequenciesTmplPath = filepath.Join(assetsDir, "frequencies.go.tmpl")
frequenciesTmpl = "frequencies.go.tmpl"
// commit.go generation
commitFile = "data/commit.go"
commitTmplPath = "internal/code-generator/assets/commit.go.tmpl"
commitFile = filepath.Join("data", "commit.go")
commitTmplPath = filepath.Join(assetsDir, "commit.go.tmpl")
commitTmpl = "commit.go.tmpl"
// mimeType.go generation
mimeTypeFile = "data/mimeType.go"
mimeTypeTmplPath = "internal/code-generator/assets/mimeType.go.tmpl"
mimeTypeFile = filepath.Join("data", "mimeType.go")
mimeTypeTmplPath = filepath.Join(assetsDir, "mimeType.go.tmpl")
mimeTypeTmpl = "mimeType.go.tmpl"
// colors.go generation
colorsFile = "data/colors.go"
colorsTmplPath = "internal/code-generator/assets/colors.go.tmpl"
colorsFile = filepath.Join("data", "colors.go")
colorsTmplPath = filepath.Join(assetsDir, "colors.go.tmpl")
colorsTmpl = "colors.go.tmpl"
commitPath = ".linguist/.git/HEAD"
commitPath = filepath.Join(".linguist", ".git", "HEAD")
)
type generatorFiles struct {
@ -125,7 +128,7 @@ func getCommit(path string) (string, error) {
}
if string(commit) == "ref: refs/heads/master\n" {
path = ".linguist/.git/" + string(commit[5:len(commit)-1])
path = filepath.Join(".linguist", ".git", string(commit[5:len(commit)-1]))
commit, err = ioutil.ReadFile(path)
if err != nil {
return "", err