Expose LanguageInfo with all Linguist data

As discussed in https://github.com/go-enry/go-enry/issues/54, this provides an
API for accessing a LanguageInfo struct which is populated with all the data
from the Linguist YAML source file. Functions are provided to access the
LanguageInfo by name or ID.

The other top-level functions like GetLanguageExtensions, GetLanguageGroup, etc.
could in principle be implemented using this structure, which would simplify the
code generation. But that would be a big change so I didn't do any of that.
Perhaps in the next major version something like that would make sense.
This commit is contained in:
Luke Francl 2021-10-11 13:32:29 -07:00
parent adb553dc50
commit b248b21349
10 changed files with 26979 additions and 18 deletions

View File

@ -3,6 +3,7 @@ package enry
import ( import (
"bufio" "bufio"
"bytes" "bytes"
"fmt"
"path" "path"
"path/filepath" "path/filepath"
"strings" "strings"
@ -522,11 +523,11 @@ type Type int
// Type's values. // Type's values.
const ( const (
Unknown Type = iota Unknown Type = Type(data.TypeUnknown)
Data Data = Type(data.TypeData)
Programming Programming = Type(data.TypeProgramming)
Markup Markup = Type(data.TypeMarkup)
Prose Prose = Type(data.TypeProse)
) )
// GetLanguageType returns the type of the given language. // GetLanguageType returns the type of the given language.
@ -558,3 +559,21 @@ func GetLanguageGroup(language string) string {
return "" return ""
} }
// GetLanguageInfo looks up the LanguageInfo registered under the given
// language name in data.LanguageInfoByName. When the name is not a key of
// that map it returns a zero-value LanguageInfo together with an error.
func GetLanguageInfo(language string) (data.LanguageInfo, error) {
	info, found := data.LanguageInfoByName[language]
	if !found {
		return data.LanguageInfo{}, fmt.Errorf("language %q not found", language)
	}

	return info, nil
}
// GetLanguageInfoByID returns the LanguageInfo for a given language ID, or an
// error if no language with that ID is present in data.LanguageInfoByID.
// On error the returned LanguageInfo is the zero value.
func GetLanguageInfoByID(id int) (data.LanguageInfo, error) {
	if info, ok := data.LanguageInfoByID[id]; ok {
		return info, nil
	}

	// Use %d rather than %q: %q applied to an integer prints it as a quoted
	// character literal (e.g. 43 becomes '+'), which hides the ID that was
	// actually requested.
	return data.LanguageInfo{}, fmt.Errorf("language ID %d not found", id)
}

View File

@ -642,3 +642,56 @@ func (s *EnryTestSuite) TestGetLanguageID() {
assert.Equal(s.T(), test.found, found, fmt.Sprintf("%v: found = %t, expected: %t", test.name, found, test.found)) assert.Equal(s.T(), test.found, found, fmt.Sprintf("%v: found = %t, expected: %t", test.name, found, test.found))
} }
} }
// TestGetLanguageInfo checks that GetLanguageInfo resolves primary language
// names to the expected LanguageID, and that it reports an error for unknown
// names and for aliases.
func (s *EnryTestSuite) TestGetLanguageInfo() {
	tests := []struct {
		name       string
		language   string
		expectedID int
		wantErr    bool // named wantErr so the predeclared `error` type is not shadowed
	}{
		{name: "TestGetLanguageInfo_1", language: "1C Enterprise", expectedID: 0},
		{name: "TestGetLanguageInfo_2", language: "BestLanguageEver", wantErr: true},
		{name: "TestGetLanguageInfo_3", language: "C++", expectedID: 43},
		{name: "TestGetLanguageInfo_5", language: "Objective-C", expectedID: 257},
		{name: "TestGetLanguageInfo_6", language: "golang", wantErr: true}, // Aliases are not supported
		{name: "TestGetLanguageInfo_7", language: "Go", expectedID: 132},
		{name: "TestGetLanguageInfo_8", language: "Makefile", expectedID: 220},
	}

	for _, test := range tests {
		info, err := GetLanguageInfo(test.language)
		if test.wantErr {
			assert.Error(s.T(), err, "%v: expected error for %q", test.name, test.language)
			continue
		}

		assert.NoError(s.T(), err, "%v: unexpected error for %q", test.name, test.language)
		assert.Equal(s.T(), test.expectedID, info.LanguageID, fmt.Sprintf("%v: id = %v, expected: %v", test.name, info.LanguageID, test.expectedID))
	}
}
// TestGetLanguageInfoByID checks that GetLanguageInfoByID resolves language
// IDs to the expected language name, and that it reports an error for an ID
// that is not assigned to any language.
func (s *EnryTestSuite) TestGetLanguageInfoByID() {
	tests := []struct {
		name         string
		id           int
		expectedName string
		wantErr      bool // named wantErr so the predeclared `error` type is not shadowed
	}{
		{name: "TestGetLanguageInfoByID_1", id: 0, expectedName: "1C Enterprise"},
		{name: "TestGetLanguageInfoByID_2", id: -1, wantErr: true},
		{name: "TestGetLanguageInfoByID_3", id: 43, expectedName: "C++"},
		{name: "TestGetLanguageInfoByID_5", id: 257, expectedName: "Objective-C"},
		{name: "TestGetLanguageInfoByID_7", id: 132, expectedName: "Go"},
		{name: "TestGetLanguageInfoByID_8", id: 220, expectedName: "Makefile"},
	}

	for _, test := range tests {
		info, err := GetLanguageInfoByID(test.id)
		if test.wantErr {
			// %d, not %q: %q would print the integer ID as a quoted rune.
			assert.Error(s.T(), err, "%v: expected error for %d", test.name, test.id)
			continue
		}

		assert.NoError(s.T(), err, "%v: unexpected error for %d", test.name, test.id)
		// Report the name actually returned so a failure is diagnosable.
		assert.Equal(s.T(), test.expectedName, info.Name, fmt.Sprintf("%v: name = %q, expected: %q", test.name, info.Name, test.expectedName))
	}
}

26620
data/languageInfo.go Normal file

File diff suppressed because it is too large Load Diff

View File

@ -3,6 +3,48 @@
package data package data
// Type represents a language's type. Either data, programming, markup, prose, or unknown.
type Type int

// Type's values.
const (
	TypeUnknown Type = iota
	TypeData
	TypeProgramming
	TypeMarkup
	TypeProse
)

// typeNames holds the lowercase name of every known Type, indexed by the
// Type's value. The slot for TypeUnknown is intentionally left empty so that
// it is never matched by name; unknown values are handled by the fallbacks in
// String and TypeForString.
var typeNames = [...]string{
	TypeData:        "data",
	TypeProgramming: "programming",
	TypeMarkup:      "markup",
	TypeProse:       "prose",
}

// String returns the lowercase name of t ("data", "programming", "markup" or
// "prose"). Any other value, including TypeUnknown, yields "unknown".
func (t Type) String() string {
	if t > TypeUnknown && int(t) < len(typeNames) {
		return typeNames[t]
	}
	return "unknown"
}

// TypeForString returns the Type whose lowercase name equals s. The match is
// exact (case-sensitive); any other string yields TypeUnknown.
func TypeForString(s string) Type {
	for t := TypeData; int(t) < len(typeNames); t++ {
		if typeNames[t] == s {
			return t
		}
	}
	return TypeUnknown
}
var LanguagesType = map[string]int{ var LanguagesType = map[string]int{
"1C Enterprise": 2, "1C Enterprise": 2,
"4D": 2, "4D": 2,

View File

@ -0,0 +1,99 @@
package data
// LanguageInfo exposes the data for a language's Linguist YAML entry as a Go struct.
// See https://github.com/github/linguist/blob/master/lib/linguist/languages.yml
//
// Each field below is filled in by the code generator from the YAML key
// named in its comment.
type LanguageInfo struct {
	// Name is the language's primary name (the key of its YAML entry).
	Name string
	// FSName comes from the `fs_name` key.
	FSName string
	// Type comes from the `type` key, converted with TypeForString.
	Type Type
	// Color comes from the `color` key.
	Color string
	// Group comes from the `group` key.
	Group string
	// Aliases comes from the `aliases` key.
	Aliases []string
	// Extensions comes from the `extensions` key.
	Extensions []string
	// Interpreters comes from the `interpreters` key.
	Interpreters []string
	// Filenames comes from the `filenames` key.
	Filenames []string
	// MimeType comes from the `codemirror_mime_type` key.
	MimeType string
	// TMScope comes from the `tm_scope` key.
	TMScope string
	// AceMode comes from the `ace_mode` key.
	AceMode string
	// CodemirrorMode comes from the `codemirror_mode` key.
	CodemirrorMode string
	// Wrap comes from the `wrap` key.
	Wrap bool
	// LanguageID comes from the `language_id` key.
	LanguageID int
}
// LanguageInfoByName allows accessing LanguageInfo by a language's primary name.
var LanguageInfoByName = map[string]LanguageInfo{
{{range $language, $info := . -}}
"{{$language}}": LanguageInfo{
Name: "{{$language}}",
FSName: "{{$info.FSName}}",
Type: TypeForString("{{$info.Type}}"),
Color: "{{$info.Color}}",
Group: "{{$info.Group}}",
Aliases: []string{
{{range $alias := $info.Aliases -}}
"{{$alias}}",
{{end -}}
},
Extensions: []string{
{{range $extension := $info.Extensions -}}
"{{$extension}}",
{{end -}}
},
Interpreters: []string{
{{range $interpreter := $info.Interpreters -}}
"{{$interpreter}}",
{{end -}}
},
Filenames: []string{
{{range $filename := $info.Filenames -}}
"{{$filename}}",
{{end -}}
},
MimeType: "{{$info.MimeType}}",
TMScope: "{{$info.TMScope}}",
AceMode: "{{$info.AceMode}}",
CodemirrorMode: "{{$info.CodemirrorMode}}",
Wrap: {{$info.Wrap}},
LanguageID: {{$info.LanguageID}},
},
{{end -}}
}
// LanguageInfoByID allows accessing LanguageInfo by a language's ID.
var LanguageInfoByID = map[int]LanguageInfo{
{{range $language, $info := . -}}
{{$info.LanguageID}}: LanguageInfo{
Name: "{{$language}}",
FSName: "{{$info.FSName}}",
Type: TypeForString("{{$info.Type}}"),
Color: "{{$info.Color}}",
Group: "{{$info.Group}}",
Aliases: []string{
{{range $alias := $info.Aliases -}}
"{{$alias}}",
{{end -}}
},
Extensions: []string{
{{range $extension := $info.Extensions -}}
"{{$extension}}",
{{end -}}
},
Interpreters: []string{
{{range $interpreter := $info.Interpreters -}}
"{{$interpreter}}",
{{end -}}
},
Filenames: []string{
{{range $filename := $info.Filenames -}}
"{{$filename}}",
{{end -}}
},
MimeType: "{{$info.MimeType}}",
TMScope: "{{$info.TMScope}}",
AceMode: "{{$info.AceMode}}",
CodemirrorMode: "{{$info.CodemirrorMode}}",
Wrap: {{$info.Wrap}},
LanguageID: {{$info.LanguageID}},
},
{{end -}}
}

View File

@ -1,5 +1,47 @@
package data package data
// Type represents a language's type. Either data, programming, markup, prose, or unknown.
type Type int

// Type's values.
const (
	TypeUnknown Type = iota
	TypeData
	TypeProgramming
	TypeMarkup
	TypeProse
)

// typeNames holds the lowercase name of every known Type, indexed by the
// Type's value. The slot for TypeUnknown is intentionally left empty so that
// it is never matched by name; unknown values are handled by the fallbacks in
// String and TypeForString.
var typeNames = [...]string{
	TypeData:        "data",
	TypeProgramming: "programming",
	TypeMarkup:      "markup",
	TypeProse:       "prose",
}

// String returns the lowercase name of t ("data", "programming", "markup" or
// "prose"). Any other value, including TypeUnknown, yields "unknown".
func (t Type) String() string {
	if t > TypeUnknown && int(t) < len(typeNames) {
		return typeNames[t]
	}
	return "unknown"
}

// TypeForString returns the Type whose lowercase name equals s. The match is
// exact (case-sensitive); any other string yields TypeUnknown.
func TypeForString(s string) Type {
	for t := TypeData; int(t) < len(typeNames); t++ {
		if typeNames[t] == s {
			return t
		}
	}
	return TypeUnknown
}
var LanguagesType = map[string]int{ var LanguagesType = map[string]int{
{{range $language, $type := . -}} {{range $language, $type := . -}}
"{{ $language }}": {{ $type -}}, "{{ $language }}": {{ $type -}},

View File

@ -1,17 +1,29 @@
package generator package generator
import "sort" import (
"bytes"
"io"
"io/ioutil"
"sort"
"gopkg.in/yaml.v2"
)
type languageInfo struct { type languageInfo struct {
Type string `yaml:"type,omitempty"` FSName string `yaml:"fs_name"`
Color string `yaml:"color,omitempty"` Type string `yaml:"type,omitempty"`
Group string `yaml:"group,omitempty"` Color string `yaml:"color,omitempty"`
Aliases []string `yaml:"aliases,omitempty"` Group string `yaml:"group,omitempty"`
Extensions []string `yaml:"extensions,omitempty,flow"` Aliases []string `yaml:"aliases,omitempty"`
Interpreters []string `yaml:"interpreters,omitempty,flow"` Extensions []string `yaml:"extensions,omitempty,flow"`
Filenames []string `yaml:"filenames,omitempty,flow"` Interpreters []string `yaml:"interpreters,omitempty,flow"`
MimeType string `yaml:"codemirror_mime_type,omitempty,flow"` Filenames []string `yaml:"filenames,omitempty,flow"`
LanguageID *int `yaml:"language_id,omitempty"` MimeType string `yaml:"codemirror_mime_type,omitempty,flow"`
TMScope string `yaml:"tm_scope"`
AceMode string `yaml:"ace_mode"`
CodemirrorMode string `yaml:"codemirror_mode"`
Wrap bool `yaml:"wrap"`
LanguageID *int `yaml:"language_id,omitempty"`
} }
func getAlphabeticalOrderedKeys(languages map[string]*languageInfo) []string { func getAlphabeticalOrderedKeys(languages map[string]*languageInfo) []string {
@ -23,3 +35,28 @@ func getAlphabeticalOrderedKeys(languages map[string]*languageInfo) []string {
sort.Strings(keyList) sort.Strings(keyList)
return keyList return keyList
} }
// LanguageInfo generates maps in Go with language name -> LanguageInfo and language ID -> LanguageInfo.
// It is of generator.File type.
//
// It reads the YAML file at fileToParse, unmarshals it into a map of language
// name to languageInfo, renders the template identified by tmplPath/tmplName
// with that map, and hands the result to formatedWrite for outPath.
// samplesDir is not used by this generator. The first error encountered is
// returned as-is.
func LanguageInfo(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
	raw, err := ioutil.ReadFile(fileToParse)
	if err != nil {
		return err
	}

	languages := map[string]*languageInfo{}
	err = yaml.Unmarshal(raw, &languages)
	if err != nil {
		return err
	}

	var rendered bytes.Buffer
	err = executeLanguageInfoTemplate(&rendered, languages, tmplPath, tmplName, commit)
	if err != nil {
		return err
	}

	return formatedWrite(outPath, rendered.Bytes())
}
// executeLanguageInfoTemplate renders the language-info template into out by
// delegating to executeTemplate, passing the parsed languages map as the
// template data and no function map.
func executeLanguageInfoTemplate(out io.Writer, languages map[string]*languageInfo, tmplPath, tmplName, commit string) error {
	// TODO: use function map to put language by id?
	err := executeTemplate(out, tmplName, tmplPath, commit, nil, languages)
	return err
}

View File

@ -3,6 +3,48 @@
package data package data
// Type represents a language's type. Either data, programming, markup, prose, or unknown.
type Type int

// Type's values.
const (
	TypeUnknown Type = iota
	TypeData
	TypeProgramming
	TypeMarkup
	TypeProse
)

// typeNames holds the lowercase name of every known Type, indexed by the
// Type's value. The slot for TypeUnknown is intentionally left empty so that
// it is never matched by name; unknown values are handled by the fallbacks in
// String and TypeForString.
var typeNames = [...]string{
	TypeData:        "data",
	TypeProgramming: "programming",
	TypeMarkup:      "markup",
	TypeProse:       "prose",
}

// String returns the lowercase name of t ("data", "programming", "markup" or
// "prose"). Any other value, including TypeUnknown, yields "unknown".
func (t Type) String() string {
	if t > TypeUnknown && int(t) < len(typeNames) {
		return typeNames[t]
	}
	return "unknown"
}

// TypeForString returns the Type whose lowercase name equals s. The match is
// exact (case-sensitive); any other string yields TypeUnknown.
func TypeForString(s string) Type {
	for t := TypeData; int(t) < len(typeNames); t++ {
		if typeNames[t] == s {
			return t
		}
	}
	return TypeUnknown
}
var LanguagesType = map[string]int{ var LanguagesType = map[string]int{
"1C Enterprise": 2, "1C Enterprise": 2,
"4D": 2, "4D": 2,

View File

@ -2,9 +2,10 @@ package generator
import ( import (
"bytes" "bytes"
"gopkg.in/yaml.v2"
"io" "io"
"io/ioutil" "io/ioutil"
"gopkg.in/yaml.v2"
) )
var typeToTypeConst = map[string]int{ var typeToTypeConst = map[string]int{

View File

@ -87,9 +87,14 @@ var (
// id.go generation // id.go generation
idFile = "data/id.go" idFile = "data/id.go"
idTmplPath = "internal/code-generator/assets/id.go.tmpl" idTmplPath = filepath.Join(assetsDir, "id.go.tmpl")
idTmpl = "id.go.tmpl" idTmpl = "id.go.tmpl"
// languageInfo.go generation
languageInfoFile = filepath.Join("data", "languageInfo.go")
langaugeInfoTmplPath = filepath.Join(assetsDir, "languageInfo.go.tmpl")
langaugeInfoTmpl = "languageInfo.go.tmpl"
commitPath = filepath.Join(".linguist", ".git", "HEAD") commitPath = filepath.Join(".linguist", ".git", "HEAD")
) )
@ -124,11 +129,12 @@ func main() {
{generator.Colors, languagesYAML, "", colorsFile, colorsTmplPath, colorsTmpl, commit}, {generator.Colors, languagesYAML, "", colorsFile, colorsTmplPath, colorsTmpl, commit},
{generator.Groups, languagesYAML, "", groupsFile, groupsTmplPath, groupsTmpl, commit}, {generator.Groups, languagesYAML, "", groupsFile, groupsTmplPath, groupsTmpl, commit},
{generator.ID, languagesYAML, "", idFile, idTmplPath, idTmpl, commit}, {generator.ID, languagesYAML, "", idFile, idTmplPath, idTmpl, commit},
{generator.LanguageInfo, languagesYAML, "", languageInfoFile, langaugeInfoTmplPath, langaugeInfoTmpl, commit},
} }
for _, file := range fileList { for _, file := range fileList {
if err := file.generate(file.fileToParse, file.samplesDir, file.outPath, file.tmplPath, file.tmplName, file.commit); err != nil { if err := file.generate(file.fileToParse, file.samplesDir, file.outPath, file.tmplPath, file.tmplName, file.commit); err != nil {
log.Println(err) log.Printf("Error generating template %q to %q: %+v", file.tmplPath, file.outPath, err)
} }
} }
} }