mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-05-24 08:18:52 -03:00
Improve shebang parsing to detect correct interpreter
This commit is contained in:
parent
7c24e3d5d2
commit
4686615d9e
22
common.go
22
common.go
@ -111,13 +111,6 @@ func getFirstLanguageAndSafe(languages []string) (language string, safe bool) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// getLanguageBySpecificClassifier returns the most probable language for the given content using
|
|
||||||
// classifier to detect language.
|
|
||||||
func getLanguageBySpecificClassifier(content []byte, candidates []string, classifier classifier) (language string, safe bool) {
|
|
||||||
languages := getLanguagesBySpecificClassifier(content, candidates, classifier)
|
|
||||||
return getFirstLanguageAndSafe(languages)
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetLanguages applies a sequence of strategies based on the given filename and content
|
// GetLanguages applies a sequence of strategies based on the given filename and content
|
||||||
// to find out the most probable languages to return.
|
// to find out the most probable languages to return.
|
||||||
//
|
//
|
||||||
@ -300,9 +293,11 @@ func GetLanguagesByShebang(_ string, content []byte, _ []string) (languages []st
|
|||||||
var (
|
var (
|
||||||
shebangExecHack = regex.MustCompile(`exec (\w+).+\$0.+\$@`)
|
shebangExecHack = regex.MustCompile(`exec (\w+).+\$0.+\$@`)
|
||||||
pythonVersion = regex.MustCompile(`python\d\.\d+`)
|
pythonVersion = regex.MustCompile(`python\d\.\d+`)
|
||||||
|
envOptArgs = regex.MustCompile(`-[i0uCSv]*|--\S+`)
|
||||||
|
envVarArgs = regex.MustCompile(`\S+=\S+`)
|
||||||
)
|
)
|
||||||
|
|
||||||
func getInterpreter(data []byte) (interpreter string) {
|
func getInterpreter(data []byte) string {
|
||||||
line := getFirstLine(data)
|
line := getFirstLine(data)
|
||||||
if !hasShebang(line) {
|
if !hasShebang(line) {
|
||||||
return ""
|
return ""
|
||||||
@ -317,7 +312,7 @@ func getInterpreter(data []byte) (interpreter string) {
|
|||||||
|
|
||||||
// Extract interpreter name from path. Use path.Base because
|
// Extract interpreter name from path. Use path.Base because
|
||||||
// shebangs on Cygwin/Windows still use a forward slash
|
// shebangs on Cygwin/Windows still use a forward slash
|
||||||
interpreter = path.Base(string(splitted[0]))
|
interpreter := path.Base(string(splitted[0]))
|
||||||
|
|
||||||
// #!/usr/bin/env [...]
|
// #!/usr/bin/env [...]
|
||||||
if interpreter == "env" {
|
if interpreter == "env" {
|
||||||
@ -325,6 +320,13 @@ func getInterpreter(data []byte) (interpreter string) {
|
|||||||
// /usr/bin/env with no arguments
|
// /usr/bin/env with no arguments
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
for len(splitted) > 2 {
|
||||||
|
if envOptArgs.Match(splitted[1]) || envVarArgs.Match(splitted[1]) {
|
||||||
|
splitted = append(splitted[:1], splitted[2:]...)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
interpreter = path.Base(string(splitted[1]))
|
interpreter = path.Base(string(splitted[1]))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -342,7 +344,7 @@ func getInterpreter(data []byte) (interpreter string) {
|
|||||||
interpreter = ""
|
interpreter = ""
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return interpreter
|
||||||
}
|
}
|
||||||
|
|
||||||
func getFirstLines(content []byte, count int) []byte {
|
func getFirstLines(content []byte, count int) []byte {
|
||||||
|
@ -297,7 +297,49 @@ println("The shell script says ",vm.arglist.concat(" "));`
|
|||||||
{name: "TestGetLanguagesByShebang_9", content: []byte(multilineExecHack), expected: []string{"Tcl"}},
|
{name: "TestGetLanguagesByShebang_9", content: []byte(multilineExecHack), expected: []string{"Tcl"}},
|
||||||
{name: "TestGetLanguagesByShebang_10", content: []byte(multilineNoExecHack), expected: []string{"Shell"}},
|
{name: "TestGetLanguagesByShebang_10", content: []byte(multilineNoExecHack), expected: []string{"Shell"}},
|
||||||
{name: "TestGetLanguagesByShebang_11", content: []byte(`#!/envinpath/python`), expected: []string{"Python"}},
|
{name: "TestGetLanguagesByShebang_11", content: []byte(`#!/envinpath/python`), expected: []string{"Python"}},
|
||||||
{name: "TestGetLanguagesByShebang_12", content: []byte(`#!`), expected: nil},
|
|
||||||
|
{name: "TestGetLanguagesByShebang_12", content: []byte(""), expected: nil},
|
||||||
|
{name: "TestGetLanguagesByShebang_13", content: []byte("foo"), expected: nil},
|
||||||
|
{name: "TestGetLanguagesByShebang_14", content: []byte("#bar"), expected: nil},
|
||||||
|
{name: "TestGetLanguagesByShebang_15", content: []byte("#baz"), expected: nil},
|
||||||
|
{name: "TestGetLanguagesByShebang_16", content: []byte("///"), expected: nil},
|
||||||
|
{name: "TestGetLanguagesByShebang_17", content: []byte("\n\n\n\n\n"), expected: nil},
|
||||||
|
{name: "TestGetLanguagesByShebang_18", content: []byte(" #!/usr/sbin/ruby"), expected: nil},
|
||||||
|
{name: "TestGetLanguagesByShebang_19", content: []byte("\n#!/usr/sbin/ruby"), expected: nil},
|
||||||
|
{name: "TestGetLanguagesByShebang_20", content: []byte("#!"), expected: nil},
|
||||||
|
{name: "TestGetLanguagesByShebang_21", content: []byte("#! "), expected: nil},
|
||||||
|
{name: "TestGetLanguagesByShebang_22", content: []byte("#!/usr/bin/env"), expected: nil},
|
||||||
|
{name: "TestGetLanguagesByShebang_23", content: []byte("#!/usr/bin/env osascript -l JavaScript"), expected: nil},
|
||||||
|
{name: "TestGetLanguagesByShebang_24", content: []byte("#!/usr/bin/env osascript -l AppleScript"), expected: nil},
|
||||||
|
{name: "TestGetLanguagesByShebang_25", content: []byte("#!/usr/bin/env osascript -l foobar"), expected: nil},
|
||||||
|
{name: "TestGetLanguagesByShebang_26", content: []byte("#!/usr/bin/osascript -l JavaScript"), expected: nil},
|
||||||
|
{name: "TestGetLanguagesByShebang_27", content: []byte("#!/usr/bin/osascript -l foobar"), expected: nil},
|
||||||
|
|
||||||
|
{name: "TestGetLanguagesByShebang_28", content: []byte("#!/usr/sbin/ruby\n# bar"), expected: []string{"Ruby"}},
|
||||||
|
{name: "TestGetLanguagesByShebang_29", content: []byte("#!/usr/bin/ruby\n# foo"), expected: []string{"Ruby"}},
|
||||||
|
{name: "TestGetLanguagesByShebang_30", content: []byte("#!/usr/sbin/ruby"), expected: []string{"Ruby"}},
|
||||||
|
{name: "TestGetLanguagesByShebang_31", content: []byte("#!/usr/sbin/ruby foo bar baz\n"), expected: []string{"Ruby"}},
|
||||||
|
|
||||||
|
{name: "TestGetLanguagesByShebang_32", content: []byte("#!/usr/bin/env Rscript\n# example R script\n#\n"), expected: []string{"R"}},
|
||||||
|
{name: "TestGetLanguagesByShebang_33", content: []byte("#!/usr/bin/env ruby\n# baz"), expected: []string{"Ruby"}},
|
||||||
|
|
||||||
|
{name: "TestGetLanguagesByShebang_34", content: []byte("#!/usr/bin/bash\n"), expected: []string{"Shell"}},
|
||||||
|
{name: "TestGetLanguagesByShebang_35", content: []byte("#!/bin/sh"), expected: []string{"Shell"}},
|
||||||
|
{name: "TestGetLanguagesByShebang_36", content: []byte("#!/bin/python\n# foo\n# bar\n# baz"), expected: []string{"Python"}},
|
||||||
|
{name: "TestGetLanguagesByShebang_37", content: []byte("#!/usr/bin/python2.7\n\n\n\n"), expected: []string{"Python"}},
|
||||||
|
{name: "TestGetLanguagesByShebang_38", content: []byte("#!/usr/bin/python3\n\n\n\n"), expected: []string{"Python"}},
|
||||||
|
{name: "TestGetLanguagesByShebang_39", content: []byte("#!/usr/bin/sbcl --script\n\n"), expected: []string{"Common Lisp"}},
|
||||||
|
{name: "TestGetLanguagesByShebang_40", content: []byte("#! perl"), expected: []string{"Perl", "Pod"}},
|
||||||
|
|
||||||
|
{name: "TestGetLanguagesByShebang_41", content: []byte("#!/bin/sh\n\n\nexec ruby $0 $@"), expected: []string{"Ruby"}},
|
||||||
|
{name: "TestGetLanguagesByShebang_42", content: []byte("#! /usr/bin/env A=003 B=149 C=150 D=xzd E=base64 F=tar G=gz H=head I=tail sh"), expected: []string{"Shell"}},
|
||||||
|
{name: "TestGetLanguagesByShebang_43", content: []byte("#!/usr/bin/env foo=bar bar=foo python -cos=__import__(\"os\");"), expected: []string{"Python"}},
|
||||||
|
{name: "TestGetLanguagesByShebang_44", content: []byte("#!/usr/bin/env osascript"), expected: []string{"AppleScript"}},
|
||||||
|
{name: "TestGetLanguagesByShebang_45", content: []byte("#!/usr/bin/osascript"), expected: []string{"AppleScript"}},
|
||||||
|
|
||||||
|
{name: "TestGetLanguagesByShebang_46", content: []byte("#!/usr/bin/env -vS ruby -wKU\nputs ?t+?e+?s+?t"), expected: []string{"Ruby"}},
|
||||||
|
{name: "TestGetLanguagesByShebang_47", content: []byte("#!/usr/bin/env --split-string sed -f\ny/a/A/"), expected: []string{"sed"}},
|
||||||
|
{name: "TestGetLanguagesByShebang_48", content: []byte("#!/usr/bin/env -S GH_TOKEN=ghp_*** deno run --allow-net\nconsole.log(1);"), expected: []string{"TypeScript"}},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user