diff --git a/common.go b/common.go index e70c2b0..d2f22ae 100644 --- a/common.go +++ b/common.go @@ -111,13 +111,6 @@ func getFirstLanguageAndSafe(languages []string) (language string, safe bool) { return } -// getLanguageBySpecificClassifier returns the most probably language for the given content using -// classifier to detect language. -func getLanguageBySpecificClassifier(content []byte, candidates []string, classifier classifier) (language string, safe bool) { - languages := getLanguagesBySpecificClassifier(content, candidates, classifier) - return getFirstLanguageAndSafe(languages) -} - // GetLanguages applies a sequence of strategies based on the given filename and content // to find out the most probable languages to return. // @@ -300,9 +293,11 @@ func GetLanguagesByShebang(_ string, content []byte, _ []string) (languages []st var ( shebangExecHack = regex.MustCompile(`exec (\w+).+\$0.+\$@`) pythonVersion = regex.MustCompile(`python\d\.\d+`) + envOptArgs = regex.MustCompile(`-[i0uCSv]*|--\S+`) + envVarArgs = regex.MustCompile(`\S+=\S+`) ) -func getInterpreter(data []byte) (interpreter string) { +func getInterpreter(data []byte) string { line := getFirstLine(data) if !hasShebang(line) { return "" @@ -317,7 +312,7 @@ func getInterpreter(data []byte) (interpreter string) { // Extract interpreter name from path. Use path.Base because // shebang on Cygwin/Windows still use a forward slash - interpreter = path.Base(string(splitted[0])) + interpreter := path.Base(string(splitted[0])) // #!/usr/bin/env [...] if interpreter == "env" { @@ -325,6 +320,13 @@ func getInterpreter(data []byte) (interpreter string) { // /usr/bin/env with no arguments return "" } + for len(splitted) > 2 { + if envOptArgs.Match(splitted[1]) || envVarArgs.Match(splitted[1]) { + splitted = append(splitted[:1], splitted[2:]...) + continue + } + break + } interpreter = path.Base(string(splitted[1])) } @@ -342,7 +344,7 @@ func getInterpreter(data []byte) (interpreter string) { interpreter = "" } - return + return interpreter } func getFirstLines(content []byte, count int) []byte { diff --git a/common_test.go b/common_test.go index e8e5fb9..0996b90 100644 --- a/common_test.go +++ b/common_test.go @@ -297,7 +297,49 @@ println("The shell script says ",vm.arglist.concat(" "));` {name: "TestGetLanguagesByShebang_9", content: []byte(multilineExecHack), expected: []string{"Tcl"}}, {name: "TestGetLanguagesByShebang_10", content: []byte(multilineNoExecHack), expected: []string{"Shell"}}, {name: "TestGetLanguagesByShebang_11", content: []byte(`#!/envinpath/python`), expected: []string{"Python"}}, - {name: "TestGetLanguagesByShebang_12", content: []byte(`#!`), expected: nil}, + + {name: "TestGetLanguagesByShebang_12", content: []byte(""), expected: nil}, + {name: "TestGetLanguagesByShebang_13", content: []byte("foo"), expected: nil}, + {name: "TestGetLanguagesByShebang_14", content: []byte("#bar"), expected: nil}, + {name: "TestGetLanguagesByShebang_15", content: []byte("#baz"), expected: nil}, + {name: "TestGetLanguagesByShebang_16", content: []byte("///"), expected: nil}, + {name: "TestGetLanguagesByShebang_17", content: []byte("\n\n\n\n\n"), expected: nil}, + {name: "TestGetLanguagesByShebang_18", content: []byte(" #!/usr/sbin/ruby"), expected: nil}, + {name: "TestGetLanguagesByShebang_19", content: []byte("\n#!/usr/sbin/ruby"), expected: nil}, + {name: "TestGetLanguagesByShebang_20", content: []byte("#!"), expected: nil}, + {name: "TestGetLanguagesByShebang_21", content: []byte("#! "), expected: nil}, + {name: "TestGetLanguagesByShebang_22", content: []byte("#!/usr/bin/env"), expected: nil}, + {name: "TestGetLanguagesByShebang_23", content: []byte("#!/usr/bin/env osascript -l JavaScript"), expected: nil}, + {name: "TestGetLanguagesByShebang_24", content: []byte("#!/usr/bin/env osascript -l AppleScript"), expected: nil}, + {name: "TestGetLanguagesByShebang_25", content: []byte("#!/usr/bin/env osascript -l foobar"), expected: nil}, + {name: "TestGetLanguagesByShebang_26", content: []byte("#!/usr/bin/osascript -l JavaScript"), expected: nil}, + {name: "TestGetLanguagesByShebang_27", content: []byte("#!/usr/bin/osascript -l foobar"), expected: nil}, + + {name: "TestGetLanguagesByShebang_28", content: []byte("#!/usr/sbin/ruby\n# bar"), expected: []string{"Ruby"}}, + {name: "TestGetLanguagesByShebang_29", content: []byte("#!/usr/bin/ruby\n# foo"), expected: []string{"Ruby"}}, + {name: "TestGetLanguagesByShebang_30", content: []byte("#!/usr/sbin/ruby"), expected: []string{"Ruby"}}, + {name: "TestGetLanguagesByShebang_31", content: []byte("#!/usr/sbin/ruby foo bar baz\n"), expected: []string{"Ruby"}}, + + {name: "TestGetLanguagesByShebang_32", content: []byte("#!/usr/bin/env Rscript\n# example R script\n#\n"), expected: []string{"R"}}, + {name: "TestGetLanguagesByShebang_33", content: []byte("#!/usr/bin/env ruby\n# baz"), expected: []string{"Ruby"}}, + + {name: "TestGetLanguagesByShebang_34", content: []byte("#!/usr/bin/bash\n"), expected: []string{"Shell"}}, + {name: "TestGetLanguagesByShebang_35", content: []byte("#!/bin/sh"), expected: []string{"Shell"}}, + {name: "TestGetLanguagesByShebang_36", content: []byte("#!/bin/python\n# foo\n# bar\n# baz"), expected: []string{"Python"}}, + {name: "TestGetLanguagesByShebang_37", content: []byte("#!/usr/bin/python2.7\n\n\n\n"), expected: []string{"Python"}}, + {name: "TestGetLanguagesByShebang_38", content: []byte("#!/usr/bin/python3\n\n\n\n"), expected: []string{"Python"}}, + {name: "TestGetLanguagesByShebang_39", content: []byte("#!/usr/bin/sbcl --script\n\n"), expected: []string{"Common Lisp"}}, + {name: "TestGetLanguagesByShebang_40", content: []byte("#! perl"), expected: []string{"Perl", "Pod"}}, + + {name: "TestGetLanguagesByShebang_41", content: []byte("#!/bin/sh\n\n\nexec ruby $0 $@"), expected: []string{"Ruby"}}, + {name: "TestGetLanguagesByShebang_42", content: []byte("#! /usr/bin/env A=003 B=149 C=150 D=xzd E=base64 F=tar G=gz H=head I=tail sh"), expected: []string{"Shell"}}, + {name: "TestGetLanguagesByShebang_43", content: []byte("#!/usr/bin/env foo=bar bar=foo python -cos=__import__(\"os\");"), expected: []string{"Python"}}, + {name: "TestGetLanguagesByShebang_44", content: []byte("#!/usr/bin/env osascript"), expected: []string{"AppleScript"}}, + {name: "TestGetLanguagesByShebang_45", content: []byte("#!/usr/bin/osascript"), expected: []string{"AppleScript"}}, + + {name: "TestGetLanguagesByShebang_46", content: []byte("#!/usr/bin/env -vS ruby -wKU\nputs ?t+?e+?s+?t"), expected: []string{"Ruby"}}, + {name: "TestGetLanguagesByShebang_47", content: []byte("#!/usr/bin/env --split-string sed -f\ny/a/A/"), expected: []string{"sed"}}, + {name: "TestGetLanguagesByShebang_48", content: []byte("#!/usr/bin/env -S GH_TOKEN=ghp_*** deno run --allow-net\nconsole.log(1);"), expected: []string{"TypeScript"}}, } for _, test := range tests {