mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-11-10 13:32:24 +00:00
807 lines
18 KiB
Go
807 lines
18 KiB
Go
|
package data
|
||
|
|
||
|
import (
|
||
|
"bytes"
|
||
|
"strings"
|
||
|
|
||
|
"github.com/go-enry/go-enry/v2/regex"
|
||
|
)
|
||
|
|
||
|
// GeneratedCodeExtensions is the set of file extensions that always denote
// generated files. The map is used as a set: only the keys carry meaning.
var GeneratedCodeExtensions = map[string]struct{}{
	// Files written by Xcode.
	".nib":             {},
	".xcworkspacedata": {},
	".xcuserstate":     {},
}
|
||
|
|
||
|
// GeneratedCodeNameMatcher reports, from a file name alone, whether that file
// is generated.
type GeneratedCodeNameMatcher func(string) bool
|
||
|
|
||
|
func nameMatches(pattern string) GeneratedCodeNameMatcher {
|
||
|
r := regex.MustCompile(pattern)
|
||
|
return func(name string) bool {
|
||
|
return r.MatchString(name)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func nameContains(pattern string) GeneratedCodeNameMatcher {
|
||
|
return func(name string) bool {
|
||
|
return strings.Contains(name, pattern)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func nameEndsWith(pattern string) GeneratedCodeNameMatcher {
|
||
|
return func(name string) bool {
|
||
|
return strings.HasSuffix(name, pattern)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// GeneratedCodeNameMatchers are all the matchers that check whether the code
|
||
|
// is generated based only on the file name.
|
||
|
var GeneratedCodeNameMatchers = []GeneratedCodeNameMatcher{
|
||
|
// Cocoa pods
|
||
|
nameMatches(`(^Pods|\/Pods)\/`),
|
||
|
|
||
|
// Carthage build
|
||
|
nameMatches(`(^|\/)Carthage\/Build\/`),
|
||
|
|
||
|
// NET designer file
|
||
|
nameMatches(`(?i)\.designer\.(cs|vb)$`),
|
||
|
|
||
|
// Generated NET specflow feature file
|
||
|
nameEndsWith(".feature.cs"),
|
||
|
|
||
|
// Node modules
|
||
|
nameContains("node_modules/"),
|
||
|
|
||
|
// Go vendor
|
||
|
nameMatches(`vendor\/([-0-9A-Za-z]+\.)+(com|edu|gov|in|me|net|org|fm|io)`),
|
||
|
|
||
|
// Go lock
|
||
|
nameEndsWith("Gopkg.lock"),
|
||
|
nameEndsWith("glide.lock"),
|
||
|
|
||
|
// Esy lock
|
||
|
nameMatches(`(^|\/)(\w+\.)?esy.lock$`),
|
||
|
|
||
|
// NPM shrinkwrap
|
||
|
nameEndsWith("npm-shrinkwrap.json"),
|
||
|
|
||
|
// NPM package lock
|
||
|
nameEndsWith("package-lock.json"),
|
||
|
|
||
|
// Yarn plugnplay
|
||
|
nameMatches(`(^|\/)\.pnp\.(c|m)?js$`),
|
||
|
|
||
|
// Godeps
|
||
|
nameContains("Godeps/"),
|
||
|
|
||
|
// Composer lock
|
||
|
nameEndsWith("composer.lock"),
|
||
|
|
||
|
// Generated by zephir
|
||
|
nameMatches(`.\.zep\.(?:c|h|php)$`),
|
||
|
|
||
|
// Cargo lock
|
||
|
nameEndsWith("Cargo.lock"),
|
||
|
|
||
|
// Pipenv lock
|
||
|
nameEndsWith("Pipfile.lock"),
|
||
|
|
||
|
// GraphQL relay
|
||
|
nameContains("__generated__/"),
|
||
|
}
|
||
|
|
||
|
// GeneratedCodeMatcher reports whether a file is generated code, given its
// path, its extension and its content.
type GeneratedCodeMatcher func(path, ext string, content []byte) bool
|
||
|
|
||
|
// GeneratedCodeMatchers is the list of all generated code matchers that
|
||
|
// rely on checking the content of the file to make the guess.
|
||
|
var GeneratedCodeMatchers = []GeneratedCodeMatcher{
|
||
|
isMinifiedFile,
|
||
|
hasSourceMapReference,
|
||
|
isSourceMap,
|
||
|
isCompiledCoffeeScript,
|
||
|
isGeneratedNetDocfile,
|
||
|
isGeneratedJavaScriptPEGParser,
|
||
|
isGeneratedPostScript,
|
||
|
isGeneratedGo,
|
||
|
isGeneratedProtobuf,
|
||
|
isGeneratedJavaScriptProtocolBuffer,
|
||
|
isGeneratedApacheThrift,
|
||
|
isGeneratedJNIHeader,
|
||
|
isVCRCassette,
|
||
|
isCompiledCythonFile,
|
||
|
isGeneratedModule,
|
||
|
isGeneratedUnity3DMeta,
|
||
|
isGeneratedRacc,
|
||
|
isGeneratedJFlex,
|
||
|
isGeneratedGrammarKit,
|
||
|
isGeneratedRoxygen2,
|
||
|
isGeneratedJison,
|
||
|
isGeneratedGRPCCpp,
|
||
|
isGeneratedDart,
|
||
|
isGeneratedPerlPPPortHeader,
|
||
|
isGeneratedGameMakerStudio,
|
||
|
isGeneratedGimp,
|
||
|
isGeneratedVisualStudio6,
|
||
|
isGeneratedHaxe,
|
||
|
isGeneratedHTML,
|
||
|
isGeneratedJooq,
|
||
|
}
|
||
|
|
||
|
// canBeMinified reports whether ext is one of the extensions the
// minification checks apply to: ".js" or ".css".
func canBeMinified(ext string) bool {
	switch ext {
	case ".js", ".css":
		return true
	default:
		return false
	}
}
|
||
|
|
||
|
// isMinifiedFile returns whether the file may be minified.
|
||
|
// We consider a minified file any css or js file whose average number of chars
|
||
|
// per line is more than 110.
|
||
|
func isMinifiedFile(path, ext string, content []byte) bool {
|
||
|
if !canBeMinified(ext) {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
var chars, lines uint64
|
||
|
forEachLine(content, func(line []byte) {
|
||
|
chars += uint64(len(line))
|
||
|
lines++
|
||
|
})
|
||
|
|
||
|
if lines == 0 {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
return chars/lines > 110
|
||
|
}
|
||
|
|
||
|
var sourceMapRegex = regex.MustCompile(`^\/[*\/][\#@] source(?:Mapping)?URL|sourceURL=`)
|
||
|
|
||
|
// hasSourceMapReference returns whether the file contains a reference to a
|
||
|
// source-map file.
|
||
|
func hasSourceMapReference(_ string, ext string, content []byte) bool {
|
||
|
if !canBeMinified(ext) {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
for _, line := range getLines(content, -2) {
|
||
|
if sourceMapRegex.Match(line) {
|
||
|
return true
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
var sourceMapRegexps = []regex.EnryRegexp{
|
||
|
regex.MustCompile(`^{"version":\d+,`),
|
||
|
regex.MustCompile(`^\/\*\* Begin line maps\. \*\*\/{`),
|
||
|
}
|
||
|
|
||
|
// isSourceMap returns whether the file itself is a source map.
|
||
|
func isSourceMap(path, _ string, content []byte) bool {
|
||
|
if strings.HasSuffix(path, ".js.map") || strings.HasSuffix(path, ".css.map") {
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
firstLine := getLines(content, 1)[0]
|
||
|
for _, r := range sourceMapRegexps {
|
||
|
if r.Match(firstLine) {
|
||
|
return true
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
func isCompiledCoffeeScript(path, ext string, content []byte) bool {
|
||
|
if ext != ".js" {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
firstLine := getLines(content, 1)[0]
|
||
|
lastLines := getLines(content, -2)
|
||
|
|
||
|
if string(firstLine) == "(function() {" &&
|
||
|
string(lastLines[1]) == "}).call(this);" &&
|
||
|
string(lastLines[0]) == "" {
|
||
|
score := 0
|
||
|
|
||
|
forEachLine(content, func(line []byte) {
|
||
|
if bytes.Contains(line, []byte("var ")) {
|
||
|
// Underscored temp vars are likely to be Coffee
|
||
|
score += 1 * countAppearancesInLine(line, "_fn", "_i", "_len", "_ref", "_results")
|
||
|
|
||
|
// bind and extend functions are very Coffee specific
|
||
|
score += 3 * countAppearancesInLine(line, "__bind", "__extends", "__hasProp", "__indexOf", "__slice")
|
||
|
}
|
||
|
})
|
||
|
|
||
|
// Require a score of 3. This is fairly abritrary. Consider tweaking later.
|
||
|
// See: https://github.com/github/linguist/blob/master/lib/linguist/generated.rb#L176-L213
|
||
|
return score >= 3
|
||
|
}
|
||
|
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
// isGeneratedNetDocfile reports whether an ".xml" file has the layout checked
// for generated .NET documentation files: after splitting the content on '\n'
// there must be more than three pieces, the second must contain "<doc>", the
// third "<assembly>", and the second-to-last "</doc>".
func isGeneratedNetDocfile(_, ext string, content []byte) bool {
	if ext != ".xml" {
		return false
	}

	rows := bytes.Split(content, []byte{'\n'})
	count := len(rows)
	if count <= 3 {
		return false
	}

	if !bytes.Contains(rows[1], []byte("<doc>")) {
		return false
	}
	if !bytes.Contains(rows[2], []byte("<assembly>")) {
		return false
	}
	return bytes.Contains(rows[count-2], []byte("</doc>"))
}
|
||
|
|
||
|
var pegJavaScriptGeneratedRegex = regex.MustCompile(`^(?:[^\/]|\/[^\*])*\/\*(?:[^\*]|\*[^\/])*Generated by PEG.js`)
|
||
|
|
||
|
func isGeneratedJavaScriptPEGParser(_, ext string, content []byte) bool {
|
||
|
if ext != ".js" {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
// PEG.js-generated parsers include a comment near the top of the file
|
||
|
// that marks them as such.
|
||
|
return pegJavaScriptGeneratedRegex.Match(bytes.Join(getLines(content, 5), []byte("")))
|
||
|
}
|
||
|
|
||
|
var postScriptType1And42Regex = regex.MustCompile(`(\n|\r\n|\r)\s*(?:currentfile eexec\s+|\/sfnts\s+\[)`)
|
||
|
|
||
|
var postScriptRegexes = []regex.EnryRegexp{
|
||
|
regex.MustCompile(`[0-9]|draw|mpage|ImageMagick|inkscape|MATLAB`),
|
||
|
regex.MustCompile(`PCBNEW|pnmtops|\(Unknown\)|Serif Affinity|Filterimage -tops`),
|
||
|
}
|
||
|
|
||
|
func isGeneratedPostScript(_, ext string, content []byte) bool {
|
||
|
if ext != ".ps" && ext != ".eps" && ext != ".pfa" {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
// Type 1 and Type 42 fonts converted to PostScript are stored as hex-encoded byte streams; these
|
||
|
// streams are always preceded the `eexec` operator (if Type 1), or the `/sfnts` key (if Type 42).
|
||
|
if postScriptType1And42Regex.Match(content) {
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
// We analyze the "%%Creator:" comment, which contains the author/generator
|
||
|
// of the file. If there is one, it should be in one of the first few lines.
|
||
|
var creator []byte
|
||
|
for _, line := range getLines(content, 10) {
|
||
|
if bytes.HasPrefix(line, []byte("%%Creator: ")) {
|
||
|
creator = line
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if len(creator) == 0 {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
// EAGLE doesn't include a version number when it generates PostScript.
|
||
|
// However, it does prepend its name to the document's "%%Title" field.
|
||
|
if bytes.Contains(creator, []byte("EAGLE")) {
|
||
|
for _, line := range getLines(content, 5) {
|
||
|
if bytes.HasPrefix(line, []byte("%%Title: EAGLE Drawing ")) {
|
||
|
return true
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Most generators write their version number, while human authors' or companies'
|
||
|
// names don't contain numbers. So look if the line contains digits. Also
|
||
|
// look for some special cases without version numbers.
|
||
|
for _, r := range postScriptRegexes {
|
||
|
if r.Match(creator) {
|
||
|
return true
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
func isGeneratedGo(_, ext string, content []byte) bool {
|
||
|
if ext != ".go" {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
lines := getLines(content, 40)
|
||
|
if len(lines) <= 1 {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
for _, line := range lines {
|
||
|
if bytes.Contains(line, []byte("Code generated by")) {
|
||
|
return true
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
var protoExtensions = map[string]struct{}{
|
||
|
".py": {},
|
||
|
".java": {},
|
||
|
".h": {},
|
||
|
".cc": {},
|
||
|
".cpp": {},
|
||
|
".m": {},
|
||
|
".rb": {},
|
||
|
".php": {},
|
||
|
}
|
||
|
|
||
|
func isGeneratedProtobuf(_, ext string, content []byte) bool {
|
||
|
if _, ok := protoExtensions[ext]; !ok {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
lines := getLines(content, 3)
|
||
|
if len(lines) <= 1 {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
for _, line := range lines {
|
||
|
if bytes.Contains(line, []byte("Generated by the protocol buffer compiler. DO NOT EDIT!")) {
|
||
|
return true
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
func isGeneratedJavaScriptProtocolBuffer(_, ext string, content []byte) bool {
|
||
|
if ext != ".js" {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
lines := getLines(content, 6)
|
||
|
if len(lines) < 6 {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
return bytes.Contains(lines[5], []byte("GENERATED CODE -- DO NOT EDIT!"))
|
||
|
}
|
||
|
|
||
|
var apacheThriftExtensions = map[string]struct{}{
|
||
|
".rb": {},
|
||
|
".py": {},
|
||
|
".go": {},
|
||
|
".js": {},
|
||
|
".m": {},
|
||
|
".java": {},
|
||
|
".h": {},
|
||
|
".cc": {},
|
||
|
".cpp": {},
|
||
|
".php": {},
|
||
|
}
|
||
|
|
||
|
func isGeneratedApacheThrift(_, ext string, content []byte) bool {
|
||
|
if _, ok := apacheThriftExtensions[ext]; !ok {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
for _, line := range getLines(content, 6) {
|
||
|
if bytes.Contains(line, []byte("Autogenerated by Thrift Compiler")) {
|
||
|
return true
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
func isGeneratedJNIHeader(_, ext string, content []byte) bool {
|
||
|
if ext != ".h" {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
lines := getLines(content, 2)
|
||
|
if len(lines) < 2 {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
return bytes.Contains(lines[0], []byte("/* DO NOT EDIT THIS FILE - it is machine generated */")) &&
|
||
|
bytes.Contains(lines[1], []byte("#include <jni.h>"))
|
||
|
}
|
||
|
|
||
|
func isVCRCassette(_, ext string, content []byte) bool {
|
||
|
if ext != ".yml" {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
lines := getLines(content, -2)
|
||
|
if len(lines) < 2 {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
return bytes.Contains(lines[1], []byte("recorded_with: VCR"))
|
||
|
}
|
||
|
|
||
|
func isCompiledCythonFile(_, ext string, content []byte) bool {
|
||
|
if ext != ".c" && ext != ".cpp" {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
lines := getLines(content, 1)
|
||
|
if len(lines) < 1 {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
return bytes.Contains(lines[0], []byte("Generated by Cython"))
|
||
|
}
|
||
|
|
||
|
func isGeneratedModule(_, ext string, content []byte) bool {
|
||
|
if ext != ".mod" {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
lines := getLines(content, 1)
|
||
|
if len(lines) < 1 {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
return bytes.Contains(lines[0], []byte("PCBNEW-LibModule-V")) ||
|
||
|
bytes.Contains(lines[0], []byte("GFORTRAN module version '"))
|
||
|
}
|
||
|
|
||
|
func isGeneratedUnity3DMeta(_, ext string, content []byte) bool {
|
||
|
if ext != ".meta" {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
lines := getLines(content, 1)
|
||
|
if len(lines) < 1 {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
return bytes.Contains(lines[0], []byte("fileFormatVersion: "))
|
||
|
}
|
||
|
|
||
|
func isGeneratedRacc(_, ext string, content []byte) bool {
|
||
|
if ext != ".rb" {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
lines := getLines(content, 3)
|
||
|
if len(lines) < 3 {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
return bytes.HasPrefix(lines[2], []byte("# This file is automatically generated by Racc"))
|
||
|
}
|
||
|
|
||
|
func isGeneratedJFlex(_, ext string, content []byte) bool {
|
||
|
if ext != ".java" {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
lines := getLines(content, 1)
|
||
|
if len(lines) < 1 {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
return bytes.HasPrefix(lines[0], []byte("/* The following code was generated by JFlex "))
|
||
|
}
|
||
|
|
||
|
func isGeneratedGrammarKit(_, ext string, content []byte) bool {
|
||
|
if ext != ".java" {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
lines := getLines(content, 1)
|
||
|
if len(lines) < 1 {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
return bytes.Contains(lines[0], []byte("// This is a generated file. Not intended for manual editing."))
|
||
|
}
|
||
|
|
||
|
func isGeneratedRoxygen2(_, ext string, content []byte) bool {
|
||
|
if ext != ".rd" {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
lines := getLines(content, 1)
|
||
|
if len(lines) < 1 {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
return bytes.Contains(lines[0], []byte("% Generated by roxygen2: do not edit by hand"))
|
||
|
}
|
||
|
|
||
|
func isGeneratedJison(_, ext string, content []byte) bool {
|
||
|
if ext != ".js" {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
lines := getLines(content, 1)
|
||
|
if len(lines) < 1 {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
return bytes.Contains(lines[0], []byte("/* parser generated by jison ")) ||
|
||
|
bytes.Contains(lines[0], []byte("/* generated by jison-lex "))
|
||
|
}
|
||
|
|
||
|
func isGeneratedGRPCCpp(_, ext string, content []byte) bool {
|
||
|
switch ext {
|
||
|
case ".cpp", ".hpp", ".h", ".cc":
|
||
|
lines := getLines(content, 1)
|
||
|
if len(lines) < 1 {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
return bytes.Contains(lines[0], []byte("// Generated by the gRPC"))
|
||
|
default:
|
||
|
return false
|
||
|
}
|
||
|
}
|
||
|
|
||
|
var dartRegex = regex.MustCompile(`generated code\W{2,3}do not modify`)
|
||
|
|
||
|
func isGeneratedDart(_, ext string, content []byte) bool {
|
||
|
if ext != ".dart" {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
lines := getLines(content, 1)
|
||
|
if len(lines) < 1 {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
return dartRegex.Match(bytes.ToLower(lines[0]))
|
||
|
}
|
||
|
|
||
|
func isGeneratedPerlPPPortHeader(name, _ string, content []byte) bool {
|
||
|
if !strings.HasSuffix(name, "ppport.h") {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
lines := getLines(content, 10)
|
||
|
if len(lines) < 10 {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
return bytes.Contains(lines[8], []byte("Automatically created by Devel::PPPort"))
|
||
|
}
|
||
|
|
||
|
var (
|
||
|
gameMakerStudioFirstLineRegex = regex.MustCompile(`^\d\.\d\.\d.+\|\{`)
|
||
|
gameMakerStudioThirdLineRegex = regex.MustCompile(`\"modelName\"\:\s*\"GM`)
|
||
|
)
|
||
|
|
||
|
func isGeneratedGameMakerStudio(_, ext string, content []byte) bool {
|
||
|
if ext != ".yy" && ext != ".yyp" {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
lines := getLines(content, 3)
|
||
|
if len(lines) < 3 {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
return gameMakerStudioThirdLineRegex.Match(lines[2]) ||
|
||
|
gameMakerStudioFirstLineRegex.Match(lines[0])
|
||
|
}
|
||
|
|
||
|
var gimpRegexes = []regex.EnryRegexp{
|
||
|
regex.MustCompile(`\/\* GIMP [a-zA-Z0-9\- ]+ C\-Source image dump \(.+?\.c\) \*\/`),
|
||
|
regex.MustCompile(`\/\* GIMP header image file format \([a-zA-Z0-9\- ]+\)\: .+?\.h \*\/`),
|
||
|
}
|
||
|
|
||
|
func isGeneratedGimp(_, ext string, content []byte) bool {
|
||
|
if ext != ".c" && ext != ".h" {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
lines := getLines(content, 1)
|
||
|
if len(lines) < 1 {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
for _, r := range gimpRegexes {
|
||
|
if r.Match(lines[0]) {
|
||
|
return true
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
func isGeneratedVisualStudio6(_, ext string, content []byte) bool {
|
||
|
if ext != ".dsp" {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
for _, l := range getLines(content, 3) {
|
||
|
if bytes.Contains(l, []byte("# Microsoft Developer Studio Generated Build File")) {
|
||
|
return true
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
var haxeExtensions = map[string]struct{}{
|
||
|
".js": {},
|
||
|
".py": {},
|
||
|
".lua": {},
|
||
|
".cpp": {},
|
||
|
".h": {},
|
||
|
".java": {},
|
||
|
".cs": {},
|
||
|
".php": {},
|
||
|
}
|
||
|
|
||
|
func isGeneratedHaxe(_, ext string, content []byte) bool {
|
||
|
if _, ok := haxeExtensions[ext]; !ok {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
for _, l := range getLines(content, 3) {
|
||
|
if bytes.Contains(l, []byte("Generated by Haxe")) {
|
||
|
return true
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
var (
|
||
|
doxygenRegex = regex.MustCompile(`<!--\s+Generated by Doxygen\s+[.0-9]+\s*-->`)
|
||
|
htmlMetaRegex = regex.MustCompile(`<meta(\s+[^>]+)>`)
|
||
|
htmlMetaContentRegex = regex.MustCompile(`\s+(name|content|value)\s*=\s*("[^"]+"|'[^']+'|[^\s"']+)`)
|
||
|
orgModeMetaRegex = regex.MustCompile(`org\s+mode`)
|
||
|
)
|
||
|
|
||
|
func isGeneratedHTML(_, ext string, content []byte) bool {
|
||
|
if ext != ".html" && ext != ".htm" && ext != ".xhtml" {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
lines := getLines(content, 30)
|
||
|
|
||
|
// Pkgdown
|
||
|
for _, l := range lines[:2] {
|
||
|
if bytes.Contains(l, []byte("<!-- Generated by pkgdown: do not edit by hand -->")) {
|
||
|
return true
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Mandoc
|
||
|
if len(lines) > 2 &&
|
||
|
bytes.HasPrefix(lines[2], []byte("<!-- This is an automatically generated file.")) {
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
// Doxygen
|
||
|
for _, l := range lines {
|
||
|
if doxygenRegex.Match(l) {
|
||
|
return true
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// HTML tag: <meta name="generator" content="" />
|
||
|
part := bytes.ToLower(bytes.Join(lines, []byte{' '}))
|
||
|
part = bytes.ReplaceAll(part, []byte{'\n'}, []byte{})
|
||
|
part = bytes.ReplaceAll(part, []byte{'\r'}, []byte{})
|
||
|
matches := htmlMetaRegex.FindAll(part, -1)
|
||
|
if len(matches) == 0 {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
for _, m := range matches {
|
||
|
var name, value, content string
|
||
|
ms := htmlMetaContentRegex.FindAllStringSubmatch(string(m), -1)
|
||
|
for _, m := range ms {
|
||
|
switch m[1] {
|
||
|
case "name":
|
||
|
name = m[2]
|
||
|
case "value":
|
||
|
value = m[2]
|
||
|
case "content":
|
||
|
content = m[2]
|
||
|
}
|
||
|
}
|
||
|
|
||
|
var val = value
|
||
|
if val == "" {
|
||
|
val = content
|
||
|
}
|
||
|
|
||
|
name = strings.Trim(name, `"'`)
|
||
|
val = strings.Trim(val, `"'`)
|
||
|
|
||
|
if name != "generator" || val == "" {
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
if strings.Contains(val, "jlatex2html") ||
|
||
|
strings.Contains(val, "latex2html") ||
|
||
|
strings.Contains(val, "groff") ||
|
||
|
strings.Contains(val, "makeinfo") ||
|
||
|
strings.Contains(val, "texi2html") ||
|
||
|
strings.Contains(val, "ronn") ||
|
||
|
orgModeMetaRegex.MatchString(val) {
|
||
|
return true
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
func isGeneratedJooq(_, ext string, content []byte) bool {
|
||
|
if ext != ".java" {
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
for _, l := range getLines(content, 2) {
|
||
|
if bytes.Contains(l, []byte("This file is generated by jOOQ.")) {
|
||
|
return true
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return false
|
||
|
}
|
||
|
|
||
|
// getLines returns up to the first n lines of content when n is non-negative,
// or up to the last -n lines, in reverse order (last line first), when n is
// negative. Lines are delimited by '\n' and returned without it; the returned
// slices alias content.
//
// When reading from the end, a line that starts within the final byte of
// content or later is skipped, so the empty piece after a trailing '\n' is
// not reported. The backward scan also stops once it reaches offset 0.
func getLines(content []byte, n int) [][]byte {
	var lines [][]byte
	total := len(content)

	if n >= 0 {
		for start := 0; start < total && len(lines) < n; {
			// A line runs up to the next '\n', or to the end of content.
			end := total
			if off := bytes.IndexByte(content[start:], '\n'); off >= 0 {
				end = start + off
			}

			lines = append(lines, content[start:end])
			start = end + 1
		}
		return lines
	}

	want := -n
	for end := total; end > 0 && len(lines) < want; {
		// start is the offset just past the previous '\n' (0 when none).
		start := bytes.LastIndexByte(content[:end], '\n') + 1
		if start < total-1 {
			lines = append(lines, content[start:end])
		}
		end = start - 1
	}

	return lines
}
|
||
|
|
||
|
// forEachLine calls cb once for every '\n'-delimited line of content, in
// order, passing the line without its terminator. A final line lacking a
// terminator is still passed; nothing is passed for the empty piece after a
// trailing '\n', and cb is never called for empty content. The slices handed
// to cb alias content.
func forEachLine(content []byte, cb func([]byte)) {
	total := len(content)
	for start := 0; start < total; {
		// A line runs up to the next '\n', or to the end of content.
		end := total
		if off := bytes.IndexByte(content[start:], '\n'); off >= 0 {
			end = start + off
		}

		cb(content[start:end])
		start = end + 1
	}
}
|
||
|
|
||
|
// countAppearancesInLine returns the total number of non-overlapping
// occurrences in line of all the given targets, summed over the targets.
func countAppearancesInLine(line []byte, targets ...string) int {
	total := 0
	for i := range targets {
		total += bytes.Count(line, []byte(targets[i]))
	}
	return total
}
|