mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-11-12 22:42:23 +00:00
8ff885a3a8
Closes #17 Implements the IsGenerated helper function to filter out generated files using the rules and matchers in: - https://github.com/github/linguist/blob/master/lib/linguist/generated.rb Since the vast majority of matchers have very different logic, it cannot be autogenerated directly from linguist like other logic in enry, so it's translated by hand. There are three different types of matchers in this implementation: - By extension, which mark files as generated based only on the extension. These are the fastest matchers, so they're done first. - By file name, which match patterns against the filename. These are performed in second place. Unlike linguist, we try to use string functions instead of regexps as much as possible. - Finally, the rest of the matchers, which go into the content and try to identify whether the file is generated based on the content. Unlike linguist, we try to read only the content we need, avoid splitting it all unless necessary, and use byte functions instead of regexps as much as possible. Signed-off-by: Miguel Molina <miguel@erizocosmi.co>
807 lines
18 KiB
Go
807 lines
18 KiB
Go
package data
|
|
|
|
import (
|
|
"bytes"
|
|
"strings"
|
|
|
|
"github.com/go-enry/go-enry/v2/regex"
|
|
)
|
|
|
|
// GeneratedCodeExtensions is the set of file extensions (leading dot
// included) whose files are considered generated on the extension alone.
var GeneratedCodeExtensions = map[string]struct{}{
	// Xcode files.
	".nib":             {},
	".xcuserstate":     {},
	".xcworkspacedata": {},
}
|
|
|
|
// GeneratedCodeNameMatcher reports whether a file should be treated as
// generated, judging by its name only.
type GeneratedCodeNameMatcher func(name string) bool
|
|
|
|
func nameMatches(pattern string) GeneratedCodeNameMatcher {
|
|
r := regex.MustCompile(pattern)
|
|
return func(name string) bool {
|
|
return r.MatchString(name)
|
|
}
|
|
}
|
|
|
|
func nameContains(pattern string) GeneratedCodeNameMatcher {
|
|
return func(name string) bool {
|
|
return strings.Contains(name, pattern)
|
|
}
|
|
}
|
|
|
|
func nameEndsWith(pattern string) GeneratedCodeNameMatcher {
|
|
return func(name string) bool {
|
|
return strings.HasSuffix(name, pattern)
|
|
}
|
|
}
|
|
|
|
// GeneratedCodeNameMatchers are all the matchers that check whether the code
|
|
// is generated based only on the file name.
|
|
var GeneratedCodeNameMatchers = []GeneratedCodeNameMatcher{
|
|
// Cocoa pods
|
|
nameMatches(`(^Pods|\/Pods)\/`),
|
|
|
|
// Carthage build
|
|
nameMatches(`(^|\/)Carthage\/Build\/`),
|
|
|
|
// NET designer file
|
|
nameMatches(`(?i)\.designer\.(cs|vb)$`),
|
|
|
|
// Generated NET specflow feature file
|
|
nameEndsWith(".feature.cs"),
|
|
|
|
// Node modules
|
|
nameContains("node_modules/"),
|
|
|
|
// Go vendor
|
|
nameMatches(`vendor\/([-0-9A-Za-z]+\.)+(com|edu|gov|in|me|net|org|fm|io)`),
|
|
|
|
// Go lock
|
|
nameEndsWith("Gopkg.lock"),
|
|
nameEndsWith("glide.lock"),
|
|
|
|
// Esy lock
|
|
nameMatches(`(^|\/)(\w+\.)?esy.lock$`),
|
|
|
|
// NPM shrinkwrap
|
|
nameEndsWith("npm-shrinkwrap.json"),
|
|
|
|
// NPM package lock
|
|
nameEndsWith("package-lock.json"),
|
|
|
|
// Yarn plugnplay
|
|
nameMatches(`(^|\/)\.pnp\.(c|m)?js$`),
|
|
|
|
// Godeps
|
|
nameContains("Godeps/"),
|
|
|
|
// Composer lock
|
|
nameEndsWith("composer.lock"),
|
|
|
|
// Generated by zephir
|
|
nameMatches(`.\.zep\.(?:c|h|php)$`),
|
|
|
|
// Cargo lock
|
|
nameEndsWith("Cargo.lock"),
|
|
|
|
// Pipenv lock
|
|
nameEndsWith("Pipfile.lock"),
|
|
|
|
// GraphQL relay
|
|
nameContains("__generated__/"),
|
|
}
|
|
|
|
// GeneratedCodeMatcher reports whether a file is generated code, given its
// path, its extension and its content.
type GeneratedCodeMatcher func(path string, ext string, content []byte) bool
|
|
|
|
// GeneratedCodeMatchers is the list of all generated code matchers that
|
|
// rely on checking the content of the file to make the guess.
|
|
var GeneratedCodeMatchers = []GeneratedCodeMatcher{
|
|
isMinifiedFile,
|
|
hasSourceMapReference,
|
|
isSourceMap,
|
|
isCompiledCoffeeScript,
|
|
isGeneratedNetDocfile,
|
|
isGeneratedJavaScriptPEGParser,
|
|
isGeneratedPostScript,
|
|
isGeneratedGo,
|
|
isGeneratedProtobuf,
|
|
isGeneratedJavaScriptProtocolBuffer,
|
|
isGeneratedApacheThrift,
|
|
isGeneratedJNIHeader,
|
|
isVCRCassette,
|
|
isCompiledCythonFile,
|
|
isGeneratedModule,
|
|
isGeneratedUnity3DMeta,
|
|
isGeneratedRacc,
|
|
isGeneratedJFlex,
|
|
isGeneratedGrammarKit,
|
|
isGeneratedRoxygen2,
|
|
isGeneratedJison,
|
|
isGeneratedGRPCCpp,
|
|
isGeneratedDart,
|
|
isGeneratedPerlPPPortHeader,
|
|
isGeneratedGameMakerStudio,
|
|
isGeneratedGimp,
|
|
isGeneratedVisualStudio6,
|
|
isGeneratedHaxe,
|
|
isGeneratedHTML,
|
|
isGeneratedJooq,
|
|
}
|
|
|
|
// canBeMinified reports whether ext is one of the extensions for which the
// minification checks apply: ".js" or ".css" (exact, case-sensitive match).
func canBeMinified(ext string) bool {
	switch ext {
	case ".js", ".css":
		return true
	default:
		return false
	}
}
|
|
|
|
// isMinifiedFile returns whether the file may be minified.
|
|
// We consider a minified file any css or js file whose average number of chars
|
|
// per line is more than 110.
|
|
func isMinifiedFile(path, ext string, content []byte) bool {
|
|
if !canBeMinified(ext) {
|
|
return false
|
|
}
|
|
|
|
var chars, lines uint64
|
|
forEachLine(content, func(line []byte) {
|
|
chars += uint64(len(line))
|
|
lines++
|
|
})
|
|
|
|
if lines == 0 {
|
|
return false
|
|
}
|
|
|
|
return chars/lines > 110
|
|
}
|
|
|
|
// sourceMapRegex matches a line that either starts with "//" or "/*" followed
// by '#' or '@' and " sourceURL" / " sourceMappingURL", or contains the text
// "sourceURL=" anywhere.
var sourceMapRegex = regex.MustCompile(`^\/[*\/][\#@] source(?:Mapping)?URL|sourceURL=`)
|
|
|
|
// hasSourceMapReference returns whether the file contains a reference to a
|
|
// source-map file.
|
|
func hasSourceMapReference(_ string, ext string, content []byte) bool {
|
|
if !canBeMinified(ext) {
|
|
return false
|
|
}
|
|
|
|
for _, line := range getLines(content, -2) {
|
|
if sourceMapRegex.Match(line) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
var sourceMapRegexps = []regex.EnryRegexp{
|
|
regex.MustCompile(`^{"version":\d+,`),
|
|
regex.MustCompile(`^\/\*\* Begin line maps\. \*\*\/{`),
|
|
}
|
|
|
|
// isSourceMap returns whether the file itself is a source map.
|
|
func isSourceMap(path, _ string, content []byte) bool {
|
|
if strings.HasSuffix(path, ".js.map") || strings.HasSuffix(path, ".css.map") {
|
|
return true
|
|
}
|
|
|
|
firstLine := getLines(content, 1)[0]
|
|
for _, r := range sourceMapRegexps {
|
|
if r.Match(firstLine) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
func isCompiledCoffeeScript(path, ext string, content []byte) bool {
|
|
if ext != ".js" {
|
|
return false
|
|
}
|
|
|
|
firstLine := getLines(content, 1)[0]
|
|
lastLines := getLines(content, -2)
|
|
|
|
if string(firstLine) == "(function() {" &&
|
|
string(lastLines[1]) == "}).call(this);" &&
|
|
string(lastLines[0]) == "" {
|
|
score := 0
|
|
|
|
forEachLine(content, func(line []byte) {
|
|
if bytes.Contains(line, []byte("var ")) {
|
|
// Underscored temp vars are likely to be Coffee
|
|
score += 1 * countAppearancesInLine(line, "_fn", "_i", "_len", "_ref", "_results")
|
|
|
|
// bind and extend functions are very Coffee specific
|
|
score += 3 * countAppearancesInLine(line, "__bind", "__extends", "__hasProp", "__indexOf", "__slice")
|
|
}
|
|
})
|
|
|
|
// Require a score of 3. This is fairly abritrary. Consider tweaking later.
|
|
// See: https://github.com/github/linguist/blob/master/lib/linguist/generated.rb#L176-L213
|
|
return score >= 3
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// isGeneratedNetDocfile reports whether an .xml file has the shape of a
// generated .NET documentation file: when content is split on '\n' into more
// than three pieces, the second piece contains "<doc>", the third contains
// "<assembly>" and the second-to-last contains "</doc>".
func isGeneratedNetDocfile(_, ext string, content []byte) bool {
	if ext != ".xml" {
		return false
	}

	rows := bytes.Split(content, []byte("\n"))
	total := len(rows)
	if total < 4 {
		return false
	}

	switch {
	case !bytes.Contains(rows[1], []byte("<doc>")):
		return false
	case !bytes.Contains(rows[2], []byte("<assembly>")):
		return false
	default:
		return bytes.Contains(rows[total-2], []byte("</doc>"))
	}
}
|
|
|
|
// pegJavaScriptGeneratedRegex is anchored at the start of the input and looks
// for a "/*" comment opener followed by the text "Generated by PEG" + any one
// character + "js" (the '.' in the pattern is unescaped). The character
// classes appear intended to skip text before the first "/*" and to stay
// inside that comment.
var pegJavaScriptGeneratedRegex = regex.MustCompile(`^(?:[^\/]|\/[^\*])*\/\*(?:[^\*]|\*[^\/])*Generated by PEG.js`)
|
|
|
|
func isGeneratedJavaScriptPEGParser(_, ext string, content []byte) bool {
|
|
if ext != ".js" {
|
|
return false
|
|
}
|
|
|
|
// PEG.js-generated parsers include a comment near the top of the file
|
|
// that marks them as such.
|
|
return pegJavaScriptGeneratedRegex.Match(bytes.Join(getLines(content, 5), []byte("")))
|
|
}
|
|
|
|
var postScriptType1And42Regex = regex.MustCompile(`(\n|\r\n|\r)\s*(?:currentfile eexec\s+|\/sfnts\s+\[)`)
|
|
|
|
var postScriptRegexes = []regex.EnryRegexp{
|
|
regex.MustCompile(`[0-9]|draw|mpage|ImageMagick|inkscape|MATLAB`),
|
|
regex.MustCompile(`PCBNEW|pnmtops|\(Unknown\)|Serif Affinity|Filterimage -tops`),
|
|
}
|
|
|
|
func isGeneratedPostScript(_, ext string, content []byte) bool {
|
|
if ext != ".ps" && ext != ".eps" && ext != ".pfa" {
|
|
return false
|
|
}
|
|
|
|
// Type 1 and Type 42 fonts converted to PostScript are stored as hex-encoded byte streams; these
|
|
// streams are always preceded the `eexec` operator (if Type 1), or the `/sfnts` key (if Type 42).
|
|
if postScriptType1And42Regex.Match(content) {
|
|
return true
|
|
}
|
|
|
|
// We analyze the "%%Creator:" comment, which contains the author/generator
|
|
// of the file. If there is one, it should be in one of the first few lines.
|
|
var creator []byte
|
|
for _, line := range getLines(content, 10) {
|
|
if bytes.HasPrefix(line, []byte("%%Creator: ")) {
|
|
creator = line
|
|
break
|
|
}
|
|
}
|
|
|
|
if len(creator) == 0 {
|
|
return false
|
|
}
|
|
|
|
// EAGLE doesn't include a version number when it generates PostScript.
|
|
// However, it does prepend its name to the document's "%%Title" field.
|
|
if bytes.Contains(creator, []byte("EAGLE")) {
|
|
for _, line := range getLines(content, 5) {
|
|
if bytes.HasPrefix(line, []byte("%%Title: EAGLE Drawing ")) {
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
|
|
// Most generators write their version number, while human authors' or companies'
|
|
// names don't contain numbers. So look if the line contains digits. Also
|
|
// look for some special cases without version numbers.
|
|
for _, r := range postScriptRegexes {
|
|
if r.Match(creator) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
func isGeneratedGo(_, ext string, content []byte) bool {
|
|
if ext != ".go" {
|
|
return false
|
|
}
|
|
|
|
lines := getLines(content, 40)
|
|
if len(lines) <= 1 {
|
|
return false
|
|
}
|
|
|
|
for _, line := range lines {
|
|
if bytes.Contains(line, []byte("Code generated by")) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// protoExtensions is the set of extensions for which the protocol buffer
// compiler banner is looked up.
var protoExtensions = map[string]struct{}{
	".cc":   {},
	".cpp":  {},
	".h":    {},
	".java": {},
	".m":    {},
	".php":  {},
	".py":   {},
	".rb":   {},
}
|
|
|
|
func isGeneratedProtobuf(_, ext string, content []byte) bool {
|
|
if _, ok := protoExtensions[ext]; !ok {
|
|
return false
|
|
}
|
|
|
|
lines := getLines(content, 3)
|
|
if len(lines) <= 1 {
|
|
return false
|
|
}
|
|
|
|
for _, line := range lines {
|
|
if bytes.Contains(line, []byte("Generated by the protocol buffer compiler. DO NOT EDIT!")) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
func isGeneratedJavaScriptProtocolBuffer(_, ext string, content []byte) bool {
|
|
if ext != ".js" {
|
|
return false
|
|
}
|
|
|
|
lines := getLines(content, 6)
|
|
if len(lines) < 6 {
|
|
return false
|
|
}
|
|
|
|
return bytes.Contains(lines[5], []byte("GENERATED CODE -- DO NOT EDIT!"))
|
|
}
|
|
|
|
// apacheThriftExtensions is the set of extensions for which the Thrift
// compiler banner is looked up.
var apacheThriftExtensions = map[string]struct{}{
	".cc":   {},
	".cpp":  {},
	".go":   {},
	".h":    {},
	".java": {},
	".js":   {},
	".m":    {},
	".php":  {},
	".py":   {},
	".rb":   {},
}
|
|
|
|
func isGeneratedApacheThrift(_, ext string, content []byte) bool {
|
|
if _, ok := apacheThriftExtensions[ext]; !ok {
|
|
return false
|
|
}
|
|
|
|
for _, line := range getLines(content, 6) {
|
|
if bytes.Contains(line, []byte("Autogenerated by Thrift Compiler")) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
func isGeneratedJNIHeader(_, ext string, content []byte) bool {
|
|
if ext != ".h" {
|
|
return false
|
|
}
|
|
|
|
lines := getLines(content, 2)
|
|
if len(lines) < 2 {
|
|
return false
|
|
}
|
|
|
|
return bytes.Contains(lines[0], []byte("/* DO NOT EDIT THIS FILE - it is machine generated */")) &&
|
|
bytes.Contains(lines[1], []byte("#include <jni.h>"))
|
|
}
|
|
|
|
func isVCRCassette(_, ext string, content []byte) bool {
|
|
if ext != ".yml" {
|
|
return false
|
|
}
|
|
|
|
lines := getLines(content, -2)
|
|
if len(lines) < 2 {
|
|
return false
|
|
}
|
|
|
|
return bytes.Contains(lines[1], []byte("recorded_with: VCR"))
|
|
}
|
|
|
|
func isCompiledCythonFile(_, ext string, content []byte) bool {
|
|
if ext != ".c" && ext != ".cpp" {
|
|
return false
|
|
}
|
|
|
|
lines := getLines(content, 1)
|
|
if len(lines) < 1 {
|
|
return false
|
|
}
|
|
|
|
return bytes.Contains(lines[0], []byte("Generated by Cython"))
|
|
}
|
|
|
|
func isGeneratedModule(_, ext string, content []byte) bool {
|
|
if ext != ".mod" {
|
|
return false
|
|
}
|
|
|
|
lines := getLines(content, 1)
|
|
if len(lines) < 1 {
|
|
return false
|
|
}
|
|
|
|
return bytes.Contains(lines[0], []byte("PCBNEW-LibModule-V")) ||
|
|
bytes.Contains(lines[0], []byte("GFORTRAN module version '"))
|
|
}
|
|
|
|
func isGeneratedUnity3DMeta(_, ext string, content []byte) bool {
|
|
if ext != ".meta" {
|
|
return false
|
|
}
|
|
|
|
lines := getLines(content, 1)
|
|
if len(lines) < 1 {
|
|
return false
|
|
}
|
|
|
|
return bytes.Contains(lines[0], []byte("fileFormatVersion: "))
|
|
}
|
|
|
|
func isGeneratedRacc(_, ext string, content []byte) bool {
|
|
if ext != ".rb" {
|
|
return false
|
|
}
|
|
|
|
lines := getLines(content, 3)
|
|
if len(lines) < 3 {
|
|
return false
|
|
}
|
|
|
|
return bytes.HasPrefix(lines[2], []byte("# This file is automatically generated by Racc"))
|
|
}
|
|
|
|
func isGeneratedJFlex(_, ext string, content []byte) bool {
|
|
if ext != ".java" {
|
|
return false
|
|
}
|
|
|
|
lines := getLines(content, 1)
|
|
if len(lines) < 1 {
|
|
return false
|
|
}
|
|
|
|
return bytes.HasPrefix(lines[0], []byte("/* The following code was generated by JFlex "))
|
|
}
|
|
|
|
func isGeneratedGrammarKit(_, ext string, content []byte) bool {
|
|
if ext != ".java" {
|
|
return false
|
|
}
|
|
|
|
lines := getLines(content, 1)
|
|
if len(lines) < 1 {
|
|
return false
|
|
}
|
|
|
|
return bytes.Contains(lines[0], []byte("// This is a generated file. Not intended for manual editing."))
|
|
}
|
|
|
|
func isGeneratedRoxygen2(_, ext string, content []byte) bool {
|
|
if ext != ".rd" {
|
|
return false
|
|
}
|
|
|
|
lines := getLines(content, 1)
|
|
if len(lines) < 1 {
|
|
return false
|
|
}
|
|
|
|
return bytes.Contains(lines[0], []byte("% Generated by roxygen2: do not edit by hand"))
|
|
}
|
|
|
|
func isGeneratedJison(_, ext string, content []byte) bool {
|
|
if ext != ".js" {
|
|
return false
|
|
}
|
|
|
|
lines := getLines(content, 1)
|
|
if len(lines) < 1 {
|
|
return false
|
|
}
|
|
|
|
return bytes.Contains(lines[0], []byte("/* parser generated by jison ")) ||
|
|
bytes.Contains(lines[0], []byte("/* generated by jison-lex "))
|
|
}
|
|
|
|
func isGeneratedGRPCCpp(_, ext string, content []byte) bool {
|
|
switch ext {
|
|
case ".cpp", ".hpp", ".h", ".cc":
|
|
lines := getLines(content, 1)
|
|
if len(lines) < 1 {
|
|
return false
|
|
}
|
|
|
|
return bytes.Contains(lines[0], []byte("// Generated by the gRPC"))
|
|
default:
|
|
return false
|
|
}
|
|
}
|
|
|
|
// dartRegex matches "generated code", then two or three non-word characters,
// then "do not modify". The pattern is lower-case only; isGeneratedDart
// lower-cases the line before matching.
var dartRegex = regex.MustCompile(`generated code\W{2,3}do not modify`)
|
|
|
|
func isGeneratedDart(_, ext string, content []byte) bool {
|
|
if ext != ".dart" {
|
|
return false
|
|
}
|
|
|
|
lines := getLines(content, 1)
|
|
if len(lines) < 1 {
|
|
return false
|
|
}
|
|
|
|
return dartRegex.Match(bytes.ToLower(lines[0]))
|
|
}
|
|
|
|
func isGeneratedPerlPPPortHeader(name, _ string, content []byte) bool {
|
|
if !strings.HasSuffix(name, "ppport.h") {
|
|
return false
|
|
}
|
|
|
|
lines := getLines(content, 10)
|
|
if len(lines) < 10 {
|
|
return false
|
|
}
|
|
|
|
return bytes.Contains(lines[8], []byte("Automatically created by Devel::PPPort"))
|
|
}
|
|
|
|
var (
|
|
gameMakerStudioFirstLineRegex = regex.MustCompile(`^\d\.\d\.\d.+\|\{`)
|
|
gameMakerStudioThirdLineRegex = regex.MustCompile(`\"modelName\"\:\s*\"GM`)
|
|
)
|
|
|
|
func isGeneratedGameMakerStudio(_, ext string, content []byte) bool {
|
|
if ext != ".yy" && ext != ".yyp" {
|
|
return false
|
|
}
|
|
|
|
lines := getLines(content, 3)
|
|
if len(lines) < 3 {
|
|
return false
|
|
}
|
|
|
|
return gameMakerStudioThirdLineRegex.Match(lines[2]) ||
|
|
gameMakerStudioFirstLineRegex.Match(lines[0])
|
|
}
|
|
|
|
var gimpRegexes = []regex.EnryRegexp{
|
|
regex.MustCompile(`\/\* GIMP [a-zA-Z0-9\- ]+ C\-Source image dump \(.+?\.c\) \*\/`),
|
|
regex.MustCompile(`\/\* GIMP header image file format \([a-zA-Z0-9\- ]+\)\: .+?\.h \*\/`),
|
|
}
|
|
|
|
func isGeneratedGimp(_, ext string, content []byte) bool {
|
|
if ext != ".c" && ext != ".h" {
|
|
return false
|
|
}
|
|
|
|
lines := getLines(content, 1)
|
|
if len(lines) < 1 {
|
|
return false
|
|
}
|
|
|
|
for _, r := range gimpRegexes {
|
|
if r.Match(lines[0]) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
func isGeneratedVisualStudio6(_, ext string, content []byte) bool {
|
|
if ext != ".dsp" {
|
|
return false
|
|
}
|
|
|
|
for _, l := range getLines(content, 3) {
|
|
if bytes.Contains(l, []byte("# Microsoft Developer Studio Generated Build File")) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// haxeExtensions is the set of extensions for which the Haxe banner is
// looked up.
var haxeExtensions = map[string]struct{}{
	".cpp":  {},
	".cs":   {},
	".h":    {},
	".java": {},
	".js":   {},
	".lua":  {},
	".php":  {},
	".py":   {},
}
|
|
|
|
func isGeneratedHaxe(_, ext string, content []byte) bool {
|
|
if _, ok := haxeExtensions[ext]; !ok {
|
|
return false
|
|
}
|
|
|
|
for _, l := range getLines(content, 3) {
|
|
if bytes.Contains(l, []byte("Generated by Haxe")) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
var (
|
|
doxygenRegex = regex.MustCompile(`<!--\s+Generated by Doxygen\s+[.0-9]+\s*-->`)
|
|
htmlMetaRegex = regex.MustCompile(`<meta(\s+[^>]+)>`)
|
|
htmlMetaContentRegex = regex.MustCompile(`\s+(name|content|value)\s*=\s*("[^"]+"|'[^']+'|[^\s"']+)`)
|
|
orgModeMetaRegex = regex.MustCompile(`org\s+mode`)
|
|
)
|
|
|
|
func isGeneratedHTML(_, ext string, content []byte) bool {
|
|
if ext != ".html" && ext != ".htm" && ext != ".xhtml" {
|
|
return false
|
|
}
|
|
|
|
lines := getLines(content, 30)
|
|
|
|
// Pkgdown
|
|
for _, l := range lines[:2] {
|
|
if bytes.Contains(l, []byte("<!-- Generated by pkgdown: do not edit by hand -->")) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
// Mandoc
|
|
if len(lines) > 2 &&
|
|
bytes.HasPrefix(lines[2], []byte("<!-- This is an automatically generated file.")) {
|
|
return true
|
|
}
|
|
|
|
// Doxygen
|
|
for _, l := range lines {
|
|
if doxygenRegex.Match(l) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
// HTML tag: <meta name="generator" content="" />
|
|
part := bytes.ToLower(bytes.Join(lines, []byte{' '}))
|
|
part = bytes.ReplaceAll(part, []byte{'\n'}, []byte{})
|
|
part = bytes.ReplaceAll(part, []byte{'\r'}, []byte{})
|
|
matches := htmlMetaRegex.FindAll(part, -1)
|
|
if len(matches) == 0 {
|
|
return false
|
|
}
|
|
|
|
for _, m := range matches {
|
|
var name, value, content string
|
|
ms := htmlMetaContentRegex.FindAllStringSubmatch(string(m), -1)
|
|
for _, m := range ms {
|
|
switch m[1] {
|
|
case "name":
|
|
name = m[2]
|
|
case "value":
|
|
value = m[2]
|
|
case "content":
|
|
content = m[2]
|
|
}
|
|
}
|
|
|
|
var val = value
|
|
if val == "" {
|
|
val = content
|
|
}
|
|
|
|
name = strings.Trim(name, `"'`)
|
|
val = strings.Trim(val, `"'`)
|
|
|
|
if name != "generator" || val == "" {
|
|
continue
|
|
}
|
|
|
|
if strings.Contains(val, "jlatex2html") ||
|
|
strings.Contains(val, "latex2html") ||
|
|
strings.Contains(val, "groff") ||
|
|
strings.Contains(val, "makeinfo") ||
|
|
strings.Contains(val, "texi2html") ||
|
|
strings.Contains(val, "ronn") ||
|
|
orgModeMetaRegex.MatchString(val) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
func isGeneratedJooq(_, ext string, content []byte) bool {
|
|
if ext != ".java" {
|
|
return false
|
|
}
|
|
|
|
for _, l := range getLines(content, 2) {
|
|
if bytes.Contains(l, []byte("This file is generated by jOOQ.")) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// getLines returns up to the first n lines of content, split on '\n' (the
// terminator is not included). A negative n returns up to the last -n lines
// in reverse order (last line first).
//
// When reading from the end, any segment whose start offset is
// len(content)-1 or later is skipped. This drops the empty segment after a
// trailing newline, and also a final line that is exactly one byte long.
//
// The returned slices alias content; they are not copies.
func getLines(content []byte, n int) [][]byte {
	var lines [][]byte

	if n >= 0 {
		start := 0
		for start < len(content) && len(lines) < n {
			end := len(content)
			if off := bytes.IndexByte(content[start:], '\n'); off >= 0 {
				end = start + off
			}
			lines = append(lines, content[start:end])
			start = end + 1
		}
		return lines
	}

	end := len(content)
	for end > 0 && len(lines) < -n {
		nl := bytes.LastIndexByte(content[:end], '\n')
		if start := nl + 1; start < len(content)-1 {
			lines = append(lines, content[start:end])
		}
		end = nl
	}
	return lines
}
|
|
|
|
// forEachLine calls cb once per '\n'-separated line of content, in order,
// without the terminator. A trailing newline does not produce an extra empty
// line, and empty content produces no calls. The slices passed to cb alias
// content.
func forEachLine(content []byte, cb func([]byte)) {
	for rest := content; len(rest) > 0; {
		idx := bytes.IndexByte(rest, '\n')
		if idx < 0 {
			// Last line, not newline-terminated.
			cb(rest)
			return
		}
		cb(rest[:idx])
		rest = rest[idx+1:]
	}
}
|
|
|
|
// countAppearancesInLine returns the total number of non-overlapping
// occurrences in line of every string in targets, summed over all targets.
func countAppearancesInLine(line []byte, targets ...string) int {
	total := 0
	for i := range targets {
		total += bytes.Count(line, []byte(targets[i]))
	}
	return total
}
|