mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-06-18 22:23:07 -03:00
implement IsGenerated helper to filter out generated files
Closes #17. Implements the IsGenerated helper function to filter out generated files using the rules and matchers in: - https://github.com/github/linguist/blob/master/lib/linguist/generated.rb Since the vast majority of matchers have very different logic, the helper cannot be autogenerated directly from linguist like the other logic in enry, so it is translated by hand. There are three different types of matchers in this implementation: - By extension, which mark a file as generated based only on its extension. These are the fastest matchers, so they run first. - By file name, which match patterns against the filename. These run second. Unlike linguist, we try to use string functions instead of regexps as much as possible. - Finally, the rest of the matchers, which inspect the content and try to determine from it whether the file is generated. Unlike linguist, we try to read only the content we need, avoid splitting all of it unless necessary, and use byte functions instead of regexps as much as possible. Signed-off-by: Miguel Molina <miguel@erizocosmi.co>
This commit is contained in:
8
_testdata/HTML/attr-swapped.html
Normal file
8
_testdata/HTML/attr-swapped.html
Normal file
@ -0,0 +1,8 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta
|
||||
content = "Org mode"
|
||||
name = "generator" >
|
||||
</head>
|
||||
</html>
|
12
_testdata/HTML/extra-attr.html
Normal file
12
_testdata/HTML/extra-attr.html
Normal file
@ -0,0 +1,12 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta
|
||||
data-foo="Bar"
|
||||
http-equiv="Content-Type: text/html; charset=UTF-8"
|
||||
content = "Org mode"
|
||||
id="Some wicked id"
|
||||
wrong-content="whoops"
|
||||
name = "generator" >
|
||||
</head>
|
||||
</html>
|
6
_testdata/HTML/extra-spaces.html
Normal file
6
_testdata/HTML/extra-spaces.html
Normal file
@ -0,0 +1,6 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta name = "generator" content = "Org mode" />
|
||||
</head>
|
||||
</html>
|
7
_testdata/HTML/extra-tags.html
Normal file
7
_testdata/HTML/extra-tags.html
Normal file
@ -0,0 +1,7 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<meta >
|
||||
<meta name="generator" content="something invalid">
|
||||
<meta name="generator" content="makeinfo 4.8"
|
||||
/>
|
||||
</html>
|
21
_testdata/HTML/grohtml.html
Normal file
21
_testdata/HTML/grohtml.html
Normal file
@ -0,0 +1,21 @@
|
||||
<!-- Creator : groff version 1.22.4 -->
|
||||
<!-- CreationDate: Tue Jul 2 20:06:41 2019 -->
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
|
||||
"http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<meta name="generator" content="groff -Thtml, see www.gnu.org">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=US-ASCII">
|
||||
<meta name="Content-Style" content="text/css">
|
||||
<style type="text/css">
|
||||
p { margin-top: 0; margin-bottom: 0; vertical-align: top }
|
||||
pre { margin-top: 0; margin-bottom: 0; vertical-align: top }
|
||||
table { margin-top: 0; margin-bottom: 0; vertical-align: top }
|
||||
h1 { text-align: center }
|
||||
</style>
|
||||
<title></title>
|
||||
</head>
|
||||
<body>
|
||||
... document truncated
|
||||
</body>
|
||||
</html>
|
25
_testdata/HTML/grohtml.xhtml
Normal file
25
_testdata/HTML/grohtml.xhtml
Normal file
@ -0,0 +1,25 @@
|
||||
<?xml version="1.0" encoding="us-ascii"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN"
|
||||
"http://www.w3.org/TR/MathML2/dtd/xhtml-math11-f.dtd"
|
||||
[<!ENTITY mathml "http://www.w3.org/1998/Math/MathML">]>
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
||||
<head>
|
||||
<meta name="generator" content="groff -Txhtml, see www.gnu.org"/>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=US-ASCII"/>
|
||||
<meta name="Content-Style" content="text/css"/>
|
||||
<style type="text/css">
|
||||
.center { text-align: center }
|
||||
.right { text-align: right }
|
||||
p { margin-top: 0; margin-bottom: 0; vertical-align: top }
|
||||
pre { margin-top: 0; margin-bottom: 0; vertical-align: top }
|
||||
table { margin-top: 0; margin-bottom: 0; vertical-align: top }
|
||||
h1 { text-align: center }
|
||||
</style>
|
||||
<!-- Creator : groff version 1.22.4 -->
|
||||
<!-- CreationDate: Tue Jul 2 20:08:09 2019 -->
|
||||
<title></title>
|
||||
</head>
|
||||
<body>
|
||||
... truncated
|
||||
</body>
|
||||
</html>
|
47
_testdata/HTML/makeinfo.html
Normal file
47
_testdata/HTML/makeinfo.html
Normal file
@ -0,0 +1,47 @@
|
||||
<html lang="en">
|
||||
<head>
|
||||
<title>Asm Mode - GNU Emacs Manual</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta name="description" content="GNU Emacs Manual">
|
||||
<meta name="generator" content="makeinfo 4.8">
|
||||
<link title="Top" rel="start" href="index.html#Top">
|
||||
<link rel="up" href="Programs.html#Programs" title="Programs">
|
||||
<link rel="prev" href="C-Modes.html#C-Modes" title="C Modes">
|
||||
<link rel="next" href="Fortran.html#Fortran" title="Fortran">
|
||||
<link href="http://www.gnu.org/software/texinfo/" rel="generator-home" title="Texinfo Homepage">
|
||||
<!--
|
||||
This is the `GNU Emacs Manual',
|
||||
updated for Emacs version {No value for `EMACSVER'}.
|
||||
|
||||
Copyright (C) 1985--1987, 1993--2019 Free Software Foundation, Inc.
|
||||
|
||||
Permission is granted to copy, distribute and/or modify this
|
||||
document under the terms of the GNU Free Documentation License,
|
||||
Version 1.3 or any later version published by the Free Software
|
||||
Foundation; with the Invariant Sections being ``The GNU
|
||||
Manifesto,'' ``Distribution'' and ``GNU GENERAL PUBLIC LICENSE,''
|
||||
with the Front-Cover Texts being ``A GNU Manual,'' and with the
|
||||
Back-Cover Texts as in (a) below. A copy of the license is
|
||||
included in the section entitled ``GNU Free Documentation
|
||||
License.''
|
||||
|
||||
(a) The FSF's Back-Cover Text is: ``You have the freedom to copy
|
||||
and modify this GNU manual. Buying copies from the FSF supports
|
||||
it in developing GNU and promoting software freedom.''
|
||||
-->
|
||||
<meta http-equiv="Content-Style-Type" content="text/css">
|
||||
<style type="text/css"><!--
|
||||
pre.display { font-family:inherit }
|
||||
pre.format { font-family:inherit }
|
||||
pre.smalldisplay { font-family:inherit; font-size:smaller }
|
||||
pre.smallformat { font-family:inherit; font-size:smaller }
|
||||
pre.smallexample { font-size:smaller }
|
||||
pre.smalllisp { font-size:smaller }
|
||||
span.sc { font-variant:small-caps }
|
||||
span.roman { font-family:serif; font-weight:normal; }
|
||||
span.sansserif { font-family:sans-serif; font-weight:normal; }
|
||||
--></style>
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>
|
40
_testdata/HTML/mandoc.html
Normal file
40
_testdata/HTML/mandoc.html
Normal file
@ -0,0 +1,40 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<!-- This is an automatically generated file. Do not edit.
|
||||
$OpenBSD: mandoc.1,v 1.161 2019/02/23 18:52:45 schwarze Exp $
|
||||
|
||||
Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
|
||||
Copyright (c) 2012, 2014-2018 Ingo Schwarze <schwarze@openbsd.org>
|
||||
|
||||
Permission to use, copy, modify, and distribute this software for any
|
||||
purpose with or without fee is hereby granted, provided that the above
|
||||
copyright notice and this permission notice appear in all copies.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
-->
|
||||
<head>
|
||||
<meta charset="utf-8"/>
|
||||
<style>
|
||||
table.head, table.foot { width: 100%; }
|
||||
td.head-rtitle, td.foot-os { text-align: right; }
|
||||
td.head-vol { text-align: center; }
|
||||
div.Pp { margin: 1ex 0ex; }
|
||||
div.Nd, div.Bf, div.Op { display: inline; }
|
||||
span.Pa, span.Ad { font-style: italic; }
|
||||
span.Ms { font-weight: bold; }
|
||||
dl.Bl-diag > dt { font-weight: bold; }
|
||||
code.Nm, code.Fl, code.Cm, code.Ic, code.In, code.Fd, code.Fn,
|
||||
code.Cd { font-weight: bold; font-family: inherit; }
|
||||
</style>
|
||||
<title>MANDOC(1)</title>
|
||||
</head>
|
||||
<body>
|
||||
... document truncated
|
||||
</body>
|
||||
</html>
|
4
_testdata/HTML/no-content.html
Normal file
4
_testdata/HTML/no-content.html
Normal file
@ -0,0 +1,4 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<meta name="generator" value="Some sick tool nobody's using yet" />
|
||||
</html>
|
73
_testdata/HTML/node78.html
Normal file
73
_testdata/HTML/node78.html
Normal file
@ -0,0 +1,73 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
|
||||
|
||||
<!--Converted with jLaTeX2HTML 2002-2-1 (1.70) JA patch-2.0
|
||||
patched version by: Kenshi Muto, Debian Project.
|
||||
* modified by: Shige TAKENO
|
||||
LaTeX2HTML 2002-2-1 (1.70),
|
||||
original version by: Nikos Drakos, CBLU, University of Leeds
|
||||
* revised and updated by: Marcus Hennecke, Ross Moore, Herb Swan
|
||||
* with significant contributions from:
|
||||
Jens Lippmann, Marek Rouchal, Martin Wilck and others -->
|
||||
<HTML>
|
||||
<HEAD>
|
||||
<TITLE>Quality/Complexity metric plugins</TITLE>
|
||||
<META NAME="description" CONTENT="Quality/Complexity metric plugins">
|
||||
<META NAME="keywords" CONTENT="main">
|
||||
<META NAME="resource-type" CONTENT="document">
|
||||
<META NAME="distribution" CONTENT="global">
|
||||
|
||||
<META NAME="Generator" CONTENT="jLaTeX2HTML v2002-2-1 JA patch-2.0">
|
||||
<META HTTP-EQUIV="Content-Style-Type" CONTENT="text/css">
|
||||
|
||||
<LINK REL="STYLESHEET" HREF="main.css">
|
||||
|
||||
<LINK REL="next" HREF="node79.html">
|
||||
<LINK REL="previous" HREF="node77.html">
|
||||
<LINK REL="up" HREF="node74.html">
|
||||
<LINK REL="next" HREF="node79.html">
|
||||
</HEAD>
|
||||
|
||||
<BODY >
|
||||
<!--Navigation Panel-->
|
||||
<A NAME="tex2html1237"
|
||||
HREF="node79.html">
|
||||
<IMG WIDTH="37" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="next"
|
||||
SRC="file:/usr/share/latex2html/icons/next.png"></A>
|
||||
<A NAME="tex2html1233"
|
||||
HREF="node74.html">
|
||||
<IMG WIDTH="26" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="up"
|
||||
SRC="file:/usr/share/latex2html/icons/up.png"></A>
|
||||
<A NAME="tex2html1227"
|
||||
HREF="node77.html">
|
||||
<IMG WIDTH="63" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="previous"
|
||||
SRC="file:/usr/share/latex2html/icons/prev.png"></A>
|
||||
<A NAME="tex2html1235"
|
||||
HREF="node1.html">
|
||||
<IMG WIDTH="65" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="contents"
|
||||
SRC="file:/usr/share/latex2html/icons/contents.png"></A>
|
||||
<BR>
|
||||
<B> Next:</B> <A NAME="tex2html1238"
|
||||
HREF="node79.html">Time signal generator</A>
|
||||
<B> Up:</B> <A NAME="tex2html1234"
|
||||
HREF="node74.html">Additional simulator components</A>
|
||||
<B> Previous:</B> <A NAME="tex2html1228"
|
||||
HREF="node77.html">Weather scenario ()</A>
|
||||
<B> <A NAME="tex2html1236"
|
||||
HREF="node1.html">Contents</A></B>
|
||||
<BR>
|
||||
<BR>
|
||||
<!--End of Navigation Panel-->
|
||||
|
||||
<H3><A NAME="SECTION000101400000000000000">
|
||||
Quality/Complexity metric plugins</A>
|
||||
</H3>
|
||||
Various plugins to measure schedule quality and problem complexity to suit the application.
|
||||
|
||||
<P>
|
||||
<BR><HR>
|
||||
<ADDRESS>
|
||||
Steve Fraser
|
||||
2008-01-31
|
||||
</ADDRESS>
|
||||
</BODY>
|
||||
</HTML>
|
20
_testdata/HTML/org-mode.html
Normal file
20
_testdata/HTML/org-mode.html
Normal file
@ -0,0 +1,20 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
|
||||
<head>
|
||||
<!-- 2019-06-08 Sat 04:15 -->
|
||||
<meta http-equiv="Content-Type" /
|
||||
content="text/html;charset=utf-8" />
|
||||
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<title>unpackaged.el</title>
|
||||
<meta name="generator" content="Org mode" />
|
||||
<meta name="author" content="Adam Porter" />
|
||||
<style type="text/css">
|
||||
<!--/*--><![CDATA[/*><!--*/
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>
|
31
_testdata/HTML/pages.html
Normal file
31
_testdata/HTML/pages.html
Normal file
@ -0,0 +1,31 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
|
||||
<title>Related Pages</title>
|
||||
<link href="qt.css" rel="stylesheet" type="text/css"/>
|
||||
</head>
|
||||
<body>
|
||||
<div class=header>
|
||||
<a class=headerLink href="index.html">Main Page</a> ·
|
||||
<a class=headerLink href="classoverview.html">Class Overview</a> ·
|
||||
<a class=headerLink href="hierarchy.html">Hierarchy</a> ·
|
||||
<a class=headerLink href="annotated.html">All Classes</a>
|
||||
</div>
|
||||
<!-- Generated by Doxygen 1.8.1.2 -->
|
||||
</div><!-- top -->
|
||||
<div class="header">
|
||||
<div class="headertitle">
|
||||
<div class="title">Related Pages</div> </div>
|
||||
</div><!--header-->
|
||||
<div class="contents">
|
||||
<div class="textblock">Here is a list of all related documentation pages:</div><div class="directory">
|
||||
<table class="directory">
|
||||
<tr id="row_0_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><a class="el" href="classoverview.html" target="_self">Class Overview</a></td><td class="desc"></td></tr>
|
||||
<tr id="row_1_"><td class="entry"><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><a class="el" href="thelayoutsystem.html" target="_self">The Layout System</a></td><td class="desc"></td></tr>
|
||||
</table>
|
||||
</div><!-- directory -->
|
||||
</div><!-- contents -->
|
||||
<div class="footer" />Generated with <a href="http://www.doxygen.org/index.html">Doxygen</a> 1.8.1.2</div>
|
||||
</body>
|
||||
</html>
|
10
_testdata/HTML/quotes-double.html
Normal file
10
_testdata/HTML/quotes-double.html
Normal file
@ -0,0 +1,10 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta
|
||||
name ="generator"
|
||||
content =
|
||||
"Org mode"
|
||||
>
|
||||
</head>
|
||||
</html>
|
6
_testdata/HTML/quotes-none.html
Normal file
6
_testdata/HTML/quotes-none.html
Normal file
@ -0,0 +1,6 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta name = generator content = makeinfo />
|
||||
</head>
|
||||
</html>
|
6
_testdata/HTML/quotes-single.html
Normal file
6
_testdata/HTML/quotes-single.html
Normal file
@ -0,0 +1,6 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta name = 'generator' content = 'Org mode'>
|
||||
</head>
|
||||
</html>
|
4
_testdata/HTML/ronn.html
Normal file
4
_testdata/HTML/ronn.html
Normal file
@ -0,0 +1,4 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<meta name="generator" value="Ronn/v0.7.3 (http://github.com/rtomayko/ronn/tree/0.7.3)" />
|
||||
</html>
|
4
_testdata/HTML/unknown.html
Normal file
4
_testdata/HTML/unknown.html
Normal file
@ -0,0 +1,4 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<meta name="generator" content="Some sick tool nobody's using yet" />
|
||||
</html>
|
8
_testdata/HTML/uppercase.html
Normal file
8
_testdata/HTML/uppercase.html
Normal file
@ -0,0 +1,8 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<META NAME= 'GENERATOR'
|
||||
CONTENT
|
||||
= 'ORG MODE'>
|
||||
</head>
|
||||
</html>
|
Reference in New Issue
Block a user