Mirror of https://github.com/ralsina/tartrazine.git (synced 2024-09-19 23:11:22 +00:00)
Added constants for token abbrev

parent 82db232511
commit a9ff9bc8ac
scripts/token_abbrevs.py (new file, 24 lines)
@@ -0,0 +1,24 @@
import sys
import string

# Run it as: grep token lexers/* | python scripts/token_abbrevs.py


def abbr(line):
    return "".join(c for c in line if c in string.ascii_uppercase).lower()

abbrevs = {}
tokens = set([])
for line in sys.stdin:
    if "<token" not in line:
        continue
    line = line.strip()
    line = line.split('<token ', 1)[-1]
    line = line.split('"')[1]
    abbrevs[line] = abbr(line)
    tokens.add(line)

print("Abbreviations: {")
for k, v in abbrevs.items():
    print(f'    "{k}" => "{v}",')
print("}")
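The script reads the lexer XML definitions on stdin, keeps only lines containing "<token", extracts the first quoted attribute value (the token type), and abbreviates it by keeping its uppercase letters and lowercasing them. Piping in lines such as <token type="NameFunction"/> and <token type="Keyword"/> should print roughly the following, in Crystal hash-literal syntax ready to paste into src/constants.cr (the exact indentation of the output is an assumption, since whitespace was collapsed in this view):

Abbreviations: {
    "NameFunction" => "nf",
    "Keyword" => "k",
}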
src/constants.cr (new file, 91 lines)
@@ -0,0 +1,91 @@
module Tartrazine
  Abbreviations = {
    "Text" => "t",
    "CommentSingle" => "cs",
    "CommentSpecial" => "cs",
    "NameVariable" => "nv",
    "Keyword" => "k",
    "NameFunction" => "nf",
    "Punctuation" => "p",
    "Operator" => "o",
    "LiteralNumberInteger" => "lni",
    "NameBuiltin" => "nb",
    "Name" => "n",
    "OperatorWord" => "ow",
    "LiteralStringSingle" => "lss",
    "Literal" => "l",
    "NameClass" => "nc",
    "CommentMultiline" => "cm",
    "LiteralStringRegex" => "lsr",
    "KeywordDeclaration" => "kd",
    "KeywordConstant" => "kc",
    "NameOther" => "no",
    "LiteralNumberFloat" => "lnf",
    "LiteralNumberHex" => "lnh",
    "LiteralStringDouble" => "lsd",
    "KeywordType" => "kt",
    "NameNamespace" => "nn",
    "NameAttribute" => "na",
    "KeywordReserved" => "kr",
    "CommentPreproc" => "cp",
    "KeywordNamespace" => "kn",
    "NameConstant" => "nc",
    "NameLabel" => "nl",
    "LiteralString" => "ls",
    "LiteralStringChar" => "lsc",
    "TextWhitespace" => "tw",
    "LiteralStringEscape" => "lse",
    "LiteralNumber" => "ln",
    "Other" => "o",
    "LiteralStringBoolean" => "lsb",
    "NameProperty" => "np",
    "Comment" => "c",
    "NameTag" => "nt",
    "LiteralStringOther" => "lso",
    "NameVariableGlobal" => "nvg",
    "NameBuiltinPseudo" => "nbp",
    "LiteralNumberBin" => "lnb",
    "KeywordPseudo" => "kp",
    "CommentPreprocFile" => "cpf",
    "LiteralStringAffix" => "lsa",
    "LiteralStringDelimiter" => "lsd",
    "LiteralNumberOct" => "lno",
    "Error" => "e",
    "Generic" => "g",
    "LiteralNumberIntegerLong" => "lnil",
    "NameDecorator" => "nd",
    "LiteralStringInterpol" => "lsi",
    "LiteralStringBacktick" => "lsb",
    "GenericPrompt" => "gp",
    "GenericOutput" => "go",
    "LiteralStringName" => "lsn",
    "LiteralStringHeredoc" => "lsh",
    "LiteralStringSymbol" => "lss",
    "NameVariableInstance" => "nvi",
    "LiteralOther" => "lo",
    "NameVariableClass" => "nvc",
    "NameOperator" => "no",
    "None" => "n",
    "LiteralStringDoc" => "lsd",
    "NameException" => "ne",
    "GenericSubheading" => "gs",
    "GenericStrong" => "gs",
    "GenericDeleted" => "gd",
    "GenericInserted" => "gi",
    "GenericHeading" => "gh",
    "NameEntity" => "ne",
    "NamePseudo" => "np",
    "CommentHashbang" => "ch",
    "TextPunctuation" => "tp",
    "NameVariableAnonymous" => "nva",
    "NameVariableMagic" => "nvm",
    "NameFunctionMagic" => "nfm",
    "GenericEmph" => "ge",
    "GenericUnderline" => "gu",
    "LiteralStringAtom" => "lsa",
    "LiteralDate" => "ld",
    "GenericError" => "ge",
    "TextSymbol" => "ts",
    "NameKeyword" => "nk",
  }
end
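Nothing in this commit reads the map yet. Below is a minimal lookup sketch, assuming the abbreviations are meant as short class-style names for token types in rendered output (note also that a few distinct types share an abbreviation, e.g. "CommentSingle" and "CommentSpecial" both map to "cs"):

# Hypothetical lookup, not part of this commit: fall back to the
# full token type name when no abbreviation is defined for it.
short = Tartrazine::Abbreviations.fetch("NameFunction", "NameFunction")
puts short # => "nf"

Using Hash#fetch with a default keeps unknown token types from raising at lookup time.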
(existing file, path not shown)
@@ -54,25 +54,34 @@ module Tartrazine
 
     property state_stack = ["root"]
 
-    # Turn the text into a list of tokens.
+    # Turn the text into a list of tokens. The `usingself` parameter
+    # is true when the lexer is being used to tokenize a string
+    # from a larger text that is already being tokenized.
+    # So, when it's true, we don't modify the text.
     def tokenize(text, usingself = false) : Array(Token)
       @state_stack = ["root"]
       tokens = [] of Token
       pos = 0
       matched = false
+
+      # Respect the `ensure_nl` config option
       if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself
         text += "\n"
       end
+
+      # Loop through the text, applying rules
       while pos < text.size
         state = states[@state_stack.last]
         Log.trace { "Stack is #{@state_stack} State is #{state.name}, pos is #{pos}, text is #{text[pos..pos + 10]}" }
         state.rules.each do |rule|
           matched, new_pos, new_tokens = rule.match(text, pos, self)
           if matched
+            # Move position forward, save the tokens,
+            # tokenize from the new position
             Log.trace { "MATCHED: #{rule.xml}" }
             pos = new_pos
             tokens += new_tokens
-            break # We go back to processing with current state
+            break
           end
           Log.trace { "NOT MATCHED: #{rule.xml}" }
         end
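The new comments describe two behaviours of tokenize: with the ensure_nl config option enabled, a trailing newline is appended before scanning, and usingself suppresses that when the lexer is re-tokenizing a fragment of text it is already processing. A minimal call-site sketch follows, assuming a lexer instance is obtained elsewhere (the helper used to build it is hypothetical and not part of this diff):

# Hypothetical: how a lexer instance is built is not shown in this commit.
lexer = Tartrazine.lexer("crystal")

# Normal entry point; ensure_nl may append "\n" before the rule loop runs.
tokens = lexer.tokenize("puts 1")

# Re-entrant call from within another pass: usingself = true leaves the
# fragment untouched (no newline appended).
inner = lexer.tokenize("x", usingself: true)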