mirror of
https://github.com/ralsina/tartrazine.git
synced 2024-11-12 22:42:23 +00:00
Added constants for token abbrev
This commit is contained in:
parent
82db232511
commit
a9ff9bc8ac
24
scripts/token_abbrevs.py
Normal file
24
scripts/token_abbrevs.py
Normal file
@ -0,0 +1,24 @@
|
||||
import sys
import string

# Run it as grep token lexers/* | python scripts/token_abbrevs.py
#
# Reads grep output on stdin, takes the first double-quoted value that
# follows each `<token ` and prints a `"name" => "abbreviation"` table.
# Abbreviations shared by more than one name are reported on stderr.


def abbr(line):
    """Return the ASCII uppercase letters of `line`, joined and lowercased.

    Example: "CommentSingle" -> "cs".
    """
    return "".join(c for c in line if c in string.ascii_uppercase).lower()


def token_name(line):
    """Return the first double-quoted value after `<token ` in `line`.

    Returns None when the line contains no `<token` or no double quote at
    all, so malformed input is skipped instead of raising IndexError.
    """
    if "<token" not in line:
        return None
    pieces = line.strip().split('<token ', 1)[-1].split('"')
    # Without any quote, split() yields a single piece and there is no name.
    return pieces[1] if len(pieces) > 1 else None


def collect_abbrevs(lines):
    """Map each token name found in `lines` to its abbreviation.

    Names keep the order in which they were first seen.
    """
    abbrevs = {}
    for line in lines:
        name = token_name(line)
        if name is not None:
            abbrevs[name] = abbr(name)
    return abbrevs


def collisions(abbrevs):
    """Return {abbreviation: [names]} for abbreviations used by several names."""
    names_by_abbrev = {}
    for name, short in abbrevs.items():
        names_by_abbrev.setdefault(short, []).append(name)
    return {s: names for s, names in names_by_abbrev.items() if len(names) > 1}


def main():
    """Read grep output from stdin and print the abbreviation table."""
    abbrevs = collect_abbrevs(sys.stdin)

    # NOTE(review): the header is printed with `:`; a Crystal constant
    # assignment uses `=`, so the output presumably needs a hand edit
    # before it is pasted into a .cr file -- confirm.
    print("Abbreviations: {")
    for k, v in abbrevs.items():
        print(f' "{k}" => "{v}",')
    print("}")

    # Warnings go to stderr so the table on stdout stays unchanged.
    for short, names in collisions(abbrevs).items():
        print(f'warning: "{short}" is shared by {", ".join(names)}',
              file=sys.stderr)


if __name__ == "__main__":
    main()
|
91
src/constants.cr
Normal file
91
src/constants.cr
Normal file
@ -0,0 +1,91 @@
|
||||
module Tartrazine
  # Short abbreviation for each token type name.
  #
  # Every abbreviation is the capital letters of the name, lowercased
  # (for example "LiteralNumberIntegerLong" => "lnil").
  #
  # The abbreviations are NOT unique: "CommentSingle" and "CommentSpecial"
  # both map to "cs", "Name" and "None" both map to "n", and so on. The
  # table can therefore be used for name -> abbreviation lookups only; it
  # cannot be inverted without losing entries.
  Abbreviations = {
    "Text" => "t",
    "CommentSingle" => "cs",
    "CommentSpecial" => "cs",
    "NameVariable" => "nv",
    "Keyword" => "k",
    "NameFunction" => "nf",
    "Punctuation" => "p",
    "Operator" => "o",
    "LiteralNumberInteger" => "lni",
    "NameBuiltin" => "nb",
    "Name" => "n",
    "OperatorWord" => "ow",
    "LiteralStringSingle" => "lss",
    "Literal" => "l",
    "NameClass" => "nc",
    "CommentMultiline" => "cm",
    "LiteralStringRegex" => "lsr",
    "KeywordDeclaration" => "kd",
    "KeywordConstant" => "kc",
    "NameOther" => "no",
    "LiteralNumberFloat" => "lnf",
    "LiteralNumberHex" => "lnh",
    "LiteralStringDouble" => "lsd",
    "KeywordType" => "kt",
    "NameNamespace" => "nn",
    "NameAttribute" => "na",
    "KeywordReserved" => "kr",
    "CommentPreproc" => "cp",
    "KeywordNamespace" => "kn",
    "NameConstant" => "nc",
    "NameLabel" => "nl",
    "LiteralString" => "ls",
    "LiteralStringChar" => "lsc",
    "TextWhitespace" => "tw",
    "LiteralStringEscape" => "lse",
    "LiteralNumber" => "ln",
    "Other" => "o",
    "LiteralStringBoolean" => "lsb",
    "NameProperty" => "np",
    "Comment" => "c",
    "NameTag" => "nt",
    "LiteralStringOther" => "lso",
    "NameVariableGlobal" => "nvg",
    "NameBuiltinPseudo" => "nbp",
    "LiteralNumberBin" => "lnb",
    "KeywordPseudo" => "kp",
    "CommentPreprocFile" => "cpf",
    "LiteralStringAffix" => "lsa",
    "LiteralStringDelimiter" => "lsd",
    "LiteralNumberOct" => "lno",
    "Error" => "e",
    "Generic" => "g",
    "LiteralNumberIntegerLong" => "lnil",
    "NameDecorator" => "nd",
    "LiteralStringInterpol" => "lsi",
    "LiteralStringBacktick" => "lsb",
    "GenericPrompt" => "gp",
    "GenericOutput" => "go",
    "LiteralStringName" => "lsn",
    "LiteralStringHeredoc" => "lsh",
    "LiteralStringSymbol" => "lss",
    "NameVariableInstance" => "nvi",
    "LiteralOther" => "lo",
    "NameVariableClass" => "nvc",
    "NameOperator" => "no",
    "None" => "n",
    "LiteralStringDoc" => "lsd",
    "NameException" => "ne",
    "GenericSubheading" => "gs",
    "GenericStrong" => "gs",
    "GenericDeleted" => "gd",
    "GenericInserted" => "gi",
    "GenericHeading" => "gh",
    "NameEntity" => "ne",
    "NamePseudo" => "np",
    "CommentHashbang" => "ch",
    "TextPunctuation" => "tp",
    "NameVariableAnonymous" => "nva",
    "NameVariableMagic" => "nvm",
    "NameFunctionMagic" => "nfm",
    "GenericEmph" => "ge",
    "GenericUnderline" => "gu",
    "LiteralStringAtom" => "lsa",
    "LiteralDate" => "ld",
    "GenericError" => "ge",
    "TextSymbol" => "ts",
    "NameKeyword" => "nk",
  }
end
|
@ -54,25 +54,34 @@ module Tartrazine
|
||||
|
||||
property state_stack = ["root"]
|
||||
|
||||
# Turn the text into a list of tokens.
|
||||
# Turn the text into a list of tokens. The `usingself` parameter
|
||||
# is true when the lexer is being used to tokenize a string
|
||||
# from a larger text that is already being tokenized.
|
||||
# So, when it's true, we don't modify the text.
|
||||
def tokenize(text, usingself = false) : Array(Token)
|
||||
@state_stack = ["root"]
|
||||
tokens = [] of Token
|
||||
pos = 0
|
||||
matched = false
|
||||
|
||||
# Respect the `ensure_nl` config option
|
||||
if text.size > 0 && text[-1] != '\n' && config[:ensure_nl] && !usingself
|
||||
text += "\n"
|
||||
end
|
||||
|
||||
# Loop through the text, applying rules
|
||||
while pos < text.size
|
||||
state = states[@state_stack.last]
|
||||
Log.trace { "Stack is #{@state_stack} State is #{state.name}, pos is #{pos}, text is #{text[pos..pos + 10]}" }
|
||||
state.rules.each do |rule|
|
||||
matched, new_pos, new_tokens = rule.match(text, pos, self)
|
||||
if matched
|
||||
# Move position forward, save the tokens,
|
||||
# tokenize from the new position
|
||||
Log.trace { "MATCHED: #{rule.xml}" }
|
||||
pos = new_pos
|
||||
tokens += new_tokens
|
||||
break # We go back to processing with current state
|
||||
break
|
||||
end
|
||||
Log.trace { "NOT MATCHED: #{rule.xml}" }
|
||||
end
|
||||
|
Loading…
Reference in New Issue
Block a user