mirror of
https://github.com/ralsina/tartrazine.git
synced 2025-04-03 14:58:22 +00:00
284 lines
9.0 KiB
XML
284 lines
9.0 KiB
XML
<lexer>
|
|
<!-- Lexer metadata: display name, alias, filename glob and MIME type. -->
<config>
  <name>WebVTT</name>
  <alias>vtt</alias>
  <filename>*.vtt</filename>
  <mime_type>text/vtt</mime_type>
</config>
<!--
  The WebVTT spec defines a WebVTT line terminator as either CRLF, CR or LF
  (https://www.w3.org/TR/webvtt1/#webvtt-line-terminator). However, with this
  definition it is unclear whether CRLF is one line terminator (CRLF) or two
  line terminators (CR and LF).

  To work around this ambiguity, only CRLF and LF are treated as line terminators.
  To my knowledge only classic Mac OS uses CR as a line terminator, so the lexer
  should still work for most files.
-->
<rules>
|
|
<!--
  https://www.w3.org/TR/webvtt1/#webvtt-file-body

  Entry state. Rules, in order:
    1. "WEBVTT" anchored at the start of input (Keyword), followed by optional
       header text introduced by a space or tab and by two or more line
       terminators (Text).
    2. A "REGION" line (Keyword, trailing spaces/tabs as Text); pushes
       region-settings-list.
    3. A "STYLE" line (Keyword, trailing spaces/tabs as Text), a body that may
       not contain the cue timing arrow and is handed to the CSS lexer, and the
       two line terminators that close the block (Text).
    4. NOTE blocks, through the shared "comment" state.
    5. A zero-width lookahead for a cue: an optional identifier line that does
       not contain the arrow, then "start arrow end" timestamps. Nothing is
       consumed; the "cues" state is pushed to lex the cue itself.
-->
<state name="root">
  <rule pattern="(\AWEBVTT)((?:[ \t][^\r\n]*)?(?:\r?\n){2,})">
    <bygroups>
      <token type="Keyword"/>
      <token type="Text"/>
    </bygroups>
  </rule>
  <rule pattern="(^REGION)([ \t]*$)">
    <bygroups>
      <token type="Keyword"/>
      <token type="Text"/>
    </bygroups>
    <push state="region-settings-list"/>
  </rule>
  <rule pattern="(^STYLE)([ \t]*$)((?:(?!-->)[\s\S])*?)((?:\r?\n){2})">
    <bygroups>
      <token type="Keyword"/>
      <token type="Text"/>
      <using lexer="CSS"/>
      <token type="Text"/>
    </bygroups>
  </rule>
  <rule>
    <include state="comment"/>
  </rule>
  <rule pattern="(?=((?![^\r\n]*-->)[^\r\n]*\r?\n)?(\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3}[ \t]+-->[ \t]+(\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})">
    <push state="cues"/>
  </rule>
</state>
<!--
  https://www.w3.org/TR/webvtt1/#webvtt-region-settings-list

  Settings that follow a REGION line. Spaces, tabs and single line terminators
  (a terminator not followed by another one) are Text separators; exactly two
  consecutive line terminators end the block and pop back one state.

  Recognised settings, each split into name (Keyword), ":" (Punctuation) and value:
    id:VALUE                       value is any non-whitespace run not starting with the cue arrow
    width:N%                       percentage, number as Literal and "%" as KeywordType
    lines:N                        unsigned integer
    regionanchor:X%,Y%             two percentages separated by ","
    viewportanchor:X%,Y%           same shape as regionanchor
    scroll:up                      the only accepted value is "up" (KeywordConstant)
-->
<state name="region-settings-list">
  <rule pattern="(?: |\t|\r?\n(?!\r?\n))+">
    <token type="Text"/>
  </rule>
  <rule pattern="(?:\r?\n){2}">
    <token type="Text"/>
    <pop depth="1"/>
  </rule>
  <rule pattern="(id)(:)(?!-->)(\S+)">
    <bygroups>
      <token type="Keyword"/>
      <token type="Punctuation"/>
      <token type="Literal"/>
    </bygroups>
  </rule>
  <rule pattern="(width)(:)((?:[1-9]?\d|100)(?:\.\d+)?)(%)">
    <bygroups>
      <token type="Keyword"/>
      <token type="Punctuation"/>
      <token type="Literal"/>
      <token type="KeywordType"/>
    </bygroups>
  </rule>
  <rule pattern="(lines)(:)(\d+)">
    <bygroups>
      <token type="Keyword"/>
      <token type="Punctuation"/>
      <token type="Literal"/>
    </bygroups>
  </rule>
  <rule pattern="(regionanchor|viewportanchor)(:)((?:[1-9]?\d|100)(?:\.\d+)?)(%)(,)((?:[1-9]?\d|100)(?:\.\d+)?)(%)">
    <bygroups>
      <token type="Keyword"/>
      <token type="Punctuation"/>
      <token type="Literal"/>
      <token type="KeywordType"/>
      <token type="Punctuation"/>
      <token type="Literal"/>
      <token type="KeywordType"/>
    </bygroups>
  </rule>
  <rule pattern="(scroll)(:)(up)">
    <bygroups>
      <token type="Keyword"/>
      <token type="Punctuation"/>
      <token type="KeywordConstant"/>
    </bygroups>
  </rule>
</state>
<!--
  https://www.w3.org/TR/webvtt1/#webvtt-comment-block

  Shared state included by "root" and "cues". A comment block is "NOTE"
  followed by a space, tab or line terminator, then any text that does not
  contain the cue timing arrow, ended by two line terminators or by the end
  of input. The whole match is emitted as a single Comment token.
-->
<state name="comment">
  <rule pattern="^NOTE( |\t|\r?\n)((?!-->)[\s\S])*?(?:(\r?\n){2}|\Z)">
    <token type="Comment"/>
  </rule>
</state>
<!--
  "Zero or more WebVTT cue blocks and WebVTT comment blocks separated from each other by one or more
  WebVTT line terminators." (https://www.w3.org/TR/webvtt1/#file-structure)

  First rule: the head of a cue block. Its eight capture groups map, in order, to
    1. optional cue identifier line, not containing the arrow (Name)
    2. the line terminator after the identifier (Text)
    3. start timestamp, [hh:]mm:ss.ttt (LiteralDate)
    4. spaces/tabs (Text)
    5. the cue timing arrow (Operator)
    6. spaces/tabs (Text)
    7. end timestamp (LiteralDate)
    8. trailing spaces/tabs (Text)
  After the match, cue-settings-list is pushed to lex the rest of the timing line.

  Second rule: NOTE blocks between cues, through the shared "comment" state.
-->
<state name="cues">
  <rule pattern="(?:((?!-->)[^\r\n]+)?(\r?\n))?((?:\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})([ \t]+)(-->)([ \t]+)((?:\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})([ \t]*)">
    <bygroups>
      <token type="Name"/>
      <token type="Text"/>
      <token type="LiteralDate"/>
      <token type="Text"/>
      <token type="Operator"/>
      <token type="Text"/>
      <token type="LiteralDate"/>
      <token type="Text"/>
    </bygroups>
    <push state="cue-settings-list"/>
  </rule>
  <rule>
    <include state="comment"/>
  </rule>
</state>
<!--
  https://www.w3.org/TR/webvtt1/#webvtt-cue-settings-list

  Settings that follow the timestamps on a cue timing line. Spaces and tabs are
  Text separators. In every setting rule the ":" and the value are optional in
  the pattern, so a bare or partially written setting name is still tokenised.

    vertical:rl|lr
    line:N% or line:N (N may be negative), optionally ",start|center|end"
    position:N% or position:N, optionally ",line-left|center|line-right"
    size:N%
    align:start|center|end|left|right
    region:ID (ID is a run without spaces, tabs or line terminators; the
               lookahead rejects it when the arrow followed by a space or tab
               appears later on the same line)

  Names are Keyword, ":" and "," are Punctuation, numbers and ids are Literal,
  "%" is KeywordType and enumerated values are KeywordConstant.

  The last rule is a zero-width lookahead for the line terminator that ends the
  timing line; it pushes cue-payload without consuming the terminator.
-->
<state name="cue-settings-list">
  <rule pattern="[ \t]+">
    <token type="Text"/>
  </rule>
  <rule pattern="(vertical)(:)?(rl|lr)?">
    <bygroups>
      <token type="Keyword"/>
      <token type="Punctuation"/>
      <token type="KeywordConstant"/>
    </bygroups>
  </rule>
  <rule pattern="(line)(:)?(?:(?:((?:[1-9]?\d|100)(?:\.\d+)?)(%)|(-?\d+))(?:(,)(start|center|end))?)?">
    <bygroups>
      <token type="Keyword"/>
      <token type="Punctuation"/>
      <token type="Literal"/>
      <token type="KeywordType"/>
      <token type="Literal"/>
      <token type="Punctuation"/>
      <token type="KeywordConstant"/>
    </bygroups>
  </rule>
  <rule pattern="(position)(:)?(?:(?:((?:[1-9]?\d|100)(?:\.\d+)?)(%)|(-?\d+))(?:(,)(line-left|center|line-right))?)?">
    <bygroups>
      <token type="Keyword"/>
      <token type="Punctuation"/>
      <token type="Literal"/>
      <token type="KeywordType"/>
      <token type="Literal"/>
      <token type="Punctuation"/>
      <token type="KeywordConstant"/>
    </bygroups>
  </rule>
  <rule pattern="(size)(:)?(?:((?:[1-9]?\d|100)(?:\.\d+)?)(%))?">
    <bygroups>
      <token type="Keyword"/>
      <token type="Punctuation"/>
      <token type="Literal"/>
      <token type="KeywordType"/>
    </bygroups>
  </rule>
  <rule pattern="(align)(:)?(start|center|end|left|right)?">
    <bygroups>
      <token type="Keyword"/>
      <token type="Punctuation"/>
      <token type="KeywordConstant"/>
    </bygroups>
  </rule>
  <rule pattern="(region)(:)?((?![^\r\n]*-->(?=[ \t]+?))[^ \t\r\n]+)?">
    <bygroups>
      <token type="Keyword"/>
      <token type="Punctuation"/>
      <token type="Literal"/>
    </bygroups>
  </rule>
  <rule pattern="(?=\r?\n)">
    <push state="cue-payload"/>
  </rule>
</state>
<!--
  https://www.w3.org/TR/webvtt1/#cue-payload

  Cue text. Rules, in order:
    1. Two or more line terminators end the cue (Text) and pop two states,
       i.e. this state and the cue-settings-list state that pushed it.
    2. Any character other than "less-than" or ampersand is Text. The
       quantifier is lazy with nothing after it, so each match is a single
       character (presumably so rule 1 is re-checked at every position).
    3. A character reference: ampersand, then a decimal number, a hex number
       or an alphanumeric name, then ";" (Text).
    4. A zero-width lookahead for "less-than" pushes cue-span-tag.

  The "less-than" and ampersand characters are written as entity references
  because they may not appear literally inside an XML attribute value.
-->
<state name="cue-payload">
  <rule pattern="(\r?\n){2,}">
    <token type="Text"/>
    <pop depth="2"/>
  </rule>
  <rule pattern="[^&lt;&amp;]+?">
    <token type="Text"/>
  </rule>
  <rule pattern="&amp;(#\d+|#x[0-9A-Fa-f]+|[a-zA-Z0-9]+);">
    <token type="Text"/>
  </rule>
  <rule pattern="(?=&lt;)">
    <token type="Text"/>
    <push state="cue-span-tag"/>
  </rule>
</state>
<!--
  A tag inside cue text. Rules, in order:
    1. The opening "less-than" of a start tag (Punctuation), accepted only
       when followed by one of c, i, b, u, ruby, rt, v, lang or a timestamp;
       pushes cue-span-start-tag-name to lex the name and what follows it.
    2. An end tag opener: "less-than" plus "/" (Punctuation) and the tag
       name (NameTag).
    3. The closing "greater-than" (Punctuation), which pops this state.

  "less-than" is written as an entity reference because it may not appear
  literally inside an XML attribute value.
-->
<state name="cue-span-tag">
  <rule pattern="&lt;(?=c|i|b|u|ruby|rt|v|lang|(?:\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})">
    <token type="Punctuation"/>
    <push state="cue-span-start-tag-name"/>
  </rule>
  <rule pattern="(&lt;/)(c|i|b|u|ruby|rt|v|lang)">
    <bygroups>
      <token type="Punctuation"/>
      <token type="NameTag"/>
    </bygroups>
  </rule>
  <rule pattern=">">
    <token type="Punctuation"/>
    <pop depth="1"/>
  </rule>
</state>
<!--
  The name part of a start tag, entered right after the opening bracket.
    1. c, i, b, u, ruby or rt (NameTag), or a timestamp (LiteralDate). Only one
       of the two groups participates in a given match. Pushes
       cue-span-classes-without-annotations.
    2. v or lang (NameTag). Pushes cue-span-classes-with-annotations, the
       variant that also accepts annotation text after the classes.
-->
<state name="cue-span-start-tag-name">
  <rule pattern="(c|i|b|u|ruby|rt)|((?:\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})">
    <bygroups>
      <token type="NameTag"/>
      <token type="LiteralDate"/>
    </bygroups>
    <push state="cue-span-classes-without-annotations"/>
  </rule>
  <rule pattern="v|lang">
    <token type="NameTag"/>
    <push state="cue-span-classes-with-annotations"/>
  </rule>
</state>
<!--
  Class list of a tag that takes no annotation. Classes are lexed through the
  shared cue-span-classes state. A zero-width lookahead for the closing
  bracket pops two states (this one and cue-span-start-tag-name), leaving the
  bracket itself for cue-span-tag.
-->
<state name="cue-span-classes-without-annotations">
  <rule>
    <include state="cue-span-classes"/>
  </rule>
  <rule pattern="(?=>)">
    <pop depth="2"/>
  </rule>
</state>
<!--
  Class list of a tag that carries an annotation (pushed for v and lang).
  Classes are lexed through the shared cue-span-classes state. A zero-width
  lookahead for a space or tab pushes cue-span-start-tag-annotations, which
  consumes the annotation text.
-->
<state name="cue-span-classes-with-annotations">
  <rule>
    <include state="cue-span-classes"/>
  </rule>
  <rule pattern="(?=[ \t])">
    <push state="cue-span-start-tag-annotations"/>
  </rule>
</state>
<!--
  Shared by both class-list states. One class is a "." (Punctuation) followed
  by a run of characters (NameTag) that excludes space, tab, line terminators,
  ampersand, the two angle brackets and ".".

  Ampersand and "less-than" are written as entity references because they may
  not appear literally inside an XML attribute value.
-->
<state name="cue-span-classes">
  <rule pattern="(\.)([^ \t\n\r&amp;&lt;>\.]+)">
    <bygroups>
      <token type="Punctuation"/>
      <token type="NameTag"/>
    </bygroups>
  </rule>
</state>
<!--
  Annotation text of a start tag.
    1. A space or tab, then one or more items, each either a character other
       than a line terminator, ampersand or the closing bracket, or a
       character reference (ampersand, decimal / hex number or alphanumeric
       name, ";"). Emitted as Text.
    2. A zero-width lookahead for the closing bracket pops three states (this
       one, cue-span-classes-with-annotations and cue-span-start-tag-name),
       leaving the bracket itself for cue-span-tag.

  Ampersand is written as an entity reference because it may not appear
  literally inside an XML attribute value.
-->
<state name="cue-span-start-tag-annotations">
  <rule pattern="[ \t](?:[^\n\r&amp;>]|&amp;(?:#\d+|#x[0-9A-Fa-f]+|[a-zA-Z0-9]+);)+">
    <token type="Text"/>
  </rule>
  <rule pattern="(?=>)">
    <token type="Text"/>
    <pop depth="3"/>
  </rule>
</state>
</rules>
|
|
</lexer>
|