address review feedback

Signed-off-by: Alexander Bezzubov <bzz@apache.org>
This commit is contained in:
Alexander Bezzubov
2019-04-14 22:15:18 +02:00
parent 7929933eb5
commit ada6f15c93
5 changed files with 22 additions and 19 deletions

View File

@ -9,8 +9,7 @@ package flex
import "C"
import "unsafe"
const maxTokenLen = 32
const maxTokenLen = 32 // bytes
// TokenizeFlex implements a tokenizer by calling the Flex-generated C code from linguist.
// It is a transliteration of the C original: https://github.com/github/linguist/blob/master/ext/linguist/linguist.c#L12
@ -28,25 +27,24 @@ func TokenizeFlex(content []byte) []string {
C.linguist_yylex_init_extra(&extra, &scanner)
buf = C.linguist_yy_scan_bytes((*C.char)(cs), _len, scanner)
ary := []string{}
for {
extra._type = C.NO_ACTION
extra.token = nil
r = C.linguist_yylex(scanner)
switch (extra._type) {
switch extra._type {
case C.NO_ACTION:
break
case C.REGULAR_TOKEN:
_len = C.strlen(extra.token)
if (_len <= maxTokenLen) {
if _len <= maxTokenLen {
ary = append(ary, C.GoStringN(extra.token, (C.int)(_len)))
}
C.free(unsafe.Pointer(extra.token))
break
case C.SHEBANG_TOKEN:
_len = C.strlen(extra.token)
if (_len <= maxTokenLen) {
if _len <= maxTokenLen {
s := "SHEBANG#!" + C.GoStringN(extra.token, (C.int)(_len))
ary = append(ary, s)
}
@ -54,7 +52,7 @@ func TokenizeFlex(content []byte) []string {
break
case C.SGML_TOKEN:
_len = C.strlen(extra.token)
if (_len <= maxTokenLen) {
if _len <= maxTokenLen {
s := C.GoStringN(extra.token, (C.int)(_len)) + ">"
ary = append(ary, s)
}