Back to the token engine concept...
This time the example is very general (I hope) and useful (I hope, too),
and it also gives me something to post...
The MAIN goal is for this small program to become part of a small (or little) language interpreter (I hope, again).
Anyway, so far it seems to tokenize the following properly:
identifiers (keywords, variables: a..z followed by a..z, 0..9) - lower case only (for now)
operators (+ , - , * , / , = , < , > )
quoted string (literal) : "quoted"
parens (brackets) - () , []
special - comma, colon, ...
'microB tokenizer by Aurel 19.3.2019
'
'Token type codes. They are only declared here; the code visible in this file
'does not yet store them in typList.
int tkNULL=0,tkPLUS=1,tkMINUS=2,tkMULTI=3,tkDIVIDE=4
int tkCOLON=5,tkCOMMA=6,tkLPAREN=7,tkRPAREN=8,tkLBRACKET=9,tkRBRACKET=10
int tkPRINT=11,tkDOT=12,tkLINE=13,tkCIRCLE=14 ,tkEOL = 20
'Tokenizer output: tokList holds the token texts, typList is reserved for their types.
string tokList[256] : int typList[256] 'token/type arrays
'Scanner state: p = current position in the source (1-based), tp = index of the last stored token,
'n = general loop counter. start is declared once and then set to the initial position.
int start, p = 1, tp = 0, n 'init
start = p
'ch = current character, tk = token being built, bf = output buffer, crlf = line break for printing.
string code,ch,tk ,crlf=chr(13)+chr(10),bf
'--------------------------------------------------------------------
code = "arr[10]: func(a,b): var1+ 0.5*7: str s="+ chr(34)+ "micro" + chr(34) ' test or load_src?
'--------------------------------------------------------------------
sub tokenizer(src as string) as int
'Split src into tokens and store them, in order, in the global tokList[] starting at index 1.
'
'Token kinds recognised:
'  identifier   a..z followed by any run of a..z / 0..9   (lower case only)
'  number       a digit followed by any run of digits and dots
'               (dots are not counted, so "1.2.3" comes out as ONE token)
'  string       "..."  stored WITHOUT the surrounding quotes; "" gives an empty token
'  single char  + - * / ( ) , : < = > [ ]
'Blank, TAB, CR and LF between tokens are skipped; no end-of-line token is produced.
'
'Returns the number of tokens stored. Returns 0 after printing a message when the source
'contains an unclosed quote, a character that starts no known token, or more tokens than
'tokList can hold.
'
'Uses the globals p, tp, tk and ch as working state and resets them on entry,
'so every call scans src from its first character.
int srcLen, c, found
srcLen = len(src)
p = 1 : tp = 0 : tk = "" : ch = ""
while p <= srcLen
  ch = mid(src,p,1) : c = asc(ch) : found = 0
  if c=32 or c=9 or c=13 or c=10 then
    'white space: blank, TAB, CR, LF
    p++
  elseif c=34 then
    '---- quoted string ---------------------------------------------
    p++ 'step over the opening quote
    while p <= srcLen
      ch = mid(src,p,1)
      if asc(ch)=34 then exit while
      'a string literal may not run across a line break
      if ch = chr(10) then print "Unclosed Quote! Exit..." : return 0 : end if
      tk = tk + ch : p++
    wend
    'running off the end of the source means the closing quote is missing
    if p > srcLen then print "Unclosed Quote! Exit..." : return 0 : end if
    p++ 'step over the closing quote
    found = 1
  elseif c>96 and c<123 then
    '---- identifier: [a-z][a-z0-9]* --------------------------------
    while (c>96 and c<123) or (c>47 and c<58)
      tk = tk + ch : p++
      if p > srcLen then exit while 'do not read past the end of the source
      ch = mid(src,p,1) : c = asc(ch)
    wend
    found = 1
  elseif c>47 and c<58 then
    '---- number: [0-9][0-9.]* --------------------------------------
    while (c>47 and c<58) or c=46
      tk = tk + ch : p++
      if p > srcLen then exit while 'do not read past the end of the source
      ch = mid(src,p,1) : c = asc(ch)
    wend
    found = 1
  elseif c=43 or c=45 or c=42 or c=47 or c=40 or c=41 or c=44 or c=58 or c=60 or c=61 or c=62 or c=91 or c=93 then
    '---- single-character token: + - * / ( ) , : < = > [ ] ---------
    tk = ch : p++
    found = 1
  else
    'nothing above consumed this character; stop instead of looping forever on it
    print "Unknown token!-[" +ch +" ]-Exit..." : return 0
  end if
  if found = 1 then
    '256 is the declared size of tokList
    if tp >= 256 then print "Too many tokens! Exit..." : return 0 : end if
    tp++ : tokList[tp] = tk : tk = ""
  end if
wend
ch = ""
return tp
end sub
'Demo driver: tokenize the test source held in code, report how many tokens
'were found, then show every token on its own line.
int tn
tn = tokenizer(code)
print "number of tokens:" + str(tn)
'append each token plus a line break to the output buffer
n = 1
while n <= tn
  bf = bf + tokList[n] + crlf
  n++
wend
print bf