Parsing Utility for general language use
It may be configured for any set of ascii symbol codes and quote marks. But its default settings are for LeanLisp where the only self-terminating symbols are brackets, and token: 40,41, and 255.
inc/ParseUtli.inc
sys sttw 'word position
sys ascb 'word ascii code actual
sys ascw 'word ascii code presented
sys lenw 'word length
sys ntrim 'inhibit-trimlr flag
sys opw 'operator token
function instrword(sys i,string s,k) as sys
===========================================
sys lk=len k
byte b at strptr s
if not k then return 0
if i=0 then i=1
do
i=instr(i,s,k)
if i=0 then
return i
elseif b[i+lk]<33
if i=1 or b[i-1]<33
return i
end if
end if
i+=lk
end do
end function
function instrb(byte*bb,*bk) as sys
===================================
sys b
do
b=bb
select bk
case 0 : return 1
case b : 'match chars
case else : return 0
end select
@bk++
@bb++
end do
end function
function instrev(sys i,string s,k) as sys
=========================================
sys a=asc k
sys lk=len k
sys ls=len s
if i=0 then
i=ls
elseif i>ls then
i=ls
end if
byte b at strptr s
byte c at strptr k
do
if i<1 then exit do
select b[i]
case a : if instrb(b[i],c) then exit do
end select
i--
end do
return i
end function
function skipspace(string s, sys*i)
===================================
byte b at strptr s
do
ascb=b[i]
ascw=ascb
select ascb
case 0 : : exit do
case 41 : ascw=0 : exit do
case 33 to 255 : : exit do
end select
i++
end do
end function
function skiplspace(string s, sys*i)
====================================
byte b at strptr s
do
ascb=b[i]
ascw=ascb
select ascb
case 0,13,10 : : exit do 'stop at end of lines
case 41 : ascw=0 : exit do
case 33 to 255 : : exit do
end select
i++
end do
end function
function endline(string s, sys*i)
=================================
byte b at strptr s
do
select b[i]
case 0,10,12,13 : exit do
end select
i++
end do
end function
function trimlr(string s, sys *i, *le)
=======================================
if ntrim then ntrim=0 : return
sys j=le
byte b at strptr s
skipspace s,i
do
if j<1 then exit do
if b[j]>32 then exit do
j--
end do
le=j
end function
% SymbolTerm 40,41
% SymbolQuote 34,39,96
% SymbolComment 59
macro posword(s,i)
==================
byte b at strptr s
'
rwordpos:
'
skipspace s,i
sttw=i
select ascb 'FIRST CHAR
case SymbolTerm
i++ : jmp fwd nwordpos 'brackets ( )
case 0
jmp fwd nwordpos 'END
case SymbolQuote
do
i++
select b[i]
case 0 : jmp fwd nwordpos
case ascw : i++ : jmp fwd nwordpos
end select
end do
jmp fwd nwordpos
case SymbolComment
endline(s,i) : jmp rwordpos ' ; comment ;
case 255
i++ : opw=b[i] : i++ : jmp fwd nwordpos 'EMBEDDED TOKEN
end select
do 'MOVE TO WORD BOUNDARY
select b[i]
case 0 to 32
exit do
case SymbolTerm
exit do
case SymbolQuote
exit do
case SymbolComment
exit do
case 255
exit do ' embedded token marker
end select
i++
end do
nwordpos:
lenw=i-sttw
end macro
function stepword(string s,sys*i) as sys
========================================
posword(s,i)
end function
function getword(string s, sys*i) as string
===========================================
posword(s,i)
return mid s,sttw,lenw
end function
function stepitem(string sr, sys*i)
===================================
string wr
sys bc
stepword(sr,i)
if ascw<>40 then return 'skip the word only
'otherwiwise skip block including any nested blocks
bc++
do 'STEP OVER NESTED BRACKETS
stepword(sr,i)
select ascw
case 0 :
if ascb = 41 then
if bc <= 0 then exit do
bc--
else
exit do
end if
case 40 : bc++
case 41 : bc--
end select
if bc=0 then exit do
end do
end function
function getitem(string sr, sys*i) as string
============================================
sys b,c
skipspace(sr,i)
b=i
c=ascw
stepitem(sr,i)
sttw=b
ascw=c
return mid sr, b, i-b
end function
function unquote(string s) as string
====================================
ascw=asc s
if asc(s,-1)=ascw then
ascw=asc s,2
return mid s, 2, len(s)-2
else
return s
end if
end function
function inner(string s) as string
==================================
sys i=1, le=len s
trimlr s,i,le
i+=1
le-=2
if le then return mid s,i,le
end function
Demo:
includepath "$/inc/"
include "ParseUtil.inc"
'DEMO
=====
string sr,wr,pr,cr=chr(13,10)
sys i
sr="(a = 100+b/n) ;comment"
i=1
pr=""
do
wr=getword sr,i
if ascb=0 then exit do
pr+= wr cr
end do
print pr
'CUSTOMISED PARSING:
====================
'using macros used by macro posword
'
macro SymbolTerm 'override
==========================
36 to 45 , ' $ % & ' ( ) * + , -
47 , ' /
58 to 64 , ' : ; < = > ? @
91 to 94 , ' [ \ ] ^
123 to 255 ' { | } ~ [del] upper ascii
end macro
macro SymbolQuote 34 'override
macro SymbolComment -1 'override (-1 no case)
function StepSymbol(string s,sys*i) as sys
==========================================
posword(s,i)
end function
function GetSymbol(string s, sys*i) as string
=============================================
posword(s,i)
return mid s,sttw,lenw
end function
i=1
pr=""
do
wr=GetSymbol sr,i
if ascb=0 then exit do
pr+= wr cr
end do
print pr