Oxygen Basic
		Programming => Example Code => General => Topic started by: Charles Pegge on June 03, 2014, 05:41:57 AM
		
			
			- 
				Using the new extended case syntax:
 
 case 34,39,96 : stepquote(s,i) : exit do
 
 
 sys sw   'word position
 sys ascw 'word ascii code
 
 
 function stepquote(string s, sys*i)
 ===================================
 byte b at strptr s
 byte q=b[i]
 do
 i++
 select b[i]
 case 0 : exit do
 case q : i++ : exit do
 end select
 end do
 end function
 
 
 function endline(string s, sys*i)
 =================================
 byte b at strptr s
 do
 select b[i]
 case 0,10,12,13 : exit do
 end select
 i++
 end do
 end function
 
 
 function skipspace(string s, sys*i)
 ==================================
 byte b at strptr s
 sys  a
 do
 a=b[i]
 select a
 case 0         : exit do
 case 33 to 255 : exit do
 end select
 i++
 end do
 end function
 
 
 macro posword(s,i)
 ==================
 byte b at strptr s
 sys  bg,en
 '
 rwordpos:
 '
 skipspace s,i
 ascw=b[i]
 bg=i
 sw=i
 do
 select b[i]
 case 0 to 32  : exit do                      '     whitespace
 case 34,39,96 : stepquote(s,i) : exit do     ' "'` quotemarks
 case 40,41    : i++ : exit do                ' ( ) brackets
 case 59       : endline(s,i) : goto rwordpos ' ;   comment
 end select
 i++
 end do
 en=i
 end macro
 
 
 function stepword(string s,sys*i)
 =================================
 posword(s,i)
 end function
 
 
 function getword(string s, sys*i) as string
 ===========================================
 posword(s,i)
 return mid(s,bg,en-bg)
 end function
 
 
 quick update: ( 18:08 02/06/2014 )
 http://www.oxygenbasic.org/o2zips/Oxygen.zip
 
 
- 
				
 This one can be used for reading Basic words and symbols:
 
 The first part skips white space, the second part captures quoted text, and the third part captures words or individual symbols, according to Basic usage.
 
   function getword(s as string, b as sys) as string
 =================================================
 sys a,c,d,bb,bc,ls
 ls =len s
 if ls=0 then exit function
 if b>ls then exit function
 if b<1  then b=1
 bb=b
 byte bt at b-1+strptr s
 do
 select bt
 case 33 to 255,0 : exit do 'SKIP SPACE
 end select
 b++
 @bt++
 end do
 bc=b
 select bt
 case 34,96 'QUOTE MARKS
 c=bt
 do
 b++
 @bt++
 select bt
 case 0 :      : jmp fwd done
 case c : b++  : jmp fwd done
 end select
 end do
 end select
 do
 select bt
 case 0 to 32
 exit do
 case _
 48 to 57,   'numbers
 65 to 90,   'uppercase
 97 to 122,  'lowercase
 128 to 255, 'higher ascii
 35,46,95 :  'hash dot undescore
 case else
 if b=bc then b++ 'REMAINING SYMBOLS
 exit do
 end select
 b++
 @bt++
 end do
 '
 done:
 '
 if b>bc then return mid s,bc,b-bc
 
 end function
 
- 
				Hi Charles,
 
 How does that last one handle nested double quotes, if at all?
- 
				Hi Mike,
 
 This will handle nested double-quotes if ascii 96 quote marks are used ` outer "inner" `
 
 Oxygen itself uses several word parsers with various parsing rules, mainly for internal data processing.
 
 This makes it possible to define nested quote symbols composed of any combination of characters delimited by white space:
 
 string s
 s= quote """ superquote """
 
 s= quote
 !-!
 my script
 !-!
 
- 
				OK OK Charles,
 
 These are Simple Parsing Functions after all. :)
- 
				
 I have just found 2 anomalies in the the superquote. You are indeed a bugmagnet, Mike. Your merest presence pulls them out of the woodwork :)
- 
				Just tell me "be gone" and I will. Would you? :)
			
- 
				(https://img1.etsystatic.com/034/0/8988668/il_340x270.562933147_prda.jpg)
			
- 
				No. Deadlier.
 
 (http://hobotraps.com/images/spider-black-widow.jpg)
- 
				Bug magnets are good! :)
 
 (http://telcontar.net/Artwork/ThinkNerd/images/Bug-Magnet.png)
- 
				Parsing Utility for general language use
 
 It may be configured for any set of ascii symbol codes and quote marks. But its default settings are for LeanLisp where the only self-terminating symbols are brackets, and token: 40,41, and 255.
 
 inc/ParseUtli.inc
 
 sys    sttw    'word position
 sys    ascb    'word ascii code actual
 sys    ascw    'word ascii code presented
 sys    lenw    'word length
 sys    ntrim   'inhibit-trimlr flag
 sys    opw     'operator token
 
 
 function instrword(sys i,string s,k) as sys
 ===========================================
 sys  lk=len k
 byte b at strptr s
 if not k then return 0
 if i=0 then i=1
 do
 i=instr(i,s,k)
 if i=0 then
 return i
 elseif b[i+lk]<33
 if i=1 or b[i-1]<33
 return i
 end if
 end if
 i+=lk
 end do
 end function
 
 
 function instrb(byte*bb,*bk) as sys
 ===================================
 sys b
 do
 b=bb
 select bk
 case 0    : return 1
 case b    : 'match chars
 case else : return 0
 end select
 @bk++
 @bb++
 end do
 end function
 
 
 function instrev(sys i,string s,k) as sys
 =========================================
 sys  a=asc k
 sys  lk=len k
 sys  ls=len s
 if i=0 then
 i=ls
 elseif i>ls then
 i=ls
 end if
 byte b at strptr s
 byte c at strptr k
 do
 if i<1 then exit do
 select b[i]
 case a : if instrb(b[i],c) then exit do
 end select
 i--
 end do
 return i
 end function
 
 
 function skipspace(string s, sys*i)
 ===================================
 byte b at strptr s
 do
 ascb=b[i]
 ascw=ascb
 select ascb
 case 0         :        : exit do
 case 41        : ascw=0 : exit do
 case 33 to 255 :        : exit do
 end select
 i++
 end do
 end function
 
 
 function skiplspace(string s, sys*i)
 ====================================
 byte b at strptr s
 do
 ascb=b[i]
 ascw=ascb
 select ascb
 case 0,13,10   :        : exit do 'stop at end of lines
 case 41        : ascw=0 : exit do
 case 33 to 255 :        : exit do
 end select
 i++
 end do
 end function
 
 
 function endline(string s, sys*i)
 =================================
 byte b at strptr s
 do
 select b[i]
 case 0,10,12,13 : exit do
 end select
 i++
 end do
 end function
 
 
 function trimlr(string s, sys *i, *le)
 =======================================
 if ntrim then ntrim=0 : return
 sys j=le
 byte b at strptr s
 skipspace s,i
 do
 if j<1 then exit do
 if b[j]>32 then exit do
 j--
 end do
 le=j
 end function
 
 
 
 % SymbolTerm    40,41
 % SymbolQuote   34,39,96
 % SymbolComment 59
 
 macro posword(s,i)
 ==================
 byte b at strptr s
 '
 rwordpos:
 '
 skipspace s,i
 sttw=i
 select ascb 'FIRST CHAR
 case SymbolTerm
 i++ : jmp fwd nwordpos 'brackets    (  )
 case 0
 jmp fwd nwordpos 'END
 case SymbolQuote
 do
 i++
 select b[i]
 case 0    : jmp fwd nwordpos
 case ascw : i++ : jmp fwd nwordpos
 end select
 end do
 jmp fwd nwordpos
 case SymbolComment
 endline(s,i) : jmp rwordpos      ' ;   comment ;
 case 255
 i++ : opw=b[i] : i++ : jmp fwd nwordpos 'EMBEDDED TOKEN
 end select
 do 'MOVE TO WORD BOUNDARY
 select b[i]
 case 0 to 32
 exit do
 case SymbolTerm
 exit do
 case SymbolQuote
 exit do
 case SymbolComment
 exit do
 case 255
 exit do ' embedded token marker
 end select
 i++
 end do
 nwordpos:
 lenw=i-sttw
 end macro
 
 
 function stepword(string s,sys*i) as sys
 ========================================
 posword(s,i)
 end function
 
 
 function getword(string s, sys*i) as string
 ===========================================
 posword(s,i)
 return mid s,sttw,lenw
 end function
 
 
 function stepitem(string sr, sys*i)
 ===================================
 string wr
 sys bc
 stepword(sr,i)
 if ascw<>40 then return 'skip the word only
 'otherwiwise skip block including any nested blocks
 bc++
 do 'STEP OVER NESTED BRACKETS
 stepword(sr,i)
 select ascw
 case 0 :
 if ascb = 41 then
 if bc <= 0 then exit do
 bc--
 else
 exit do
 end if
 case 40 : bc++
 case 41 : bc--
 end select
 if bc=0 then exit do
 end do
 end function
 
 
 function getitem(string sr, sys*i) as string
 ============================================
 sys b,c
 skipspace(sr,i)
 b=i
 c=ascw
 stepitem(sr,i)
 sttw=b
 ascw=c
 return mid sr, b, i-b
 end function
 
 
 function unquote(string s) as string
 ====================================
 ascw=asc s
 if asc(s,-1)=ascw then
 ascw=asc s,2
 return mid s, 2, len(s)-2
 else
 return s
 end if
 end function
 
 
 function inner(string s) as string
 ==================================
 sys i=1, le=len s
 trimlr s,i,le
 i+=1
 le-=2
 if le then return mid s,i,le
 end function
 
 Demo:
 includepath "$/inc/"
 include "ParseUtil.inc"
 
 'DEMO
 =====
 
 string sr,wr,pr,cr=chr(13,10)
 sys i
 sr="(a = 100+b/n) ;comment"
 
 i=1
 pr=""
 do
 wr=getword sr,i
 if ascb=0 then exit do
 pr+= wr cr
 end do
 print pr
 
 
 'CUSTOMISED PARSING:
 ====================
 'using macros used by macro posword
 '
 
 macro SymbolTerm 'override
 ==========================
 36 to 45  , '  $ % & ' ( ) * + , -
 47        , '  /
 58 to 64  , '  : ; < = > ? @
 91 to 94  , '  [ \ ] ^
 123 to 255  '  { | } ~ [del] upper ascii
 end macro
 
 macro SymbolQuote 34   'override
 macro SymbolComment -1 'override (-1 no case)
 
 function StepSymbol(string s,sys*i) as sys
 ==========================================
 posword(s,i)
 end function
 
 function GetSymbol(string s, sys*i) as string
 =============================================
 posword(s,i)
 return mid s,sttw,lenw
 end function
 
 i=1
 pr=""
 do
 wr=GetSymbol sr,i
 if ascb=0 then exit do
 pr+= wr cr
 end do
 print pr