#
# KEHOME/src/token.icn
# Oct/27/2005 Jul/20/2009
#
# Low-level tokenizer for the KE parser.  An input line is split into a
# list of WORD records (get_word), which later stages turn into TOKEN
# records (see the "pattern matching" banner below).

$include "keparam.h"

procedure init_parser()
#======================
# Run the parser initializers in a fixed order.
# NOTE(review): init_word() in this file itself calls init_keyword() and
# init_char(), so those two run more than once on this path -- confirm
# that repeated initialization is harmless.
   init_char()
   init_word()
   init_keyword()  # only a minimal set of keywords ?
   init_group()
end

global not_dollar
global ParserMode  # word|line set by init.icn, symbol.icn
# CommentCharacter  # initially '#'
global WordChar  # for current word definition

#===================================#
# pattern matching: #
# line -> WORD list -> TOKEN list #
#===================================#

# One lexical unit produced by get_word().
record WORD (
   wtype,   # character
   wvalue   # string
   )

# One token; fields are read by token_type() / token_value().
record TOKEN (
   ttype,   # character
   tvalue   # string
   )

#=========#
# methods #
#=========#
# get_word(fd,gtype,ps)
# map_word(word)
# get_token(word,gtype,ps)
# map_token(fd,option,ps)
# token_type(x)
# token_value(x)
# token_unparse(x)
# init_parser() # called by interpret_line() in symbol.icn
# init_parser() # called by knit_init() in knit.icn
# init_keyword() # token.icn
# init_command() # command.icn
# init_parameter() # param.icn
# init_quantifier() # token.icn
# init_gtype() # token.icn
# init_variable() # token.icn
# init_char() # char.icn
# declare_word() # token.icn
# m_dquote()
# m_squote()
# m_paren() # treat like quote
# m_angle() # treat like quote
###### m_slash() # for hierarchy
# m_comment()
# m_whitespace()
# m_dollarvar()
# m_separator()
# m_assignword()   (defined below as m_assign_word)
# m_productword()  (defined below as m_product_word)
# m_word()         (currently commented out below)

global XLINE
global SEPARATOR,WhiteSpace,NONSEP
global AssignOp,AssignChar
# NOTE: WordChar is also declared a few lines above; the repeat is redundant.
global WordChar,xWordChar,NumberChar
global DollarNameChar,FileNameChar
global CycConstantChar
global ListSeparator
global mkr_word,HIDDEN
global rdf_word,owl_word,cycl_word
global dmoz_word,tap_word,cyc_word
global CONTEXTword
global KEYWORD,CONTROL,VERB,PREPOSITION,OPERATOR,CONJUNCTION
global GTYPE,xxGROUP,EXGROUP,INGROUP,LATTICE
global QUANTIFIER,QUANT,groupQUANT
global VARIABLE,keVARIABLE,PRONOUN
global PARAMETER
global COMMAND

#----------------------------------------------------#

# string
# token_unparse(x,sep): text form of a TOKEN or of a list of TOKENs.
procedure token_unparse(x,sep)
#=============================
local y,z
# sep defaults to a single blank.
# A TOKEN yields its tvalue; a list yields unparse() of the tvalue list.
# Any other type leaves y unassigned, so &null is returned.
   /sep := " "
   case type(x) of {
   "TOKEN": { y := token_value(x) }
   "list":  { y := unparse(token_value(x),sep) }
   }
   return y
end

# string or list
procedure token_type(x)
#======================
# TOKEN -> its ttype; list -> list of each element's ttype;
# any other type -> &null (y is never assigned).
local y
   case type(x) of {
   "TOKEN": { y := x.ttype }
   "list":  { y := []
              every put(y,(!x).ttype) }
   } # end case type
   return y
end

# string or list
procedure token_value(x)
#=======================
# TOKEN -> its tvalue; list -> list of each element's tvalue;
# any other type -> &null (y is never assigned).
local y
   case type(x) of {
   "TOKEN": { y := x.tvalue }
   "list":  { y := []
              every put(y,(!x).tvalue) }
   } # end case type
   return y
end

#===================================================================#
#===================================================================#
# words
# NOTE processing sequence:
# get_word()
# map_word()
# get_token()
# map_token()
# get_symbol()
# map_symbol()

# WORD list
procedure get_word(fd,pgtype,ps)
#==============================
# fd ::= file | string | list of string
# wtype ::= kutoken | other token type table
# ps ::= "ke" | "ksc"
# called by get_token() in token.icn
# called by parse_list() in word.icn
# suspend WORD list for this line
# low level parse
#
# Generator.  For every line produced by prompt(fd) the line is scanned
# into WORD records and the result is suspended:
#   ParserMode == "word" : one single-element list per WORD
#   ParserMode == "line" : the whole WORD list at once
#   empty line           : [WORD("w","")]
# Defaults: fd -> myin, pgtype -> "ku", ps -> "ke$ ".  ps is defaulted
# but not otherwise referenced in this procedure.
# pgtype selects the type table used for separators and for the words
# "end"/"exit"/"?.."/"/.."/"\..": "ku" and anything unrecognized use
# mkr_ttype, "ho" uses ho_ttype, "nrel" uses nrel_ttype.
# KFORMAT is saved on entry and restored on every exit path, including
# the two error exits that fail.
local ttype,wtype
local t,line
local wordlist,word,val
local matchline,mapline
static info,warning,ierror
static SaveStack
initial {
   /SaveStack := []
   info := "INFO: get_word: "
   warning := "WARNING: get_word: "
   ierror := "Internal ERROR: get_word: "
}
   push(SaveStack,KFORMAT)
   /fd := myin
   /pgtype := "ku"
   /ps := "ke$ "
   case pgtype of {
   "ku":   { ttype := mkr_ttype;  wtype := mkr_wtype }
   "ho":   { ttype := ho_ttype;   wtype := mkr_wtype }
   "nrel": { ttype := nrel_ttype; wtype := mkr_wtype }
   default:{ ttype := mkr_ttype;  wtype := mkr_wtype }
   } # end case pgtype
   every line := prompt(fd) do {
      if DEBUG==("WORD"|"PARSE") then {
         writes_type_all(line,info||"input line")
      }
      case t := type(line) of {
      default: {
         writes_type_all(line,ierror||"unexpected type line")
         #return []
         # FIX: undo the push made on entry before leaving; the original
         # failed here with the saved KFORMAT still on SaveStack.
         KFORMAT := pop(SaveStack)
         fail
         }
      ("string"|"integer"): {
         if *line = 0 then {
            if DEBUG == "NULL" then
               writes_type_all(line,warning||"empty string line")
            suspend [WORD("w","")]
            next
            }
         }
      } # end case t
      wordlist := []
      matchline := line
      mapline := ""
      matchline ? {
         # Alternatives are tried in order; the first matcher that
         # succeeds at the current position supplies the next WORD.
         while word := (
            WORD("B",m_whitespace()) |
            WORD("w",m_cyc_constant()) | # must precede m_comment()
            WORD("c",m_comment()) |
            WORD("Q",m_dquote()) |
            WORD("q",m_squote()) |
            WORD("w",m_kuword()) |
            WORD("s",m_separator()) |
            WORD("a",m_angle()) |
            WORD("b",m_paren()) |
            #####WORD("h",m_htmlword()) |
            WORD("$",m_dollarvar()) |
            WORD("R",m_product_word()) | # must precede m_dcolon()
            WORD("S",m_assign_word()) |
            WORD("L",m_dcolon()) #| # : not currently a separator
            ####### WORD("/",m_slash()) | # for hierarchy
            ) #####& tab(0)
         do {
            # phase 1: map wtype for get_token()
            case word.wtype of {
            #####"h": { # html
            ##### if DEBUG==("HTML"|"HTM"|"XML"|"RDF") then {
            ##### writes_type(mybug,word,info||"html: word")
            ##### writes_type(mylog,word,info||"html: word")
            ##### } # end if DEBUG
            ##### } # end "h"
            ("q"|"Q"): { # quote
               val := word.wvalue
               # a one-character match is a lone quote mark: treat as a word
               if *val = 1 then {
                  if DEBUG=="QUOTE" then {
                     writes_type(mybug,val,warning||"isolated quote mark")
                     writes_type(mylog,val,warning||"isolated quote mark")
                     }
                  word.wtype := "w"
                  } # end if *val
               } # end "q"|"Q"
            ("a"|"b"): { } # angle & paren
            "c": { # comment
               if DEBUG=="COMMENT" then
                  writes_type_all(word,info||"parsed comment word")
               #####case CommentMode of {
               #####default: { word.wtype := "c" }
               #####"cyc": { word.wtype := "w" } # CycL constant '#$name'
               #####}
               }
            "B": { } # white space
            "R": { } # production operator
            "S": { } # assignment operator
            "L": { } # dcolon
            "W": { } # WordNet =>
            "X": { } # WordNet --
            "s": { # other separators
               # [,] {;} () <>
               word.wtype := ttype[word.wvalue]
               }
            #####"=": {
            ##### case word.wvalue of {
            ##### }
            ##### }
            "w": { # recognize end & exit
               case word.wvalue of {
               "end":  { word.wtype := ttype[word.wvalue] } # "Z"
               "exit": { word.wtype := ttype[word.wvalue] } # "z"
               #####"::": { word.wtype := wtype[word.wvalue] } # "L"
               #####":=": { word.wtype := wtype[word.wvalue] } # "R"
               #####"+:=": { word.wtype := wtype[word.wvalue] } # "R"
               #####"-:=": { word.wtype := wtype[word.wvalue] } # "R"
               #####"*:=": { word.wtype := wtype[word.wvalue] } # "R"
               #####"=>": { word.wtype := ttype[word.wvalue] } # "W"
               #####"--": { word.wtype := ttype[word.wvalue] } # "X"
               }
               # recognize question variable & slash
               # NOTE(review): the table is indexed by the whole word, not
               # by its first character; whether multi-character words
               # such as "?x" have entries in ttype is not visible here.
               case word.wvalue[1] of {
               default: { }
               "?": { word.wtype := ttype[word.wvalue] }
               "/": { word.wtype := ttype[word.wvalue] }
               "\\":{ word.wtype := ttype[word.wvalue] }
               }
               word.wvalue := map_word(word.wvalue) # token.icn
               ##### don't substitute yet
               #####if member(wtype,word.wvalue) then
               ##### word.wtype := wtype[word.wvalue]
               }
            } # end case word.wtype
            if DEBUG == "WORD" then
               writes_type_all(word,info||"word")
            put(wordlist,word)
            mapline ||:= word.wvalue
            } # end do
         } # end line ?
      if DEBUG==("WORD"|"PARSE") then {
         writes_type(mybug,wordlist,info||"output wordlist")
         writes_type(mylog,wordlist,info||"output wordlist")
         }
      # mapline is the concatenation of the (possibly mapped) word values;
      # a difference from line is only reported under DEBUG "MAP_WORD".
      if mapline==line then {
         } else {
         if DEBUG=="MAP_WORD" then {
            writes_type(mybug,mapline,warning||"map_word change: mapline")
            writes_type(mylog,mapline,warning||"map_word change: mapline")
            } # end if DEBUG
         } # end if mapline
      # NOTE(review): original line breaks were lost; this is read as a
      # disabled guard followed by a live, unconditional log call.  Confirm
      # against the pristine file.
      #if DEBUG==("READ"|"LINE"|"WORD"|"TOKEN"|"SYMBOL") then
      writes_type(mylog,wordlist,info||"wordlist")
      case ParserMode of {
      default: {
         writes_type(myerr,ParserMode,ierror||"unexpected ParserMode")
         writes_type(mylog,ParserMode,ierror||"unexpected ParserMode")
         # FIX: same as above -- restore KFORMAT / unwind SaveStack
         KFORMAT := pop(SaveStack)
         fail
         }
      "word": { every word := !wordlist do suspend [word] }
      "line": { suspend wordlist }
      }
      } # end every line
   KFORMAT := pop(SaveStack)
end

# string
procedure map_word(word)
#=======================
# Returns word unchanged unless KMAPWORD is "YES"/"yes" and UniqueName is
# not "qualname"; only then does control reach the mapping code below.
local newword,junk
static info
initial {
   info := "INFO: map_word: "
   #####junk := dc2mkr("dc:title")
   #####junk := rdf2mkr("rdfs:Class")
   #####junk := owl2mkr("owl:Thing")
   #####junk := mcf2mkr("Node")
   #####junk := xml2mkr("&owl;")
}
   case KMAPWORD of {
   default: { return word }
   ("NO"|"no"): { return word }
   ("YES"|"yes"): { }
   }
   case UniqueName of {
   default: { }
   "shortname": { }
   "classname": { }
   "rootname": { }
   "qualname": { return word }
   } # end case UniqueName
   newword := word
   # Strip a leading "/" before mapping and put it back afterwards.
   # With every mapping branch below commented out, newword therefore
   # ends up equal to word.
   case word[1] of {
   "/": { newword := word[2:0] }
   }
   #####if member(xml_word,newword) then {
   ##### newword := xml2mkr(newword) # xml.icn
   #####} else if member(words_dc,newword) then {
   ##### newword := dc2mkr(newword) # xml.icn
   #####} else if member(words_rdf,newword) then {
   ##### newword := rdf2mkr(newword) # xml.icn
   #####} else if member(words_owl,newword) then {
   ##### newword := owl2mkr(newword) # xml.icn
   #####} else if member(words_mcf,newword) then {
   ##### newword := mcf2mkr(newword) # xml.icn
   #####}
   case word[1] of {
   "/": { newword := "/"||newword }
   }
   # NOTE(review): original line breaks were lost; the DEBUG guard and its
   # closing brace are read as disabled, the inner test as live.
   #if DEBUG=={"MAP"|"MAPWORD") then
   if newword ~== word then
      writes_all([TypeComment||info||"word <",word,"> => newword <",newword,">"])
   #}
   return newword
end

# string
procedure m_whitespace()
#=======================
# low level parse get_word()
# NOTE: get_word() sets wtype := "B"
# NOTE: get_token() sets ttype := "B"
# NOTE: map_token() deletes ttype "B"
#
# Matches one or more whitespace characters at the scanning position.
# The first call assigns global WhiteSpace unconditionally (the same six
# characters that init_word() assigns).
# The backslash after "suspend" is Icon's prefix non-null operator applied
# to the expression on the next line; presumably it is there so the line
# does not end on "suspend" alone.
initial {
   WhiteSpace := ' \t\v\r\n\f'
}
   suspend \
   tab(many(WhiteSpace))
end

# string
procedure m_separator()
#======================
# low level parse get_word()
# NOTE: get_word() sets wtype := "S" or individual character
# NOTE: get_token() sets ttype := "S" or individual character
# NOTE: map_token() changes ttype from "S" to individual character
# NOTE(review): get_word() in this file builds WORD("s",...) and then
# replaces wtype with ttype[wvalue]; the "S" in the first note looks stale.
#
# Matches exactly one character of SEPARATOR.  SEPARATOR is only set here
# if it is still null (same character set as in init_word()).
initial {
   /SEPARATOR := "\"\'" ++ '[,]' ++ '{;}' ++ '()' ++ '|'
   ##### ++ '<>'
}
   suspend \
   tab(any(SEPARATOR))
end

###### string
#####procedure m_end()
######================
#####suspend \
##### ="end"
#####end

# string
procedure m_assign_word()
#========================
# low level parse get_word()
# NOTE: get_word() sets wtype := "="
# NOTE: get_token() sets ttype := "="
# NOTE: map_token() leaves ttype := "="
# NOTE(review): get_word() in this file tags this match as WORD("S",...),
# not "="; the notes above look stale.
#
# Matches "+=", "-=", "*=" or "=".
# The statics below shadow the globals AssignOp/AssignChar that
# init_word() assigns, so init_word()'s wider AssignChar is not used here.
static AssignOp,AssignChar
initial {
   AssignOp := "="
   AssignChar := '+-*'
}
   suspend \
   (tab(any(AssignChar)) || =AssignOp) |
   (=AssignOp)
end

# string
procedure m_product_word()
#=========================
# low level parse get_word()
# NOTE: get_word() sets wtype := "R"
# NOTE: get_token() sets
#       ttype := "R"
# NOTE: map_token() leaves ttype := "R"
#
# Matches "+:=", "-:=", "*:=" or ":=".
# ProductChar is a string; any() receives it where a cset is expected.
static ProductOp,ProductChar
initial {
   ProductOp := ":="
   ProductChar := "+-*"
}
   suspend \
   (tab(any(ProductChar)) || =ProductOp) |
   (=ProductOp)
end

# string
procedure m_dcolon()
#===================
# match any number of ":"
# (at least one: many() fails when no ":" is present)
# low level parse get_word()
# NOTE: get_word() sets wtype := "L"
# NOTE: get_token() sets ttype := "L"
# NOTE: map_token() leaves ttype := "L"
static colon
initial {
   colon := ':'
}
   suspend \
   tab(many(colon))
end

###### string
#####procedure m_word()
######=================
###### low level parse
###### NOTE: get_word() sets wtype := "w"
###### NOTE: get_token() sets ttype := "w"
###### NOTE: map_token() leaves ttype := "w"
#####suspend \
##### tab(many(WordChar))
#####end

#===================================================================#
#===================================================================#
# extended words

# string
procedure m_xword()
#==================
# intermediate level parse
# words & weak separators
# NOTE: m_xword() includes "w", "q","Q", "+","-","*",":"
# NOTE: get_token() sets ttype := "x"
# NOTE: map_token() changes ttype from "x" to "w"
#
# Matches a run of characters from global xWordChar.
# NOTE(review): init_word() in this file sets xWordChar to 'w' ++ 'qQ'
# only; the "+","-","*",":" in the first note are not in that set.
   suspend \
   tab(many(xWordChar))
end

# string
procedure m_dollarvar()
#======================
# dollar variable
# low level parse get_word()
# Matches "$name" or "${name}", where name is a run of DollarNameChar.
   suspend \
   ="$"||tab(many(DollarNameChar)) |
   ="${"||tab(many(DollarNameChar))||="}"
end

# string
procedure m_cyc_constant()
#=========================
# cyc constant
# low level parse get_word()
# Matches "#$" followed by a run of CycConstantChar.
   suspend \
   ="#$"||tab(many(CycConstantChar))
end

# string
procedure m_filename()
#=====================
# high level parse
# legal file name
# Matches a run of FileNameChar.
   suspend \
   tab(many(FileNameChar))
end

# string
procedure m_dotvar()
#===================
# dot variable (pronouns)
# Alternatives are tried longest first: "...", "..", ".".
   suspend \
   ="..." |
   =".." |
   ="."
end

# string
procedure m_number()
#===================
# must begin with a digit
# NOTE(review): the single-digit alternative comes first, so the first
# result for "123" is "1"; the longer match is only produced when the
# generator is resumed.  Confirm callers rely on backtracking.
   suspend \
   ( tab(any(&digits)) ) |
   ( tab(any(&digits)) || tab(many(NumberChar)) )
end

# string
procedure m_dquote()
#===================
# low level parse
# match shortest string "..."
# include empty string ""
# NOTE: get_word() sets wtype := "Q"
# NOTE: get_token() includes "Q" in ttype "x"
# NOTE: map_token() changes ttype from "x" to "w"
# NOTE: multi-line dquote -- see m_quotemark() in myio.icn
#
# Alternatives in order: a quoted run of one or more non-dquote
# characters (delimiters matched by m_quotemark()), the empty pair "",
# then a lone dquote.
static dquote,QChar
initial {
   dquote := "\""
   QChar := &cset -- dquote
}
   suspend \
   #####( =dquote || tab(upto(dquote)\1) || =dquote ) |
   ( m_quotemark() || tab(many(QChar)) || m_quotemark() ) |
   ( =dquote || =dquote ) | # empty dquote
   ( =dquote ) # isolated dquote
end

# string
procedure m_squote()
#===================
# low level parse
# match shortest string '...'
# include empty string ''
# NOTE: apostrophe has no matching squote
# NOTE: get_word() sets wtype := "q"
# NOTE: get_token() includes "q" in ttype "x"
# NOTE: map_token() changes ttype from "x" to "w"
#
# Alternatives in order: '...' with one or more non-squote characters,
# the empty pair '', then a lone squote (apostrophe).
static squote,qChar
initial {
   squote := "\'"
   qChar := &cset -- squote
}
   suspend \
   #####( =squote || tab(upto(squote)\1) || =squote ) |
   ( =squote || tab(many(qChar)) || =squote ) |
   ( =squote || =squote ) | # empty squote
   ( =squote ) # apostrophe
end

# string
procedure m_paren()
#==================
# low level parse
# match shortest string (...)
# include empty string ()
# NOTE: apostrophe has no matching squote
#       (copied from m_squote(); not applicable to parentheses)
# NOTE: get_word() sets wtype := "b"
# NOTE: get_token() includes "b" in ttype "x"
# NOTE: map_token() changes ttype from "x" to "w"
#
# qChar excludes only ")", so an inner "(" is consumed as ordinary text
# and the match ends at the first ")".
static lparen,rparen,qChar
initial {
   lparen := "("
   rparen := ")"
   qChar := &cset -- rparen
}
   suspend \
   #####( =lparen || tab(upto(rparen)\1) || =rparen ) |
   ( =lparen || tab(many(qChar)) || =rparen ) |
   ( =lparen || =rparen ) # empty paren
end

# string
procedure m_angle()
#==================
# low level parse
# match shortest string <...>
# include empty string <>
# NOTE: get_word() sets wtype := "a"
# NOTE: get_token() includes "a" in ttype "x"
# NOTE: map_token() changes ttype from "x" to "a"
#
# qChar excludes both "<" and ">", so the body cannot contain either.
static langle,rangle,qChar
initial {
   langle := "<"
   rangle := ">"
   qChar := &cset -- langle -- rangle
}
   suspend \
   #####( =langle || tab(upto(rangle)\1) || =rangle ) |
   ( =langle || tab(many(qChar)) || =rangle ) |
   ( =langle || =rangle ) # empty angle
end

# string
procedure m_comment(sharp)
#=========================
# low level parse
# comment ::= CommentCharacter not_dollar arb EndOfLine
# caution: cyc uses "#$" prefix for constant
# NOTE: get_word() sets wtype := "c" or "w"
# NOTE: get_token() sets ttype := "c" or "w"
# NOTE: map_token() deletes ttype "c"
#
# NOTE: some comments removed by prompt() in myio.icn
# see trimcomment() in word.icn
#
# Matches "#", one character other than "$", and the rest of the subject.
# A "#" that is the last character of the subject therefore does not match.
# The sharp argument is overwritten with "#" below, so neither the caller's
# value nor CommentCharacter affects what is matched.
initial {
   /CommentCharacter := '#'
   #####/CommentMode := "mke"
   /not_dollar := &cset -- '$'
}
   ##/sharp := CommentCharacter
   sharp := "#"
   #####case CommentMode of {
   #####default: {
   suspend \
   #####( =sharp ) |
   ( =sharp || tab(any(not_dollar)) || tab(0) )
   ##### }
   #####"cyc": { # '#$name'
   ##### suspend \
   ##### ( =sharp || tab(many(WordChar)) ) |
   ##### ( =sharp )
   ##### }
   #####}
end

# string
procedure m_nvsep()
#==================
# assignment
# for get_token()
# Matches "+=", "-=", "*=" or "=" (two-character forms are tried first).
   suspend \
   ="+=" |
   ="-=" |
   ="*=" |
   ="="
end

# string
procedure m_sphrase()
#====================
# NOTE: symbol.icn now uses m_lphrase instead of m_sphrase
# intermediate level parse (whitespace
is now "B") # consecutive words excluding MKR words (is,has,do, ...) # use word or qword ??? # NOTE: sphrase isa string (token.icn) # lphrase isa list (symbol.icn) # NOTE: pword defined in symbol.icn (excludes is,has,do,...) # NOTE: blank removed by map_symbol() in symbol.icn # sphrase ::= # pword # pword whitespace sphrase suspend \ ( m_qword() ) | ( m_qword() || ="B" || m_sphrase() ) end # string procedure Blank(s) #================= # "replace" string by whitespace return "B" end # string procedure m_slash() #================== # for hierarchy suspend \ tab(many("/")) | tab(many("\\")) end #-----------------------------------------------# #-----------------------------------------------# #-----------------------------------------------# procedure init_word() #==================== # called by knit_init() in knit.icn # define global parsing variables # SEPARATOR,WhiteSpace, WEAKSEP,NONSEP # WordChar,xWordChar,NumberChar, FileNameChar # mkr_word,HIDDEN # rdf_word,owl_word,cycl_word # dmoz_word,tap_word,cyc_word # BinaryRelation # KEYWORD,CONTROL,VERB,PREPOSITION,CONJUNCTION,OPERATOR # QUANTIFIER,QUANT,groupQUANT # GTYPE,xxGROUP,EXGROUP,INGROUP # VARIABLE,keVARIABLE,PRONOUN # PARAMETER,COMMAND # special_ctype,special_charname # legal_utype,legal_chartype local sep,ws,word,param,cmd initial { CommentCharacter := '#' } NONSEP := '/' ++ # hierarchy,filename (/) '\\' ++ # hierarchy,filename (/) '~' ++ # logic,filename (~) '&' ++ # logic (&) '|' ++ # logic (|) '?' ++ # question (?) '$' ++ # dollar variable ($) '.' ++ # dot variable,number (.) '!' ++ # sh command (!) 
'+' ++ # set add (+) '-' ++ # set delete (-) '*' ++ # wildcard,set intersection (*) ':' # production,format,view (:) SEPARATOR := ###'$' ++ # dollar variable ($) "\"" ++ # dquote (Q) "\'" ++ # squote (q) ###CommentCharacter ++ # comment (c) ###'<>' ++ # HTML command (S) '[,]' ++ # comma list (S) '{;}' ++ # semicolon list (S) '()' ++ # precedence (q) '|' # pipeline (|) AssignOp := '=' # assignment (=) WhiteSpace := ' ' ++ # blank (B) '\t' ++ # tab (B) '\v' ++ # vertical tab (B) '\r' ++ # linereturn (B) '\n' ++ # newline (B) '\f' # newpage (B) # low level parse WordChar := &cset -- WhiteSpace -- SEPARATOR -- AssignOp DollarNameChar := WordChar -- NONSEP CycConstantChar := WordChar -- NONSEP AssignChar := '+-*' ++ '&|~' ++ ':' # intermediate level parse xWordChar := 'w' ++ 'qQ' # high level parse ListSeparator := "," FileNameChar := &letters ++ &digits ++ '/\\:.~ ' NumberChar := '.' ++ &digits #WordChar := &cset -- WhiteSpace -- '[,];' -- "=" -- '{}' --'<>' -- '()' #-----------------------------------------------------# #-----------------------------------------------------# # token types mkr_word := table() rdf_word := table() owl_word := table() cycl_word := table() dmoz_word := table() tap_word := table() cyc_word := table() BinaryRelation := table() # see binrel.icn #====================# # OpenCyc vocabulary # #====================# # Classes insert(cyc_word,"Thing","w") # existent insert(mkr_word,"Thing","w") # existent # Properties insert(cyc_word,"#$genlMt","V") # Mt1 #$genlMt Mt2 insert(cyc_word,"#$specMt","V") # Mt1 #$specMt Mt2 insert(mkr_word,"genlmt","V") # Mt1 #$genlMt Mt2 insert(mkr_word,"specmt","V") # Mt1 #$specMt Mt2 insert(mkr_word,"genlmt*","V") # Mt1 #$genlMt Mt2 insert(mkr_word,"specmt*","V") # Mt1 #$specMt Mt2 insert(mkr_word,"genlmt**","V") # Mt1 #$genlMt Mt2 insert(mkr_word,"specmt**","V") # Mt1 #$specMt Mt2 insert(mkr_word,"genlmt**?","?") # Mt1 #$genlMt Mt2 insert(mkr_word,"specmt**?","?") # Mt1 #$specMt Mt2 #====================# # RDF/OWL 
vocabulary # #====================# # Classes insert(owl_word,"Thing","w") # existent insert(owl_word,"Nothing","w") # nonexistent insert(owl_word,"owl:Thing","w") # existent insert(owl_word,"owl:Nothing","w") # nonexistent insert(owl_word,"owl:Class","w") # set of concepts insert(owl_word,"owl:DatatypeProperty","w") # attribute insert(owl_word,"owl:ObjectProperty","w") # relation part interaction insert(owl_word,"owl:Ontology","w") # view insert(owl_word,"owl:Restriction","w") # differentia insert(owl_word,"owl:FunctionalProperty","w") # subcharacteristic insert(owl_word,"owl:InverseFunctionalProperty","w") # subcharacteristic insert(owl_word,"owl:SymmetricProperty","w") # subcharacteristic insert(owl_word,"owl:TransitiveProperty","w") # subcharacteristic insert(owl_word,"owl:AllDifferent","w") # insert(owl_word,"owl:DeprecatedClass","w") # insert(owl_word,"owl:DeprecatedProperty","w") # # Properties insert(owl_word,"owl:sameAs","i") # is insert(owl_word,"owl:sameIndividualAs","i") # is insert(owl_word,"owl:equivalentClass","V") # maybe is insert(owl_word,"owl:equivalentProperty","V") # maybe is insert(owl_word,"owl:oneOf","V") # isany insert(owl_word,"owl:unionOf","V") # isunion insert(owl_word,"owl:intersectionOf","V") # isintersection insert(owl_word,"owl:complementOf","V") # is not insert(owl_word,"owl:inverseOf","V") # inverse insert(owl_word,"owl:onProperty","w") # insert(owl_word,"owl:cardinality","w") # insert(owl_word,"owl:maxCardinality","w") # insert(owl_word,"owl:minCardinality","w") # insert(owl_word,"owl:allValuesFrom","w") # insert(owl_word,"owl:someValuesFrom","w") # insert(owl_word,"owl:differentFrom","V") # insert(owl_word,"owl:disjointWith","V") # insert(owl_word,"owl:distinctMembers","V") # insert(owl_word,"owl:backwardCompatibleWith","V") # insert(owl_word,"owl:incompatibleWith","V") # insert(owl_word,"owl:priorVersion","w") # insert(rdf_word,"rdfs:Class","w") # set of concepts insert(rdf_word,"rdfs:Resource","w") # existent 
insert(rdf_word,"rdf:Property","w") # characteristic insert(rdf_word,"rdf:type","V") # has type= <=> isu insert(rdf_word,"rdfs:subClassOf","V") # iss* insert(rdf_word,"rdfs:subPropertyOf","V") # iss* insert(rdf_word,"rdfs:domain","w") # insert(rdf_word,"rdfs:range","w") # #================# # non separators # #================# insert(mkr_word,"?","?") # question insert(mkr_word,"/","/") # hierarchy,filename,HTML insert(mkr_word,"/","/") # hierarchy,filename,HTML #insert(mkr_word,"/","S") # hierarchy,filename,HTML (strong separator) insert(mkr_word,"\\","/") # hierarchy,filename,HTML insert(mkr_word,"$","$") # dollar variable #insert(mkr_word,".",".") # dot variable,number insert(mkr_word,"::","L") # named proposition # assign words insert(mkr_word,":=","R") # production insert(mkr_word,"+:=","R") # production insert(mkr_word,"-:=","R") # production insert(mkr_word,"*:=","R") # production insert(mkr_word,"=","=") # assignment,production insert(mkr_word,"+","+") # assignment,word insert(mkr_word,"-","-") # assignment,word insert(mkr_word,"*","*") # assignment,word, wildcard insert(mkr_word,":",":") # assignment,production, format, view insert(mkr_word,"*","*") # assignment,wildcard insert(mkr_word,"!","D") # sh command insert(mkr_word,"~","~") # assignment,not insert(mkr_word,"&","&") # assignment,and #============# # separators # #============# # quote, comment #insert(mkr_word,"\'","q") # squote for get_word() #insert(mkr_word,"\"","Q") # dquote for get_word() #insert(mkr_word,"(","b") # paren for get_word() #insert(mkr_word,")","b") # paren for get_word() #insert(mkr_word,"<","a") # angle for get_word() #insert(mkr_word,">","a") # angle for get_word() #insert(mkr_word,CommentCharacter,"c") # comment for get_word() # strong separators insert(mkr_word,",","S") # list of phrase insert(mkr_word,";","S") # list of proposition insert(mkr_word,"[","S") # list of phrase insert(mkr_word,"]","S") # list of phrase ###insert(mkr_word,"[","[") # list of phrase 
###insert(mkr_word,"]","]") # list of phrase insert(mkr_word,"{","S") # list of proposition insert(mkr_word,"}","S") # list of proposition insert(mkr_word,"(","S") # precedence & quote insert(mkr_word,")","S") # precedence & quote insert(mkr_word,"<","S") # HTML & multi-line quote insert(mkr_word,">","S") # HTML & multi-line quote #============# # whitespace # #============# insert(mkr_word," ","B") # blank insert(mkr_word,"\t","B") # tab #insert(mkr_word,"\v","B") # vertical tab insert(mkr_word,"\r","B") # linereturn insert(mkr_word,"\n","B") # newline insert(mkr_word,"\f","B") # newpage #=======# # words # #=======# # generic names for grammar examples #####insert(mkr_word,"proposition","w") #####insert(mkr_word,"production","w") #####insert(mkr_word,"sentence","w") #####insert(mkr_word,"statement","w") #####insert(mkr_word,"question","w") #####insert(mkr_word,"command","w") # do command od arg done; #####insert(mkr_word,"arg","w") # do command od arg done; #####insert(mkr_word,"relverb","B") # subject relverb object; #####insert(mkr_word,"subject","w") #####insert(mkr_word,"verb","w") #####insert(mkr_word,"object","w") #####insert(mkr_word,"preposition","P") # action insert(mkr_word,"do","D") # action insert(mkr_word,"ido","D") # interaction insert(mkr_word,"DO","D") # axiomatic level insert(mkr_word,"!","D") # UNIX shell insert(mkr_word,"can","w") # capability ###insert(mkr_word,"can","D") # capability ###insert(mkr_word,"do*","D") # capability insert(mkr_word,"vdo","D") # all views insert(mkr_word,"hdo","D") # hierarchy insert(mkr_word,"done","d") # preposition insert(mkr_word,"at","A") # context ==> use "A" #insert(mkr_word,"at","P") # context ==> use "A" insert(mkr_word,"out","P") # product insert(mkr_word,"of","P") # domain (part) insert(mkr_word,"with","P") # modifier (also definition) insert(mkr_word,"od","P") # direct object insert(mkr_word,"from","P") # initial insert(mkr_word,"to","P") # final insert(mkr_word,"in","P") # array (index) # hierarchy 
insert(mkr_word,"is","V") # alias (also definition) insert(mkr_word,"syn","V") # synonym insert(mkr_word,"isu","V") # unit isu genus (also definition) insert(mkr_word,"iss","V") # species iss genus (also definition) insert(mkr_word,"isa","V") # unit|species isa genus (also definition) insert(mkr_word,"isp","V") # genus isp unit insert(mkr_word,"isg","V") # genus isg species insert(mkr_word,"isc","V") # genus isc unit|species insert(mkr_word,"isa+","V") # 1 or more levels insert(mkr_word,"isc+","V") # 1 or more levels insert(mkr_word,"is*","V") # 0 or more levels insert(mkr_word,"isa*","V") # 0 or more levels insert(mkr_word,"isc*","V") # 0 or more levels insert(mkr_word,"iss*","V") # 0 or more levels insert(mkr_word,"isg*","V") # 0 or more levels insert(mkr_word,"isu*","V") # 0 or more levels insert(mkr_word,"isp*","V") # 0 or more levels insert(mkr_word,"isa**","l") # followed by integer insert(mkr_word,"isc**","l") # followed by integer insert(mkr_word,"isa**?","?") # question verb insert(mkr_word,"isc**?","?") # question verb # attribute insert(mkr_word,"iswith","V") # differentia insert(mkr_word,"has","V") insert(mkr_word,"HAS","V") # part insert(mkr_word,"haspart","V") insert(mkr_word,"isapart","V") insert(mkr_word,"haspart*","V") insert(mkr_word,"isapart*","V") # relation insert(mkr_word,"rel","V") ##insert(mkr_word,"nrel","V") ##insert(mkr_word,"trel","V") ##insert(mkr_word,"brel","V") ##insert(mkr_word,"urel","V") insert(mkr_word,"isin","V") # assignment insert(mkr_word,"let","r") insert(mkr_word,"vlet","r") insert(mkr_word,"unlet","r") # NSM concepts insert(mkr_word,"causes","V") # cause-effect insert(mkr_word,"because","V") # NSM effect-cause insert(mkr_word,"like","V") # NSM similarity insert(mkr_word,"happens","D") # NSM happen insert(mkr_word,"happensod","B")# NSM happen insert(mkr_word,"before","V") # NSM time insert(mkr_word,"after","V") # NSM time insert(mkr_word,"above","V") # NSM space insert(mkr_word,"below","V") # NSM space 
insert(mkr_word,"beside","V") # NSM space insert(mkr_word,"inside","V") # NSM space insert(mkr_word,"outside","V") # NSM space insert(mkr_word,"causes*","V") # cause-effect insert(mkr_word,"because*","V") # effect-cause insert(mkr_word,"like*","V") # NSM similarity insert(mkr_word,"happensod*","B")# NSM happen insert(mkr_word,"before*","V") # time insert(mkr_word,"after*","V") # time insert(mkr_word,"above*","V") # space insert(mkr_word,"below*","V") # space insert(mkr_word,"beside*","V") # space insert(mkr_word,"inside*","V") # space insert(mkr_word,"outside*","V") # space # NSM view #insert(mkr_word,"maybe","w") # view # generator verb insert(mkr_word,"in","P") # x in concept list # exgroup, ingroup insert(mkr_word,"isalt","V") # alternative isalt exgroup insert(mkr_word,"isany","V") # exgroup isany alternative insert(mkr_word,"isall","V") # ingroup isall member insert(mkr_word,"ismem","V") # member ismem ingroup # for OWL insert(mkr_word,"isand","V") # intersection (requisite) insert(mkr_word,"ismem","V") # intersection (requisite) insert(mkr_word,"isor","V") # union insert(mkr_word,"isxor","V") # disjoint union insert(mkr_word,"xor","V") # differentFrom insert(mkr_word,"ismem","V") # union insert(mkr_word,"isand","V") # restriction ??? insert(mkr_word,"ismem","V") # restriction ??? 
insert(mkr_word,"isnon","V") # complement wrt genus insert(mkr_word,"isnot","V") # complement wrt existent insert(mkr_word,"inverse","V") # inverse insert(mkr_word,"isalt*","V") # alternative isalt* exgroup insert(mkr_word,"isany*","V") # exgroup isany* alternative insert(mkr_word,"isall*","V") # ingroup isall* member insert(mkr_word,"ismem*","V") # member ismem* ingroup # concept formation insert(mkr_word,"isd","V") # differentiate insert(mkr_word,"isi","V") # integrate insert(mkr_word,"means","V") insert(mkr_word,"isref","V") # is referent of insert(mkr_word,"means*","V") insert(mkr_word,"isref*","V") # groups insert(mkr_word,"begin","Y") # group definition insert(mkr_word,"end","Z") # group definition insert(mkr_word,"hierarchy","w") # hierarchy,lattice insert(mkr_word,"unithierarchy","w") # unithierarchy (backwards compatibility) insert(mkr_word,"relation","w") # relation insert(mkr_word,"group","w") # group,triple,mcf,... # control structure insert(mkr_word,"exit","z") # exit KE insert(mkr_word,"break","z") # exit every,while,until,when insert(mkr_word,"return","r") # return Product=value; insert(mkr_word,"suspend","r") # suspend Product=value; (Unicon generator) insert(mkr_word,"if","I") # conditional insert(mkr_word,"then","T") # conditional insert(mkr_word,"else","E") # conditional insert(mkr_word,"fi","F") # conditional insert(mkr_word,"iff","J") # conjunction insert(mkr_word,"|","|") # conjunction: pipeline - treat like ";" insert(mkr_word,"for","G") # iteration insert(mkr_word,"every","G") # iteration insert(mkr_word,"while","I") # iteration insert(mkr_word,"until","I") # iteration insert(mkr_word,"when","I") # monitor events # other words # NSM words insert(mkr_word,"good","w") # NSM evaluator insert(mkr_word,"bad","w") # NSM evaluator insert(mkr_word,"big","w") # NSM descriptor insert(mkr_word,"small","w") # NSM descriptor insert(mkr_word,"very","Q") # NSM intensifier insert(mkr_word,"more","Q") # NSM augmentor insert(mkr_word,"near","w") # NSM space 
distance insert(mkr_word,"far","w") # NSM space distance insert(mkr_word,"long","w") # NSM time duration insert(mkr_word,"short","w") # NSM time duration #insert(mkr_word,"some","Q") # NSM time duration insert(mkr_word,"and","j") # logic insert(mkr_word,"or","j") # logic insert(mkr_word,"not","N") # complement wrt existent insert(mkr_word,"non","N") # complement wrt genus insert(mkr_word,"a","Q") # quantifier insert(mkr_word,"all","Q") # ingroup quantifier insert(mkr_word,"any","Q") # exgroup quantifier insert(mkr_word,"either","Q") # exgroup quantifier insert(mkr_word,"no","Q") # quantifier insert(mkr_word,"some","Q") # quantifier insert(mkr_word,"the","Q") # quantifier ##insert(mkr_word,"exists","G") # KIF first order logic #####insert(mkr_word,"forall","G") # KIF first order logic #####insert(mkr_word,"forany","G") # MKR first order logic #####insert(mkr_word,"forSome","G") # OWL first order logic #####insert(mkr_word,"forAll","G") # OWL first order logic insert(mkr_word,"implies","J") # logic #####insert(mkr_word,"|-","J") # implies #insert(mkr_word,"delete","w") # do delete ... 
done # iQ,iG words for reparsing compound statements #insert(mkr_word,"iseither","V") #insert(mkr_word,"isthe","V") # ig,ih,ir words for exgroup and ingroup #insert(mkr_word,"isconcept","V") #insert(mkr_word,"isset","V") #insert(mkr_word,"islist","V") #insert(mkr_word,"issequence","V") #insert(mkr_word,"isrelation","V") #insert(mkr_word,"ishierarchy","V") #insert(mkr_word,"islattice","V") #insert(mkr_word,"isdirectory","V") #insert(mkr_word,"isconcept","V") # ad-hoc additions for parsing English phrases #============================================= #####insert(mkr_word,"for","j") # conjunction insert(mkr_word,"vs.","j") # conjunction # end mkr_word #-----------------------------------------------------# #-----------------------------------------------------# init_keyword() # token.icn init_gtype() # token.icn init_quantifier() # token.icn init_variable() # token.icn init_parameter() # param.icn init_command() # command.icn init_char() # char.icn # ignore hidden words when dumping concepts every sep := string(!SEPARATOR) do insert(HIDDEN,sep) delete(HIDDEN,".") every ws := !WhiteSpace do insert(HIDDEN,ws) #####every word := key(mkr_word) do ##### insert(HIDDEN,word) delete(HIDDEN,"let") delete(HIDDEN,"vlet") delete(HIDDEN,"unlet") every param := !PARAMETER do insert(HIDDEN,param) every cmd := ! COMMAND do insert(HIDDEN,cmd) # mental actions - know,believe,... 
# action object is (may be) proposition list
    # (the lines down to the first "end" are the tail of the procedure
    #  that begins above this point; they are kept unchanged)
    # kaction is action with ktype=kt
    # declared by tabrasa.def, user
    kaction_set := set()

    # identification actions
    # <== no longer used
    identify_set := set([
        "identify","perceive","classify","measure","define",
        "see","hear","touch","smell","taste"
        ])

    # initialize rdf map
    #####rdf2mkr("rdf:type")
end

procedure declare_word()
#=======================
# declare special words
# called by initialize_knit() in knit.icn
#
# Runs the add_*() declaration steps in a fixed order:
# separators, keywords, quantifiers, group types, variables,
# parameters, commands.
    add_separator()     # token.icn
    add_keyword()       # token.icn
    add_quantifier()    # token.icn
    add_gtype()         # token.icn
    add_variable()      # token.icn
    add_parameter()     # param.icn
    add_command()       # command.icn
end

#===================================================================#
#===================================================================#
# KFORMAT == "ku" #
#===================================================================#

###### TOKEN
#####procedure m_endtoken()
######=====================
###### block end for hierarchy|relation|directory
#####suspend \
##### TOKEN("Z",="end") # begin end
#####end

# TOKEN list
procedure m_kulist()
#===================
# kulist ::=
# kutoken
# kutoken kulist
#
# Matching procedure (must be called inside a string-scanning
# expression).  Generates lists of TOKEN: first every one-token list,
# then every one-token list concatenated with a recursively matched
# remainder.  get_token() uses it on a string of word-type characters
# and accepts the first result that ends at pos(0).
    suspend [m_kutoken()] | [m_kutoken()] ||| m_kulist()
end

# TOKEN
procedure m_kutoken()
#====================
# intermediate level parse
# NOTE: "q","Q","w" included in xword ("$" ???)
# NOTE: map_token() changes "x" to "w"
# NOTE: map_token() deletes comment "c"
# NOTE: map_token() deletes whitespace "B"
#
# Matching procedure: generates TOKEN(ttype,tvalue) for each
# alternative that matches at the current scanning position.
# Alternatives are tried in the order written, so the order below is
# part of the behaviour -- do not reorder.
#
# NOTE(review): the "\" after "suspend" parses as Icon's prefix
# non-null test applied to the first alternative, not as a line
# continuation.  It is harmless here because a newly created record
# is never null.
# NOTE(review): the empty-string alternative ("w") always succeeds and
# is listed before the catch-all m_byte() alternative ("U").
    suspend \
    TOKEN("?", ( ="?" )) |              # question variable
    TOKEN("/", m_slash()) |             # hierarchy, HTML end
    #TOKEN("$", m_dollarvar()) |        # dollar variable
    #TOKEN("$", ( ="$w" )) |            # dollar variable
    #TOKEN("$", ( ="${w}" )) |          # dollar variable
    #TOKEN("$", ( ="$R" )) |            # dollar variable
    TOKEN("$", ( ="$" )) |              # dollar variable
    #TOKEN("n", m_number()) |           # integer or real
    #TOKEN(".", m_dotvar()) |           # dot pronoun (included in $variable)
    #TOKEN(".", ( ="..." )) |           # dot pronoun (included in $variable)
    #TOKEN(".", ( =".." )) |            # dot pronoun (included in $variable)
    #TOKEN(".", ( ="." )) |             # dot pronoun (included in $variable)
    ###TOKEN("h", ( ="h" )) |           # HTML command
    TOKEN("c", ( ="c" )) |              # comment #...
    TOKEN("B", ( ="B" )) |              # whitespace
    TOKEN(",", ( ="," )) |              # list
    TOKEN(";", ( =";" )) |              # list
    TOKEN("!", ( ="!" )) |              # sh command
    TOKEN("{", ( ="{" )) |              # strong separators
    TOKEN("}", ( ="}" )) |              # strong separators
    TOKEN("[", ( ="[" )) |              # strong separators
    TOKEN("]", ( ="]" )) |              # strong separators
    TOKEN("(", ( ="(" )) |              # strong separators
    TOKEN(")", ( =")" )) |              # strong separators
    TOKEN("<", ( ="<" )) |              # strong separators
    TOKEN(">", ( =">" )) |              # strong separators
    TOKEN("|", ( ="|" )) |              # strong separators
    TOKEN("L", ( ="L" )) |              # propname ::
    #TOKEN("L", ( ="::" )) |            # propname ::
    TOKEN("W", ( ="W" )) |              # WordNet =>
    TOKEN("X", ( ="X" )) |              # WordNet --
    ##TOKEN("R", ( ="::=" )) |          # BNF production ::=
    TOKEN("R", ( ="R" )) |              # production :=
    #TOKEN("R", ( =":=" )) |            # production :=
    #TOKEN("R", ( ="+:=" )) |           # production :=
    #TOKEN("R", ( ="-:=" )) |           # production :=
    #TOKEN("R", ( ="*:=" )) |           # production :=
    TOKEN("S", ( ="S" )) |              # assignment
    #TOKEN("=", m_nvsep()) |            # assignment =
    #TOKEN("=", ( ="+=" )) |            # assignment +=
    #TOKEN("=", ( ="-=" )) |            # assignment -=
    #TOKEN("=", ( ="*=" )) |            # assignment *=
    ##TOKEN("=", ( ="=" )) |            # assignment =
    TOKEN("x", m_xword()) |             # (wqQ)
    TOKEN("x", ( ="+" )) |              # weak separator
    TOKEN("x", ( ="-" )) |              # weak separator
    TOKEN("x", ( ="*" )) |              # weak separator
    TOKEN("x", ( =":" )) |              # weak separator
    TOKEN("q", ( ="q" )) |              # squote '...'
    TOKEN("Q", ( ="Q" )) |              # dquote "..."
    TOKEN("a", ( ="a" )) |              # angle <...>
    TOKEN("b", ( ="b" )) |              # paren (...)
    TOKEN("Y", ( ="Y" )) |              # begin group
    TOKEN("Z", ( ="Z" )) |              # end group
    TOKEN("z", ( ="z" )) |              # exit MKE
    TOKEN("x", ( ="w" )) |              # word
    TOKEN("w", ( ="" )) |               # empty string
    TOKEN("U", m_byte())                # anything else is unknown
end

#===================================================================#
#===================================================================#
# KFORMAT == "ho" | "hounit"
#===================================================================#

# TOKEN list
procedure m_holist()
#===================
# holist ::=
# hotoken
# hotoken holist
#
# Same shape as m_kulist(), built on m_hotoken().
# get_token() currently selects m_kulist for "ho"/"hounit"
# (its case table is annotated "was m_holist").
    suspend \
    [m_hotoken()] | [m_hotoken()] ||| m_holist()
end

# TOKEN
procedure m_hotoken(sep)
#=======================
# hotoken ::=
# endtoken <<== obsolete -- in kutoken
# holevel <<== obsolete -- do in symbol.icn
# hophrase <<== obsolete -- do in symbol.icn
# kutoken
#
# sep defaults to dequote(HOSEPARATOR); it is only referenced by the
# commented-out alternatives, so the live code just delegates to
# m_kutoken().
    /sep := dequote(HOSEPARATOR)
    suspend \
    #TOKEN("Z",="end") |
    #TOKEN("/",m_holevel(sep)) |
    #TOKEN("h",m_hophrase(sep)) |
    m_kutoken()
end

#===================================================================#
#===================================================================#
# KFORMAT == "dir"
#===================================================================#

# TOKEN list
procedure m_dirlist()
#====================
# dirlist ::=
# dirtoken
# dirtoken dirlist
#
# Same shape as m_kulist(), built on m_dirtoken().
    suspend \
    [m_dirtoken()] | [m_dirtoken()] ||| m_dirlist()
end

# TOKEN
procedure m_dirtoken(sep)
#=======================
# dirtoken ::=
# endtoken
# dirword
# kutoken
#
# Tries, in order: the literal "end" (ttype "Z"), a directory word
# (ttype "d"), then any m_kutoken() result.  sep is not used.
    suspend \
    TOKEN("Z",="end") |
    TOKEN("d",m_dirword()) |
    m_kutoken()
end

# string
procedure m_dirword()
#===================#
# Matches the longest run of characters drawn from &cset minus
# '/', '!' and '='; the set is computed once on the first call.
    static wordchar
    initial {
        wordchar := &cset
        wordchar --:= '/'       # no hierarchy separators
        wordchar --:= '!='      # allow commands & assignments
        }
    suspend \
    tab(many(wordchar))
end

#===================================================================#
#===================================================================#
# KFORMAT == "nrel"
#===================================================================#

# TOKEN
# list
procedure m_rellist()
#====================
# rellist ::=
# reltoken
# reltoken rellist
#
# Matching procedure: generates lists of TOKEN, first every one-token
# list, then every one-token list concatenated with a recursively
# matched remainder.
    suspend [m_reltoken()] | [m_reltoken()] ||| m_rellist()
end

# TOKEN
procedure m_reltoken()
#=====================
# reltoken ::=
# endtoken <<== obsolete -- in kutoken
# relsep <<== obsolete -- in kutoken
# relphrase <<== obsolete -- do in symbol.icn
# kutoken
#
# All relation-specific alternatives are commented out; the live code
# delegates to m_kutoken().
# NOTE(review): the "\" after "suspend" is Icon's prefix non-null test,
# not a line continuation; it is harmless on a record result.
    suspend \
    #TOKEN("Z",="end") |
    #TOKEN(";",m_relsep()) |
    #TOKEN("r",m_relphrase()) |
    m_kutoken()
end

# string
procedure m_relsep(sep)
#======================
# Matches the relation separator at the current scanning position.
# sep defaults to dequote(RELSEPARATOR).
    /sep := dequote(RELSEPARATOR)
    suspend \
    =sep
end

#===================================================================#
#===================================================================#
# KFORMAT == "nv"
# now same rules as "ku"
#===================================================================#
#===================================================================#
#===================================================================#
#===================================================================#

procedure init_keyword()
#=======================
# Builds the keyword sets CONTROL, VERB, PREPOSITION, OPERATOR and
# CONJUNCTION, and their union KEYWORD.
#
# FIX: the VERB list used to end with `"isref*",` followed only by
# commented-out entries.  In Icon an omitted list element is &null, so
# the trailing comma put &null into VERB (and therefore KEYWORD), and
# add_keyword() would then call add_unit(&null,"verb").  The trailing
# comma is removed; all string members are unchanged.
    CONTROL:= set([
        "begin", "end", "exit", "break",
        "for", "every","done",
        "while","until", "when",
        "forall","exists",
        "if","then","else","fi"
        ])

    VERB := set([
        "inverse",
        # for CycL
        "genlmt","genlmt+","genlmt*","genlmt**","genlmt**?",
        "specmt","specmt+","specmt*","specmt**","specmt**?",
        # for OWL
        "isand",        # intersection
        "isor",         # union
        "isxor",        # disjoint union
        "xor",          # disjoint
        # for NSM
        "before","before*",
        "after","after*",
        "above","above*",
        "below","below*",
        "beside","beside*",
        "inside","inside*",
        "outside","outside*",
        "happensod","happensod*",
        "like", "like*",        # NSM
        # MKR
        "IS", "ISA", "ISC", "HAS", "DO",
        "isa","isa+","isa*","isa**","isa**?",
        "isc","isc+","isc*","isc**","isc**?",
        "isu","isu*","isu**","isu**?",
        "isp","isp*","isp**","isp**?",
        "iss","iss*","iss**","iss**?",
        "isg","isg*","isg**","isg**?",
        "ismem","ismem*",
        "isall","isall*",
        "isalt","isalt*",
        "isany","isany*",
        "is","is*","iswith",
        "isd", "isi",
        "rel","nrel","trel","brel","urel",
        "has","can",
        "haspart","haspart*",
        "isapart","isapart*",
        "do","can","vdo","hdo", "ido",  # "can" is listed twice; harmless in a set
        "happens",
        "let","vlet","unlet",
        "means", "means*",
        "isref", "isref*"       # last live entry: no trailing comma (see FIX above)
        # iQ words for reparsing compound statements
        #"iseither",
        #"isno",
        #"issome",
        #"isthe"
        ])

    PREPOSITION := set([
        "in", "at",     # context
        "out",          # product
        "of",           # part
        "with",         # characteristic
        "od",           # direct object
        "from",         # initial characteristic
        "to"            # final characteristic
        ])

    OPERATOR := set([
        "=",            # name = value
        "+=",           # name op value
        "-=",           # name op value
        "*=",           # name op value
        ":=",           # product := producer
        "+:=",          # product prodop producer
        "-:=",          # product prodop producer
        "*:=",          # product prodop producer
        "::="           # BNF grammar
        ])

    CONJUNCTION := set([
        "iff",
        "implies",              # "|-"
        "supports",             # "|=" situation theory notation
        "causes", "causes*",    # NSM
        "because", "because*"   # NSM
        ])

    KEYWORD := CONTROL ++ VERB ++ PREPOSITION ++ OPERATOR ++ CONJUNCTION
end

procedure init_quantifier()
#==========================
# Builds QUANT, groupQUANT and their union QUANTIFIER.
    QUANT := set([ "a", "no", "some", "the" ])
    groupQUANT := set([ "either", "all", "any" ])
    QUANTIFIER := QUANT ++ groupQUANT
end

procedure init_gtype()
#=====================
# Builds the group-type sets xxGROUP, INGROUP, EXGROUP and LATTICE,
# and their union GTYPE.  The string "LATTICE" is itself a member of
# INGROUP; add_gtype() uses "LATTICE" as the genus of the LATTICE set.
    xxGROUP := set([
        "exgroup","ingroup",
        "concept"
        ])
    INGROUP := set([
        "list",
        "requisite",
        "sequence",
        "set",
        "intersection", # OWL
        "LATTICE"
        ])
    EXGROUP := set([
        "enum",         # OWL oneOf
        "union"         # OWL
        ])
    LATTICE := set([
        "hierarchy",
        "lattice"
        ])
    GTYPE := xxGROUP ++ INGROUP ++ EXGROUP ++ LATTICE
end

procedure init_variable()
#========================
# context-dependent variable
# Builds keVARIABLE, PRONOUN and their union VARIABLE.
    keVARIABLE := set([
        "Sentence",
        #####"Statement",
        "Question",
        "Command",
        "Assignment",
        "If",
        "Every"
        ])
    PRONOUN := set([
        ".","..","...", # pronoun
        "I","we",
        "you",
        "he","she","they",
        "it",
        "none",
        "someone",      # NSM
        "something",    # NSM
        "people"        # NSM
        ])
    VARIABLE := keVARIABLE ++ PRONOUN
end

procedure add_separator()
#========================
# Declares the "separator" concept and files the whitespace names and
# separator characters under it (the body continues below).
    local sep,whitespace
    initial {
        } # end initial
    new_concept("separator",,"separator")
add_species("separator","symbol")
    # (the lines down to the first "end" are the remainder of
    #  add_separator(), whose header precedes this point)
    whitespace := ["blank","tab","vertical tab","line return","newline","newpage"]
    every sep := !whitespace do {
        add_unit(sep,"separator")
        }
    #####add_alias(" ","blank")
    #####add_alias("\t","tab")
    #####add_alias("\v","vertical tab")
    #####add_alias("\r","line return")
    #####add_alias("\n","newline")
    #####add_alias("\f","newpage")

    # $. are filed as variables, not separators
    every sep := ! (SEPARATOR -- '$.') do {
        add_unit(string(sep),"separator")
        }
    #####add_alias("sharp","#")
    #####add_alias("dquote","\"")
    #####add_alias("squote","\'")
    #####add_alias("lparen","(")
    #####add_alias("rparen",")")
    #####merge_alias(set([",","comma"]))
    #####merge_alias(set([";","semicolon"]))
    #####merge_alias(set(["&","and"]))
    #####merge_alias(set(["|","or"]))
    #####merge_alias(set(["~","not"])) # complement wrt existent
    #####merge_alias(set(["~","non"])) # complement wrt genus
end

procedure add_keyword()
#======================
# Declares the word-class species (verb, conjunction, preposition,
# separator, control), files every member of the corresponding
# keyword set as a unit of that class, and adds each to HIDDEN.
# Finally merges "forall"/"exists" with their OWL names.
    local kw

    add_species("symbol","OBJECT")
    add_species("word","symbol")

    add_species("verb","word")
    every kw := ! VERB do {
        add_unit(kw,"verb")
        insert(HIDDEN,kw)
        }
    add_species("conjunction","word")
    every kw := ! CONJUNCTION do {
        add_unit(kw,"conjunction")
        insert(HIDDEN,kw)
        }
    add_species("preposition","word")
    every kw := ! PREPOSITION do {
        add_unit(kw,"preposition")
        insert(HIDDEN,kw)
        }
    add_species("separator","symbol")
    every kw := ! OPERATOR do {
        add_unit(kw,"separator")
        insert(HIDDEN,kw)
        }
    add_species("control","word")
    every kw := ! CONTROL do {
        add_unit(kw,"control")
        insert(HIDDEN,kw)
        }

    merge_alias(set(["forall","forAll"]),"forAll")      # use OWL name
    merge_alias(set(["exists","forSome"]),"forSome")    # use OWL name
    #####merge_alias(set(["isc","iseither"]))
    #####merge_alias(set(["|=","supports"]))
    #####merge_alias(set(["|-","implies"]))
end

procedure add_quantifier()
#=========================
# Declares "quantifier" as a species of "word", files every member of
# QUANTIFIER under it and adds each to HIDDEN.
    local kw

    new_concept("quantifier",,"quantifier")
    add_species("quantifier","word")
    every kw := ! QUANTIFIER do {
        add_unit(kw,"quantifier")
        insert(HIDDEN,kw)
        }
end

procedure add_gtype()
#====================
# Files each group-type name as a species of its genus
# ("group", "ingroup", "exgroup", "LATTICE") and adds it to HIDDEN.
    local kw

    every kw := ! xxGROUP do {
        add_species(kw,"group")
        insert(HIDDEN,kw)
        }
    every kw := ! INGROUP do {
        add_species(kw,"ingroup")
        insert(HIDDEN,kw)
        }
    every kw := ! EXGROUP do {
        add_species(kw,"exgroup")
        insert(HIDDEN,kw)
        }
    every kw := ! LATTICE do {
        add_species(kw,"LATTICE")
        insert(HIDDEN,kw)
        }
    #####add_alias("enum","enum")
    #####add_alias("req","requisite")
    #####add_alias("seq","sequence")
end

procedure add_variable()
#=======================
# context-dependent variable
#
# Files every member of keVARIABLE as a species of "variable" and
# every member of PRONOUN as a species of "pronoun" (with
# ctype=pronoun), and adds each declared name to HIDDEN.
#
# FIX: the pronoun loop used to call insert(HIDDEN,var).  At that
# point var still holds the last value from the keVARIABLE loop, so
# no pronoun was ever added to HIDDEN.  It now inserts pro.
    local var,pro

    # declare variables
    every var := ! keVARIABLE do {
        add_species(var,"variable")             # var iss variable
        #put_char("attr","ke",var,[])           # ke has var
        insert(HIDDEN,var)
        }
    add_species("pronoun","attribute")
    every pro := ! PRONOUN do {
        add_species(pro,"pronoun")              # pro iss pronoun
        put_ctype(pro,"pronoun")                # pro has ctype=pronoun
        #put_char("attr","ke",pro,[])           # ke has pronoun
        insert(HIDDEN,pro)                      # was insert(HIDDEN,var)
        }
end

#===================================================================#
#===================================================================#
#===================================================================#

# TOKEN
procedure copy_token(tok)
#========================
# Returns a new TOKEN record with the same ttype and tvalue as tok.
    local newtok

    newtok := TOKEN(tok.ttype,tok.tvalue)
    return newtok
end

# list of TOKEN
procedure copy_tlist(tok)
#========================
# Returns a new list holding a copy_token() copy of every element
# of the list tok.
    local newtok

    newtok := []
    every put(newtok,copy_token(!tok))
    return newtok
end

#-------------------------------------------------------------

# TOKEN list
procedure get_token(fd,pgtype,ps)
#================================
# fd ::= file | string | list of string
# ttype ::= mkr_word | ho_ttype | nrel_ttype
# ps ::= "ke" | "ksc"
# called by parse_file() in symbol.icn
# called by get_prop() in proplist.icn
# suspend TOKEN list for WORD list
#
# Generator.  For every value produced by get_word(fd,pgtype,ps):
#   1. collect the WORD records into wordlist; skip empty input
#   2. build wline (one wtype character per word) and wlist (the
#      matching wvalue strings)
#   3. scan wline with the token matcher, requiring a match to pos(0)
#   4. replace each token's tvalue (type characters) with the
#      concatenated word values it covers, then run map_token()
#   5. suspend each token (ParserMode "word") or the whole list
#      (ParserMode "line")
# If the scan fails, KERROR is incremented, the line is reported, and
# the fallback list [TOKEN("X",line)] is what gets suspended.
# PGTYPE and KFORMAT are saved on SaveStack on entry and restored
# when the generator runs to completion or hits an unexpected
# ParserMode.
    local m_token
    local ttype,wtype
    local line,wordlist,wline,wlist
    local wtok,tok
    local i,j,k,y,z
    local ij,tline
    local KHRV,VD
    local word,token
    local prog,info,error,warning,ierror
    static SaveStack
    initial {
        # first-call defaults for anything still null
        /SaveStack := []
        /DOT1 := "existent"
        /VIEW := "tabula rasa"
        /KFORMAT := "ku"
        /HOSEPARATOR := "/"
        /RELSEPARATOR := ","
        /NVSEPARATOR := "="
        /PIPESEPARATOR := "|"
        }

    push(SaveStack,PGTYPE)
    push(SaveStack,KFORMAT)

    /fd := myin
    /pgtype := "ku"
    # DOTCONCEPT := ".="||DOT1
    # VD := VIEW||":"||DOTCONCEPT
    # KHRV := KFORMAT||HOSEPARATOR||RELSEPARATOR||NVSEPARATOR
    prog := "get_token("||pgtype||"): "
    info := "INFO: "||prog
    error := "ERROR: "||prog
    ierror := "Internal ERROR: "||prog
    warning := "WARNING: "||prog

    case pgtype of {
    "ku":   { ttype := mkr_ttype; wtype := mkr_wtype }
    "ho":   { ttype := ho_ttype; wtype := mkr_wtype }
    "nrel": { ttype := nrel_ttype; wtype := mkr_wtype }
    default:{ ttype := mkr_ttype; wtype := mkr_wtype }
    } # end case pgtype

    #------------------------------------------------------------#
    every word := get_word(fd,pgtype,ps) do {
        wordlist := []
        wordlist := word_put(wordlist,word)
        line := unparse(wordlist,"")    # blanks are still in wordlist
        if DEBUG == ("WORD"|"TOKEN"|"PARSE") then {
            writes_type_all(wordlist,info||"input wordlist")
            ##writes_type_all(line,info||"input word line")
            }
        if *line = 0 then next
        #####if *line = 0 then fail

        tok := [TOKEN("X",line)]        # unknown token if no match

        # every known format currently uses the "ku" matcher
        case KFORMAT of {
        "list":  { m_token := m_kulist }
        "ku":    { m_token := m_kulist }
        "cu":    { m_token := m_kulist }
        "ho":    { m_token := m_kulist }        # was m_holist
        "hounit":{ m_token := m_kulist }        # was m_holist
        "dir":   { m_token := m_kulist }        # was m_dirlist
        "group": { m_token := m_kulist }        # was m_rellist
        "nrel":  { m_token := m_kulist }        # was m_rellist
        "nv":    { m_token := m_kulist }
        "kb":    { m_token := m_kulist }
        "html":  { m_token := m_kulist }
        "htm":   { m_token := m_kulist }
        "xml":   { m_token := m_kulist }
        "rdf":   { m_token := m_kulist }
        "owl":   { m_token := m_kulist }
        "nt":    { m_token := m_kulist }
        "mcf":   { m_token := m_kulist }
        default: {
            writes_type(mylog,KFORMAT,warning||"unknown kformat")
            KFORMAT := "ku"
            m_token := m_kulist
            }
        } # end case KFORMAT

        wline := ""
        wlist := []
        every word := !wordlist do {
            if DEBUG == ("WORD"|"TOKEN"|"PARSE") then {
                writes_type_all(word,info||"input word")
                }
            /word.wtype := "w"          # untyped words count as plain words
            wline ||:= word.wtype
            put(wlist,word.wvalue)
            } # end every word
        if DEBUG == ("WORD"|"TOKEN"|"PARSE") then {
            writes_type_all(wline,info||"input wline")
            #writes_type_all(wlist,info||"input wlist")
            }

        if wline ? { ( wtok <- m_token() ) & pos(0) } then {
            if DEBUG==("WORD"|"TOKEN"|"PARSE") then {
                writes_type_all(wtok,info||"parse result wtok")
                }
            # j walks wlist in step with the type characters each
            # token consumed
            j := 0
            every i := 1 to *wtok do {
                j +:= 1
                ij := "(i="||i||",j="||j||")"
                y := wtok[i].tvalue
                z := wlist[j]
                case *y of {
                0: {
                    writes_type(mylog,wline,ierror||"zero length tvalue"||ij)
                    }
                1: { }
                default: {
                    # composite words
                    if DEBUG==("MAP"|"WORD"|"TOKEN") then {
                        writes_type_all(y,info||"composite y"||ij)
                        }
                    every k := 2 to *y do {
                        j +:= 1
                        z ||:= wlist[j]
                        } # end every k
                    } # end default
                } # end case *y
                wtok[i].tvalue := z
                } # end every i
            tok := map_token(wtok,ttype)
            tline := token2string(tok)
            if DEBUG==("WORD"|"TOKEN"|"PARSE") then {
                writes_type(mylog,tok,info||"map result tok")
                writes_type(mylog,tline,info||"map result tline")
                }
            }
        else {
            KERROR +:= 1
            writes_type_all(line,error||"unknown token")
            #####OLDcomplete("_UnknownToken_") # token.icn
            } # end if wline ? { }

        if DEBUG == ("TOKEN") then
            writes_type_all(tok,info||"tokenlist")

        case ParserMode of {
        default: {
            writes_type_all(ParserMode,ierror||"unexpected ParserMode")
            KFORMAT := pop(SaveStack)
            PGTYPE := pop(SaveStack)
            fail
            }
        "word": { every token := !tok do suspend token }
        "line": { suspend tok }
        }
        } # end every word
    #----------------------------------------------------------#

    KFORMAT := pop(SaveStack)
    PGTYPE := pop(SaveStack)
end

#==========================================================#
#==========================================================#

# integer
procedure NEWcomplete(word)
#==========================
# NOTE: map_token() no longer calls NEWcomplete()
# for begin,end (allows immediate processing
# of relation tuples & hierarchy info.)
#
# (header and first part of the initial clause; the body continues
#  below)
    local x
    static nesting,html_nest
    static BeginEnd
    static ku_begin,ku_for,ku_every,ku_brace,ku_paren
    static html_beginend,xml_beginend
    static info
    initial {
        info := "INFO: NEWcomplete: "
        BeginEnd := 0
        ku_begin := 0           # begin gtype gname; ... end gtype gname;
        ku_for := 0             # for quantifier generator; { ... };
        ku_every := 0           # every generator; { ... };
        ku_brace := 0           # { ... }
        ku_paren := 0           # ( ... )
        # for WordNet
        html_beginend := 0      # < ...
# >
        # (continuation of the initial clause of NEWcomplete(), whose
        #  header precedes this point)
        xml_beginend := 0       # <tag> content </tag>
        nesting := []           # stack
        html_nest := []         # stack
        }

    # check nesting
    #--------------#
    # called by map_token() in token.icn word
    # called by parse_file() in symbol.icn "_count_"
    # called by map_token() in token.icn "_group_"
    # called by map_token() in token.icn "_begin_"
    # called by map_token() in token.icn "_end_"
    #
    # Control words (leading/trailing underscore) query or reset the
    # static counters.  Any other word falls through to the nesting
    # update below, and the result is the depth of the nesting stack.
    case word of {
    "_count_": {
        # 1 while any ku group is open, else the stack depth
        if ku_begin > 0 then
            return 1
        else if ku_for > 0 then
            return 1
        else if ku_every > 0 then
            return 1
        else if ku_brace > 0 then
            return 1
        else if ku_paren > 0 then
            return 1
        #else if html_beginend > 0 then
        # return 1
        #else if xml_beginend > 0 then
        # return 1
        else
            return *nesting
        }
    "_html_":        { return html_beginend }
    "_html_reset_":  { html_nest := []; html_beginend := 1 }   # no return: falls through
    "_group_":       { return BeginEnd }        # > 0 means in begin end group
    "_begin_":       { BeginEnd +:= 1; return BeginEnd }
    "_end_":         { BeginEnd -:= 1; return BeginEnd }
    "_for_reset_":   { ku_for := 0; return *nesting }
    "_every_reset_": { ku_every := 0; return *nesting }
    "_begin_reset_": { ku_begin := 0; return *nesting }
    "_brace_reset_": { ku_brace := 0; return *nesting }
    "_paren_reset_": { ku_paren := 0; return *nesting }
    ("_start_"|"_restart_"): {
        ###BeginEnd := 0 # finish begin-end group
        nesting := []
        ###html_nest := []
        ku_begin := 0
        ku_for := 0
        ku_every := 0
        ku_brace := 0
        ku_paren := 0
        ###html_beginend := 1
        xml_beginend := 0
        return *nesting
        }
    default: { } # continue below
    } # end case word

    if ku_begin > 0 then {
        # inside begin-end group
        if word == "end" then { } else
            return *nesting
        }
    #####if html_beginend > 0 then {
    ##### # inside < >
    ##### if word == ">" then { } else
    ##### return *nesting
    #####}

    if DEBUG=="COMPLETE" then {
        writes_type_all(nesting,info||"before update: nesting")
        writes_type_all(word,info||"word")
        }

    # update nesting
    #---------------#
    case KFORMAT of {
    "list": { }
    ("ku"|"cu"): {
        case word of {
        #"_start_": { push(nesting,word) }
        #"_restart_": { push(nesting,word) }
        #";": { popcheck("_start_",word,nesting) }
        "=>": { push(nesting,word) }            # WordNet entry
        "--": { popcheck("=>",word,nesting) }   # WordNet entry
        "if": { push(nesting,word) }
        "fi": { popcheck("if",word,nesting) }
        "for": {
            ku_for +:= 1
            push(nesting,"for")
            }
        ( "every"|
          "while"|"until"|"when" ): {
            ku_every +:= 1
            push(nesting,"every")
            }
        # every do-like opener is pushed as "do" so "done" closes it
        "do":  { push(nesting,word) }
        "DO":  { push(nesting,"do") }
        #"can": { push(nesting,"do") }
        "vdo": { push(nesting,"do") }
        "hdo": { push(nesting,"do") }           # hwalk
        "ido": { push(nesting,"do") }           # hwalk
        "!":   { push(nesting,"do") }           # shell command (! ... done;)
        "happens": { push(nesting,"do") }       # interaction
        #(";"| "done"): { popcheck("do",word,nesting) }
        "done": { popcheck("do",word,nesting) }
        "[": { push(nesting,word) }
        "]": { popcheck("[",word,nesting) }
        "{": {
            # an opening brace closes a pending for/every header
            if ku_for > 0 then {
                popcheck("for",word,nesting)
                ku_for -:= 1
                }
            else if ku_every > 0 then {
                popcheck("every",word,nesting)
                ku_every -:= 1
                }
            ku_brace +:= 1
            push(nesting,word)
            }
        "}": {
            ku_brace -:= 1
            popcheck("{",word,nesting)
            }
        "(": {
            ku_paren +:= 1
            push(nesting,word)
            }
        ")": {
            ku_paren -:= 1
            popcheck("(",word,nesting)
            }
        "<": { push(nesting,word) }
        #####">": { popcheck("</",word,nesting) }
        ">": { popcheck("<",word,nesting) }
        "begin": { ku_begin +:= 1 } #; push(nesting,word) }
        "end":   { ku_begin -:= 1 } #; popcheck("begin",word,nesting) }
        } # end case word
        }
    ("ho"|"hounit"|"nrel"|"nt"|"mcf"): {
        case word of {
        "begin": { ku_begin +:= 1 } #; push(nesting,word) }
        "end":   { ku_begin -:= 1 } #; popcheck("begin",word,nesting) }
        "(": {
            ku_paren +:= 1
            push(nesting,word)
            }
        ")": {
            ku_paren -:= 1
            popcheck("(",word,nesting)
            }
        } # end case word
        }
    # FIX: this selector said "xlm"; the format name used elsewhere
    # in this file (get_token, map_token) is "xml".  The branch is
    # empty, so the correction does not change behaviour.
    ("html"|"htm"|"xml"|"rdf"|"daml"|"owl"): {
        ##case word of {
        ##"<": { html_beginend +:= 1; push(html_nest,word) }
        ##">": { html_beginend -:= 1; popcheck("</",word,html_nest) }
        ##"/": { html_beginend -:= 1; popcheck("/",word,html_nest) }
        ##} # end case word
        }
    } # end case KFORMAT

    if DEBUG=="COMPLETE" then {
        writes_type_all(nesting,info||"after update: nesting")
        }

    return *nesting
end

# WORD list
procedure word_put(wordlist,word)
#================================
# Appends word to wordlist and returns the resulting list.
# word may be a WORD record (appended in place with put) or a list
# (concatenated with |||:=, which builds a new list).  Any other type
# is reported and the call fails.
    static ierror
    initial {
        ierror := "Internal ERROR: word_put: "
        }

    case type(word) of {
    default: {
        writes_type_all(word,ierror||"unexpected type word")
        fail
        }
    "WORD": { put(wordlist,word) }
    "list": {
        wordlist |||:= word
        if *word = 0 then {
            if DEBUG == "NULL" then
                writes_type_all(word,ierror||"empty list word")
            #####fail
            }
        }
    } # end case type

    return wordlist
end

procedure popcheck(nestbegin,nestend,nest)
#=========================================
# Pops the nesting stack nest and checks that the popped opener,
# joined with nestend, equals nestbegin||nestend.
#   - empty stack: warns (except for a few bracket pairs), calls
#     NEWcomplete("_restart_") and returns
#   - mismatch: warns, resets the counter belonging to the popped
#     opener where there is one, and re-submits nestend to
#     NEWcomplete()
    local xbegin,found,lookfor
    static info,popfail,mismatch
    initial {
        info := "INFO: popcheck: "
        popfail := "WARNING: popcheck: empty nesting stack: "
        mismatch := "WARNING: popcheck: mismatch: "
        }

    lookfor := nestbegin||nestend
    if DEBUG=="COMPLETE" then {
        writes_type_all(lookfor,info||"lookfor")
        }

    if xbegin := pop(nest) then { } else {
        case lookfor of {
        "":   { }
        "<>": { }       # rdfs:comment = <...>; ???
        "()": { }
        "[]": { }
        "{}": { }
        default: {
            writes_type_all(lookfor,popfail||"looking for")
            }
        } # end case lookfor
        NEWcomplete("_restart_")
        return
        }

    found := xbegin||nestend
    if DEBUG=="COMPLETE" then {
        writes_type_all(found,info||"found")
        }

    if found == lookfor then { } else {
        writes_all(["# "||mismatch,"looking for ",lookfor," found "||found])
        case xbegin of {
        "for": {
            NEWcomplete("_for_reset_")
            NEWcomplete(nestend)
            }
        "every": {
            NEWcomplete("_every_reset_")
            NEWcomplete(nestend)
            }
        "begin": {
            NEWcomplete("_begin_reset_")
            NEWcomplete(nestend)
            }
        "{": {
            NEWcomplete("_brace_reset_")
            NEWcomplete(nestend)
            }
        "(": {
            NEWcomplete("_paren_reset_")
            NEWcomplete(nestend)
            }
        "do": {
            NEWcomplete(nestend)
            }
        "if": {
            NEWcomplete(nestend)
            }
        "=>": { # WordNet entry
            NEWcomplete(nestend)
            }
        default: {
            writes_all(["# "||mismatch,"unexpected xbegin ",xbegin])
            }
        } # end case xbegin
        ## if lookfor == "</>" then {
        ## # wait for next HMTL group
        ## } else {
        ## NEWcomplete("_restart_")
        ## }
        }
end

#==========================================================#

# integer
procedure OLDcomplete(word)
#=======================
# set syntactic group count
# begin - end
# if - fi
# do - done
# for
# [ ] { } < > ( )
#
# (body of OLDcomplete(word), whose header precedes this point)
# Keeps static counters of open syntactic groups and returns the
# total for the current KFORMAT after applying word.  A set of
# control words (see "do reset") zeroes every counter first.
#
# FIX: the html/xml selectors said "xlm", so for KFORMAT "xml" the
# angle counter was never applied and 0 was returned; they now say
# "xml", matching the format name used elsewhere in this file.
# FIX: the initial "ku" count (debug output only) added angle twice;
# it now uses the same terms as the final count.
    local gcount
    local iinfo
    static info,ierror
    static beginend, iffi, dodone, fordo, whendo, whiledo, untildo
    static bracket, brace, angle, paren
    static htmlgroup
    static semicolon
    initial {
        info := "INFO: OLDcomplete: "
        ierror := "Internal ERROR: OLDcomplete: "
        beginend := integer(0)
        iffi := integer(0)
        dodone := integer(0)
        fordo := integer(0)
        whendo := integer(0)
        whiledo := integer(0)
        untildo := integer(0)
        bracket := integer(0)
        brace := integer(0)
        angle := integer(0)
        paren := integer(0)
        htmlgroup := integer(0)
        #semicolon := integer(1)
        } # end initial

    iinfo := info||" word <"||word||"> "

    # do initial count
    #-----------------#
    case KFORMAT of {
    default: { gcount := 0 }
    "list": { }
    ("ku"|"cu"): {
        gcount := iffi + fordo + whendo + whiledo + untildo +
                  dodone + bracket + brace + paren + angle
                  # + semicolon
        }
    ("ho"|"hounit"|"nrel"): {
        gcount := beginend
        }
    ("html"|"htm"|"xml"|"rdf"): {
        gcount := angle
        }
    } # end case KFORMAT
    if DEBUG == "COMPLETE" then {
        writes_type(mybug,gcount,iinfo||"initial gcount")
        writes_type(mylog,gcount,iinfo||"initial gcount")
        }

    # do reset
    #---------#
    case word of {
    default: { } # continue
    (
    "_TabulaRasaComplete_"|             # initialize_tabrasa() in init.icn
    "_InitializationComplete_"|         # main() in ke.icn,ksc.icn,tap2mkr.icn
    "_BeginRead_"|                      # command() in command.icn
    "_EndRead_"|                        # command() in command.icn
    "_BeginGroup_"|                     # init.icn
    "_EndGroup_"|                       # init.icn
    "_GroupError_"|                     # init.icn
    "_SyntaxError_"                     # get_symbol() in symbol.icn
    #"_LineComplete_"|                  # continue_token(), main()
    #"_UnknownToken_"                   # get_token() in token.icn
    ): {
        writes_type(mylog,word,info||"reset word")
        beginend := integer(0)
        iffi := integer(0)
        dodone := integer(0)
        fordo := integer(0)
        whendo := integer(0)
        whiledo := integer(0)
        untildo := integer(0)
        bracket := integer(0)
        brace := integer(0)
        angle := integer(0)
        paren := integer(0)
        htmlgroup := integer(0)
        ##semicolon := integer(1)
        } # end reset
    } # end case

    # update count
    #-------------#
    # Formats not listed here keep the gcount computed above.
    case KFORMAT of {
    "list": { }
    ("ku"|"cu"): {
        case word of {
        "if":    { iffi +:= 1 }
        "fi":    { iffi -:= 1 }
        "for":   { fordo +:= 1 }
        "every": { fordo +:= 1 }
        "when":  { whendo +:= 1 }
        "while": { whiledo +:= 1 }
        "until": { untildo +:= 1 }
        ("do"|
         "DO"|
         "!"|
         #"can"|
         "vdo"|
         "hdo"|
         "ido"): { dodone +:= 1 }
        "happens": { dodone +:= 1 }
        #(";"| "done"): { dodone -:= 1 }
        "done":  { dodone -:= 1 }
        "[":     { bracket +:= 1 }
        "]":     { bracket -:= 1 }
        "{": {
            # an opening brace closes one pending loop header of
            # each kind
            brace +:= 1
            if fordo > 0 then fordo -:= 1
            if whendo > 0 then whendo -:= 1
            if whiledo > 0 then whiledo -:= 1
            if untildo > 0 then untildo -:= 1
            }
        "}":     { brace -:= 1 }
        "(":     { paren +:= 1 }
        ")":     { paren -:= 1 }
        ##";": { semicolon -:= 1 }
        "<":     { angle +:= 1 } ##; semicolon := 0 }
        ">":     { angle -:= 1 }
        } # end case word
        gcount := iffi + fordo + whendo + whiledo + untildo +
                  dodone + bracket + brace + paren + angle
                  # + semicolon
        }
    ("ho"|"hounit"|"nrel"): {
        case word of {
        "begin": { beginend +:= 1 }
        "end":   { beginend -:= 1 }
        } # end case word
        gcount := beginend
        }
    ("html"|"htm"|"xml"|"rdf"): {
        case word of {
        "<": { angle +:= 1 }
        ">": { angle -:= 1 }
        } # end case word
        gcount := angle
        }
    } # end case KFORMAT

    #####case word of {
    #####"htmlbegin": { htmlgroup +:= 1 }
    #####"htmlend": { htmlgroup -:= 1 }
    #####} # end case word
    #####gcount := htmlgroup

    if DEBUG == "COMPLETE" then {
        writes_type(mybug,gcount,iinfo||"final gcount")
        writes_type(mylog,gcount,iinfo||"final gcount")
        }

    return gcount
end

#==========================================================#
#==========================================================#

# TOKEN
procedure OLDcontinue_token(linetoken,infd,continue)
#================================================
# called from get_token() in token.icn
# automatic continuation for beginning & middle keywords & special characters
# , [ { ( < \
# isa,isc, is,has,do, if,then,else, for, ..., with
# at, of, with, from, to
# begin, end
# know, believe, etc.
# <== NOT currently reserved
#
# (body of OLDcontinue_token(linetoken,infd,continue), whose header
#  precedes this point)
# Decides from the ttype of the last token and from OLDcomplete()
# whether the statement continues on the next input line; if so it
# prompts for another line and appends its text and tokens.
# Returns linetoken unchanged for formats that never continue.
#
# NOTE(review): this procedure reads linetoken.line / linetoken.token
# and later builds TOKEN(newline,newtok) and reads .line from it.  The
# TOKEN record declared at the top of this file has fields
# ttype/tvalue, so this looks like stale code -- confirm before
# reviving it.
# NOTE(review): get_word(xline) and get_token(xlineword,infd,continue)
# do not match the parameter order of get_token(fd,pgtype,ps) as
# defined in this file.
    local line,tok,newline,newtok,newlinetoken
    local xlineword
    local tlast,xline,xlinetoken
    local nocont,cont,icomplete
    local head,KHRV,cilast
    local iinfo,iierror
    static info,ierror
    initial {
        info := "INFO: OLDcontinue_token: "
        ierror := "Internal ERROR: OLDcontinue_token: "
        }

    /infd := myin
    /continue := "yes"
    KHRV := "("||KFORMAT||HOSEPARATOR||RELSEPARATOR||NVSEPARATOR||")"

    # nocont: last-token types that end a statement
    # cont:   last-token types that force a continuation
    case KFORMAT of {
    "list": { }
    ("ku"|"cu"): {
        nocont := ';' ++ '>'
        cont := '\\' ++ '!,&|~' ++ '[{(<' ++ 'ACDEGHIiJPpQRTWYZ' ++ '='
        icomplete := OLDcomplete("")            # token.icn
        }
    ("ho"|"hounit"): {
        nocont := '>'
        cont := '<'
        icomplete := OLDcomplete("html")        # token.icn
        }
    ("dir"|"nrel"|"nv"): {
        return linetoken
        }
    ("rdf"|"xml"|"html"|"htm"): {
        nocont := '>'
        cont := '<' ++ '='
        icomplete := OLDcomplete("html")        # token.icn
        }
    "nt": {
        nocont := ''
        cont := ''
        icomplete := OLDcomplete("")            # token.icn
        }
    "mcf": {
        nocont := ''
        cont := ''
        icomplete := OLDcomplete("")            # token.icn
        }
    default: {
        writes_type(mylog,KFORMAT,ierror||"unexpected KFORMAT")
        return linetoken
        }
    } # end case KFORMAT

    line := linetoken.line
    tok := linetoken.token
    newline := line
    newtok := copy_list(tok)
    tlast := tok[-1].ttype | fail       # fails when tok is empty

    cilast := "("||continue||icomplete||tlast||")"
    head := KHRV||cilast
    iinfo := info||head
    iierror := ierror||head
    if DEBUG == "CONTINUE" then {
        writes_type(mybug,line,iinfo||"input line")
        writes_type(mylog,line,iinfo||"input line")
        }

    newlinetoken := linetoken
    if continue=="yes" then {
        # NOTE(review): upto() takes (cset,string); here the string
        # tlast is passed first and the cset second.  Icon converts
        # both, so this still tests whether a character of tlast is
        # in the set.
        if upto(tlast,nocont) & (icomplete <= 0) then {
            # no continuation
            if DEBUG=="CONTINUE" then {
                writes_type(mybug,line,iinfo||"no continutaion")
                writes_type(mylog,line,iinfo||"no continutaion")
                OLDcomplete("_LineComplete_")
                }
            }
        else if upto(tlast,cont) | (icomplete > 0) then {
            # continue to next line
            if tlast == "\\" then {
                # delete \
                newtok := newtok[1:-1]
                newline := newline[1:-1]
                }
            if xline := prompt(infd) then {
                xlineword := get_word(xline)
                xlinetoken := get_token(xlineword,infd,continue)
                newline ||:= " "|| xlinetoken.line
                newtok |||:= copy_list(xlinetoken.token)
                newlinetoken := TOKEN(newline,newtok)
                if DEBUG=="CONTINUE" then {
                    writes_type(mybug,newline,iinfo||"next line")
                    writes_type(mylog,newline,iinfo||"next line")
                    }
                } # end if xline
            }
        else {
            if DEBUG=="CONTINUE" then {
                writes_type(mybug,line,iinfo||"no continutaion")
                writes_type(mylog,line,iinfo||"no continutaion")
                OLDcomplete("_LineComplete_")
                }
            } # end if upto
        } # end if continue

    if DEBUG=="CONTINUE" then {
        writes_type(mybug,newlinetoken.line,iinfo||"output line")
        writes_type(mylog,newlinetoken.line,iinfo||"output line")
        }

    return newlinetoken
end

# TOKEN list
procedure map_token(tok,tokentype)
#=================================
# phase 2 parse
# map ttype for get_symbol()
# delete whitespace [ttype=="B"]
# delete comment [ttype=="c"]
#
# (header and first part of the body; the procedure continues below)
# tokentype defaults to mkr_word.
    local x,y,word
    local tvalue
    local kinfo
    static info
    initial {
        info := "INFO: map_token: "
        }

    /tokentype := mkr_word
    kinfo := info||"kformat <"||KFORMAT||"> "
    #DEBUG := "MAP_TOKEN"
    if DEBUG==("MAP_TOKEN"|"MAP"|"PARSE"|"END") then {
        writes_type(mybug,tok,kinfo||"input tok")
        writes_type(mylog,tok,kinfo||"input tok")
        }

    x := []
    every y := ! tok do {
        case y.ttype of {
        default:{ }
        "q": { y.ttype := "w" }         # squote q => w for get_symbol()
        "Q": { y.ttype := "w" }         # dquote Q => w for get_symbol()
        "a": {
            ##### case KFORMAT of {
            ##### default: {
            y.ttype := "a"              # angle <...>
            ##### }
            ##### ("html"|"htm"|"xml"|"rdf"|"owl"): {
            ##### tvalue := y.tvalue
            ##### if tvalue[2] == "/" then {
            ##### y.ttype := "Z" # html tag end
            ##### } else if tvalue[-2] == "/" then {
            ##### y.ttype := "a" # html tag complete
            ##### } else {
            ##### y.ttype := "Y" # html tag begin
            ##### }
            ##### } # end "html"|...
##### } # end case KFORMAT } "b": { y.ttype := "w" } # paren b => w for get_symbol() "c": { # delete comment if DEBUG=="COMMENT" then writes_type_all(y,info||"deleted comment token") next } ##### "h": { ##### # HTML command ##### if REMOVEHTML==("yes"|"YES") then { ##### do_html(y) ##### next # delete HTML ##### } # end if REMOVEHTML ##### } # end "h" "B": { case KFORMAT of { #("ho"|"hounit"|"nrel"): { } # keep whitespace for parsing phrase default: { # delete whitespace if DEBUG=="WHITESPACE" then writes_type_all(y,info||"deleting whitespace token") next } } # end case } "j": { # and,or case KFORMAT of { default: { } # reserved ("ho"|"hounit"|"nrel"): { y.ttype := "w" } # not reserved } } #"h": { y.tvalue := trimws(y.tvalue) } # howord <== obsolete #"r": { y.tvalue := trimws(y.tvalue) } # relword <== obsolete #"d": { y.tvalue := trimws(y.tvalue) } # dirword <== obsolete #"A": { y.ttype := "=" } # assignment,production <== obsolete #"D": { y.ttype := "$" } # $variable <== obsolete ",": { y.ttype := y.tvalue } # list ";": { y.ttype := y.tvalue } # relation,proplist #"=": { y.ttype := y.tvalue } # assignment,production "!": { y.ttype := y.tvalue } # sh command #"~": { y.ttype := y.tvalue } # not <== nonseparator #"?": { y.ttype := y.tvalue } # question <== nonseparator #"/": { y.ttype := y.tvalue } # hierarchy <== nonseparator #"$": { y.ttype := y.tvalue } # $variable <== nonseparator "W": { y.ttype := "w" } # weak separators #":": { y.ttype := y.tvalue } # #"+": { y.ttype := y.tvalue } # #"-": { y.ttype := y.tvalue } # #"*": { y.ttype := y.tvalue } # "S": { # strong separators case y.tvalue of { default: { y.ttype := y.tvalue } } } "w": { # word inlcudes no separators if member(tokentype,y.tvalue) then { y := group_map_token(y,tokentype) #case y.tvalue of { #"next": { y.tvalue := ";" } #"non": { y.tvalue := "~" } #"not": { y.tvalue := "~" } #} } else if member(rdf_word,y.tvalue) then { y.ttype := rdf_word[y.tvalue] } else if member(owl_word,y.tvalue) then { y.ttype 
:= owl_word[y.tvalue] } # end if } # end "w" "x": { # xword includes weak separators W if y.tvalue ? { ="isa**" | ="isc**" | ="iss**" | ="isg**" | ="isu**" | ="isp**" | ="genlmt**" | ="specmt**" } then { # "lw" | "l?" y.ttype := "V" #} else if upto("*",y.tvalue) then { # wildcard pattern } else if member(tokentype,y.tvalue) then { y := group_map_token(y,tokentype) #case y.tvalue of { #"next": { y.tvalue := ";" } #"non": { y.tvalue := "~" } #"not": { y.tvalue := "~" } #} } else if member(rdf_word,y.tvalue) then { y.ttype := rdf_word[y.tvalue] } else if member(owl_word,y.tvalue) then { y.ttype := owl_word[y.tvalue] } else { y.ttype := "w" # x => w for get_symbol() } # end if } # end "x" } # end case y.ttype # set syntactic nesting group count word := y.tvalue if DEBUG=="COMPLETE" then { writes_type_all(word,info||"NEWcomplete") } case word of { "": { } "begin": { NEWcomplete("_begin_") } "end": { NEWcomplete("_end_") } "if": { NEWcomplete(word) } "fi": { NEWcomplete(word) } "=>": { NEWcomplete(word) } "--": { NEWcomplete(word) } "for": { NEWcomplete(word) } "every": { NEWcomplete(word) } "when": { NEWcomplete(word) } "while": { NEWcomplete(word) } "until": { NEWcomplete(word) } "do": { NEWcomplete(word) } "DO": { NEWcomplete(word) } "!": { NEWcomplete(word) } #"can": { NEWcomplete(word) } "vdo": { NEWcomplete(word) } "hdo": { NEWcomplete(word) } "ido": { NEWcomplete(word) } "happens":{ NEWcomplete(word) } #(";"|"done"): { NEWcomplete(word) } "done": { NEWcomplete(word) } ("<"|">"):{ NEWcomplete(word) } ("{"|"}"):{ NEWcomplete(word) } ("["|"]"):{ NEWcomplete(word) } ("("|")"):{ NEWcomplete(word) } ";": { NEWcomplete(word) } } # end case y.tvalue # separate leading and trailing "/" in HTML parsing case KFORMAT of { default : { put(x,y) } ("html"|"htm"|"xml"|"rdf"|"owl"): { if y.ttype == "w" then { if y.tvalue[1] == "/" then { y.tvalue := y.tvalue[2:0] put(x,TOKEN("/","/"),y) } else if y.tvalue[-1] == "/" then { y.tvalue := y.tvalue[1:-1] put(x,y,TOKEN("/","/")) } else 
{ put(x,y) } # end if == "/"
        } else {
          put(x,y)
        } # end if == "w"
      } # end "html"|...
    } # end case KFORMAT
  } # end every y
  if DEBUG==("MAP_TOKEN"|"MAP"|"PARSE"|"END") then {
    writes_type(mybug,x,kinfo||"output x")
    writes_type(mylog,x,kinfo||"output x")
  }
  return x
end

# TOKEN
procedure group_map_token(y,tokentype)
#====================================
# Retype token y by looking its value up in the table tokentype.
#
#   y          token whose tvalue is a key of tokentype
#              (the caller has already checked member(tokentype,y.tvalue))
#   tokentype  lookup table; defaults to mkr_word when omitted (null)
#
# returns y itself; y.ttype is overwritten in place.
#
# member(mkr_word,y.tvalue) is true
# allow reserved words in hierarchies & relations
# => bad idea! everything gets parsed as group_statement
  local kinfo
  static info
  initial { info := "INFO: group_map_token" }

  /tokentype := mkr_word
  kinfo := info||"("||KFORMAT||"): "

  #DEBUG := "END"
  if DEBUG==("MAP_TOKEN"|"BEGIN"|"END") then {
    writes_type(mybug,y,kinfo||"input token")
    writes_type(mylog,y,kinfo||"input token")
  }

  case KFORMAT of {
    ("ho"|"hounit"|"nrel"): {
      # in these formats only a few words keep their table type
      # ("end" => "Z", "exit"/"break" => "z", "let" => "S" per the
      # original annotations); every other word is a plain word
      case y.tvalue of {
        ("end"|"exit"|"break"|"let"): { y.ttype := tokentype[y.tvalue] }
        default:                      { y.ttype := "w" }
      }
    }
    default: {
      # all other formats: take the type straight from the table
      y.ttype := tokentype[y.tvalue]
    }
  } # end case KFORMAT

  if DEBUG==("MAP_TOKEN"|"BEGIN"|"END") then {
    writes_type(mybug,y,kinfo||"output token")
    writes_type(mylog,y,kinfo||"output token")
  }
  return y
end

# TOKEN list
procedure horel_token(tok)
#=========================
# no special meaning for words in HO and REL
#
#   tok   list of tokens (elements are read via .ttype and .tvalue)
#
# returns a new list holding a copy() of every element of tok, in order.
# A copy whose tvalue is one of the exceptions listed below is left as is.
# Any other copy gets ttype "w", unless its ttype is "/" or "\\".
# The elements of tok themselves are not assigned to.
  local x,y,t,kinfo
  static info
  initial { info := "INFO: horel_token" }

  kinfo := info||"("||KFORMAT||"): "

  if DEBUG == ("HO"|"REL"|"NREL") then
    writes_type_all(tok,kinfo||"input token list")

  x := []
  every t := !tok do {
    y := copy(t)
    case y.tvalue of {
      # exceptions: group
      ("begin"|"end"|"exit"|";"): { }
      # exceptions: hierarchy
      # (disabled in the original: "/", "\\", "isu", "iss")
      ("ho"|"hierarchy"|"unithierarchy"): { }
      # exceptions: relation
      ("nrel"|"relation"|","|"["|"]"): { }
      # exceptions: do read from file done; x has y=z;
      # (disabled in the original: "has", "=")
      ("do"|"read"|"from"|"done"): { }
      # everything else loses its special meaning
      default: {
        case y.ttype of {
          "/":  { }
          "\\": { }                 # should be "/"
          default: { y.ttype := "w" }
        } # end case y.ttype
      }
    } # end case y.tvalue
    put(x,y)
  } # end every t

  # same DEBUG values as the input trace, so a "REL" session shows both
  if DEBUG == ("HO"|"REL"|"NREL") then
    writes_type_all(x,kinfo||"output token list")
  return x
end
#