#
# KEHOME/src/token.icn
#
# Oct/27/2005 Dec/27/2006
# Jan/24/2007 treat (...) like quote - parens
# Jan/25/2007 no NONSEP in DollarNameChar
# Feb/3/2007 treat <...> like quote - angles
# Feb/8/2007 xor ~ daml:disjointWith, owl:differentFrom
# Feb/9/2007 new relation syntax
#            multi-line dquote
# Feb/18/2007 purge "variable"
# Feb/25/2007 "HAS" "l"
# Mar/5/2007 "isnon"
# Mar/28/2007 "verb" -- change "R" back to "w"
# Apr/1/2007 not_dollar,
#            not SEPARATOR
# Apr/3/2007 fix bug: NEWcomplete() x has y = <...>;
# Jun/5/2008 owl:Class in owl_word
# Jun/9/2008 "iswith","rel"
# Jul/25/2008 "suspend" (Unicon generator)
# Jul/26/2008 "unithierarchy" (for backwards compatibility)
# Jul/27/2008 ido
# Jul/28/2008 horel_token() - no special meaning for words in HO and REL
# Aug/1/2008 remove HO and NREL context stuff - "W","X"
# Aug/6/2008 popcheck: no warning message for "<>"
# Aug/7/2008 popcheck: no warning message for "()" "[]" "{}"
# Aug/17/2008 RDF,OWL words - change "p" to "R", "C" to "w"
# Aug/17/2008 get_word: look for "/" and "\\"
# Aug/17/2008 horel_token: no exception for "isu" "iss"
# Aug/24/2008 horel_token: exception for "has" "="
# Aug/25/2008 get_word: empty string
# Aug/26/2008 PIPESEPARATOR := "|"
# Aug/31/2008 process "|" like ";" for now

procedure init_parser()
#======================
# Initialize the parser tables by calling, in this order,
# init_char(), init_word(), init_keyword() and init_group().
# NOTE: init_word() itself also calls init_keyword() and init_char().
init_char()
init_word()
init_keyword()  # only a minimal set of keywords ?
init_group()
end

global not_dollar
global ParserMode        # word|line set by init.icn, symbol.icn
global CommentCharacter  # initially '#'
global WordChar          # for current word definition

#===================================#
# pattern matching:                 #
#   line -> WORD list -> TOKEN list #
#===================================#
record WORD (
    wtype,   # character
    wvalue   # string
)
record TOKEN (
    ttype,   # character
    tvalue   # string
)

#=========#
# methods #
#=========#
# get_word(fd,ps,option)
# map_word(word)
# get_token(word)
# map_token(fd,ps,option)
# token_type(x)
# token_value(x)
# token_unparse(x)
# init_parser()          # called by interpret_line() in symbol.icn
# init_parser()          # called by knit_init() in knit.icn
#   init_keyword()       # token.icn
#   init_command()       # command.icn
#   init_parameter()     # param.icn
#   init_quantifier()    # token.icn
#   init_gtype()         # token.icn
#   init_variable()      # token.icn
#   init_char()          # char.icn
# declare_word()         # token.icn
# m_dquote()
# m_squote()
# m_paren()              # treat like quote
# m_angle()              # treat like quote
# m_html()               # single-line only
# m_comment()
# m_whitespace()
# m_dollarvar()
# m_separator()
# m_assignword()
# m_word()

global XLINE
global SEPARATOR,WhiteSpace,NONSEP
global AssignOp,AssignChar
global WordChar,xWordChar,NumberChar
global DollarNameChar,FileNameChar
global ListSeparator
global mkr_word,HIDDEN
global rdf_word,owl_word,cycl_word
global dmoz_word,tap_word,cyc_word
global CONTEXTword
global KEYWORD,CONTROL,VERB,PREPOSITION,OPERATOR,CONJUNCTION
global GTYPE,xxGROUP,EXGROUP,INGROUP,LATTICE
global QUANTIFIER,QUANT,groupQUANT
global VARIABLE,keVARIABLE,PRONOUN
global PARAMETER
global COMMAND
#----------------------------------------------------#

# string
procedure token_unparse(x,sep)
#=============================
# Convert a TOKEN or a list of TOKENs back to text.
#   x   : TOKEN record or list of TOKEN records
#   sep : separator handed to unparse() for lists (default " ")
# Returns token_value(x) for a TOKEN, unparse(token_value(x),sep)
# for a list.  For any other type no case clause is selected, y is
# never assigned, and the null value is returned.
local y
/sep := " "
case type(x) of {
"TOKEN": { y := token_value(x) }
"list":  { y := unparse(token_value(x),sep) }
}
return y
end

# string or list
procedure token_type(x)
#======================
# Returns x.ttype for a TOKEN, or a new list holding the ttype of
# every element when x is a list.  Any other type returns null.
local y
case type(x) of {
"TOKEN": { y := x.ttype }
"list": {
    y := []
    every put(y,(!x).ttype)
    }
} # end case type
return y
end

# string or list
procedure token_value(x)
#=======================
# Returns x.tvalue for a TOKEN, or a new list holding the tvalue of
# every element when x is a list.  Any other type returns null.
local y
case type(x) of {
"TOKEN": { y := x.tvalue }
"list": {
    y := []
    every put(y,(!x).tvalue)
    }
} # end case type
return y
end

#===================================================================#
#===================================================================#
# words
# NOTE processing sequence:
#   get_word()
#   map_word()
#   get_token()
#   map_token()
#   get_symbol()
#   map_symbol()

# WORD list
procedure get_word(fd,ps,option)
#===============================
# called by get_token() in token.icn
# called by parse_list() in word.icn
# suspend WORD list for this line
# low level parse
#
# Generator.  For every line produced by prompt(fd):
#   - a line that is neither a string nor an integer is reported on
#     myerr/mylog and the procedure fails;
#   - an empty line suspends [WORD("w","")];
#   - otherwise the line is scanned left to right into WORD records
#     using the m_*() matchers below (the order of the alternatives
#     decides which matcher wins), the wtype of each word is remapped
#     (phase 1), and plain words are passed through map_word().
# Output depends on ParserMode:
#   "word" : one single-element list [word] is suspended per word
#   "line" : the complete word list is suspended once per line
#   other  : error is logged and the procedure fails
# Parameters:
#   fd     : input handed to prompt(); defaults to myin
#   ps     : defaults to "ke$ " (not otherwise used in this procedure)
#   option : defaults to ""     (not otherwise used in this procedure)
local t,line
local wordlist,word,val
local matchline,mapline
static info,warning,ierror
initial {
    info := "INFO: get_word: "
    warning := "WARNING: get_word: "
    ierror := "Internal ERROR: get_word: "
}
/fd := myin
/ps := "ke$ "
/option := ""

every line := prompt(fd) do {
    if DEBUG==("WORD"|"PARSE") then {
        writes_type(mybug,line,info||"input line")
        writes_type(mylog,line,info||"input line")
    }
    case t := type(line) of {
    default: {
        writes_type(myerr,line,ierror||"unexpected type line")
        writes_type(mylog,line,ierror||"unexpected type line")
        #return []
        fail
        }
    ("string"|"integer"): {
        if *line = 0 then {
            if DEBUG == "NULL" then
                writes_type_all(line,warning||"empty string line")
            suspend [WORD("w","")]
            next
        }
        }
    } # end case t

    wordlist := []
    matchline := line
    mapline := ""       # concatenation of the (possibly mapped) word values
    matchline ? {
    while word := (
        WORD("Q",m_dquote())        |
        WORD("q",m_squote())        |
        WORD("b",m_paren())         |
        ## WORD("a",m_angle())      |
        ## WORD("h",m_htmlword())   |
        WORD("c",m_comment())       |
        WORD("B",m_whitespace())    |
        WORD("$",m_dollarvar())     |
        WORD("s",m_separator())     |
        WORD("=",m_assignword())    |
        WORD("V",m_qualword())      |   # not currently a separator
        WORD("w",m_word())
    )
    #####& tab(0)
    do {
        # phase 1: map wtype for get_token()
        case word.wtype of {
        #####"h": { # html
        #####   if DEBUG==("HTML"|"HTM"|"XML"|"RDF") then {
        #####       writes_type(mybug,word,info||"html: word")
        #####       writes_type(mylog,word,info||"html: word")
        #####   } # end if DEBUG
        #####   } # end "h"
        ("q"|"Q"|"a"|"b"): { # quote & angle & paren
            val := word.wvalue
            # a one-character value is an unmatched quote mark: treat as word
            if *val = 1 then {
                if DEBUG=="QUOTE" then {
                    writes_type(mybug,val,warning||"isolated quote mark")
                    writes_type(mylog,val,warning||"isolated quote mark")
                }
                word.wtype := "w"
            } # end if *val
            } # end "q"|"Q"|"a"|"b"
        "c": { # comment
            case CommentMode of {
            default: { word.wtype := "c" }
            "cyc":   { word.wtype := "w" } # CycL constant '#$name'
            }
            }
        "B": { word.wtype := "B" } # white space
        "s": { # other separators
            # <> [,] {;} ()
            word.wtype := mkr_word[word.wvalue] # "S"
            }
        "=": {
            # only the ":=" family is remapped; other values keep wtype "="
            case word.wvalue of {
            ":=":  { word.wtype := mkr_word[word.wvalue] } # "V"
            "+:=": { word.wtype := mkr_word[word.wvalue] } # "V"
            "-:=": { word.wtype := mkr_word[word.wvalue] } # "V"
            "*:=": { word.wtype := mkr_word[word.wvalue] } # "V"
            }
            }
        "w": {
            # recognize end & exit
            case word.wvalue of {
            "end":  { word.wtype := mkr_word[word.wvalue] }
            "exit": { word.wtype := mkr_word[word.wvalue] }
            }
            # recognize question variable & slash
            case word.wvalue[1] of {
            default: { }
            "?":  { word.wtype := mkr_word["?"] }
            "/":  { word.wtype := mkr_word["/"] }
            "\\": { word.wtype := mkr_word["\\"] }
            }
            word.wvalue := map_word(word.wvalue) # token.icn
            ##### don't substitute yet
            #####if member(mkr_word,word.wvalue) then
            #####   word.wtype := mkr_word[word.wvalue]
            }
        } # end case word.wtype
        if DEBUG == "WORD" then
            writes_type_all(word,info||"word")
        put(wordlist,word)
        mapline ||:= word.wvalue
    } # end do
    } # end line ?

    if DEBUG==("WORD"|"PARSE") then {
        writes_type(mybug,wordlist,info||"output wordlist")
        writes_type(mylog,wordlist,info||"output wordlist")
    }
    if mapline==line then {
    } else {
        if DEBUG=="MAP_WORD" then {
            writes_type(mybug,mapline,warning||"map_word change: mapline")
            writes_type(mylog,mapline,warning||"map_word change: mapline")
        } # end if DEBUG
    } # end if mapline

    case ParserMode of {
    default: {
        writes_type(myerr,ParserMode,ierror||"unexpected ParserMode")
        writes_type(mylog,ParserMode,ierror||"unexpected ParserMode")
        fail
        }
    "word": { every word := !wordlist do suspend [word] }
    "line": { suspend wordlist }
    }
} # end every line
end

# string
procedure map_word(word)
#=======================
# Map an external vocabulary word (xml, dc, rdf, owl, mcf) to its
# MKR equivalent.
#   - Returns word unchanged unless KMAPWORD is "YES" or "yes".
#   - Returns word unchanged when UniqueName is "qualname".
#   - A leading "/" is stripped before the lookup and put back afterwards.
#   - The first table containing the word wins, in the order
#     xml_word, words_dc, words_rdf, words_owl, words_mcf.
#   - Whenever the result differs from the input a message is written
#     with writes_all() (the DEBUG guard is commented out).
# The initial clause calls each *2mkr() converter once and discards
# the result.
# NOTE(review): presumably this forces the converters to build the
# tables tested with member() below -- confirm in xml.icn.
local newword,junk
static info
initial {
    info := "INFO: map_word: "
    junk := dc2mkr("dc:title")
    junk := rdf2mkr("rdfs:Class")
    junk := owl2mkr("owl:Thing")
    junk := mcf2mkr("Node")
    junk := xml2mkr("&owl;")
}
case KMAPWORD of {
default:       { return word }
("NO"|"no"):   { return word }
("YES"|"yes"): { }
}
case UniqueName of {
default:     { }
"shortname": { }
"classname": { }
"rootname":  { }
"qualname":  { return word }
} # end case UniqueName

newword := word
case word[1] of {
"/": { newword := word[2:0] }
}
if member(xml_word,newword) then {
    newword := xml2mkr(newword) # xml.icn
} else if member(words_dc,newword) then {
    newword := dc2mkr(newword) # xml.icn
} else if member(words_rdf,newword) then {
    newword := rdf2mkr(newword) # xml.icn
} else if member(words_owl,newword) then {
    newword := owl2mkr(newword) # xml.icn
} else if member(words_mcf,newword) then {
    newword := mcf2mkr(newword) # xml.icn
}
case word[1] of {
"/": { newword := "/"||newword }
}
#if DEBUG=={"MAP"|"MAPWORD") then
if newword ~== word then
    writes_all([TypeComment||info||"word <",word,"> => newword <",newword,">"])
#}
return newword
end

# string
procedure m_whitespace()
#=======================
# low level parse get_word()
# Matches the longest run of WhiteSpace characters at the current
# scanning position.
# NOTE: get_word() sets wtype := "B"
# NOTE: get_token() sets ttype := "B"
# NOTE: map_token() deletes ttype "B"
initial {
    # supply the default only when WhiteSpace is still null, the same
    # way m_separator() and m_assignword() default their globals
    /WhiteSpace := ' \t\v\r\n\f'
}
suspend \
    tab(many(WhiteSpace))
end

# string
procedure m_separator()
#======================
# low level parse get_word()
# Matches exactly one character of SEPARATOR.
# NOTE: get_word() sets wtype := "S" or individual character
# NOTE: get_token() sets ttype := "S" or individual character
# NOTE: map_token() changes ttype from "S" to individual character
initial {
    /SEPARATOR :=
        "\"\'" ++
        '<>' ++
        '[,]' ++
        '{;}' ++
        '()' ++
        '|'
}
suspend \
    tab(any(SEPARATOR))
end

###### string
#####procedure m_end()
######================
#####suspend \
#####   ="end"
#####end

# string
procedure m_assignword()
#=======================
# low level parse get_word()
# Matches AssignOp alone ("="), or a run of AssignChar characters
# immediately followed by AssignOp (e.g. ":=", "+=", "+:=").
# NOTE: get_word() sets wtype := "="
# NOTE: get_token() sets ttype := "="
# NOTE: map_token() leaves ttype := "="
initial {
    /AssignOp := '='
    /AssignChar := '+-*' ++ '&|~' ++ ':'
}
suspend \
    =AssignOp |
    tab(many(AssignChar)) || =AssignOp
end

# string
procedure m_qualword()
#=====================
# Note: I may change ":" to separator
# low level parse get_word()
# Matches the literal "::" only.  QualOp and QualChar are referenced
# solely by the commented-out alternatives.
# NOTE: get_word() sets wtype := "V"
# NOTE: get_token() sets ttype := "V"
# NOTE: map_token() leaves ttype := "V"
static QualOp,QualChar
initial {
    /QualOp := ':'
    /QualChar := ':'
}
suspend \
    #####=QualOp |
    #####tab(many(QualChar)) || =QualOp
    ="::"
end

# string
procedure m_word()
#=================
# low level parse
# Matches the longest run of WordChar characters.
# NOTE: get_word() sets wtype := "w"
# NOTE: get_token() sets ttype := "w"
# NOTE: map_token() leaves ttype := "w"
suspend \
    tab(many(WordChar))
end

#===================================================================#
#===================================================================#
# extended words

# string
procedure m_xword()
#==================
# intermediate level parse
# words & weak separators
# Matches the longest run of xWordChar characters.
# NOTE: m_xword() includes "w", "q","Q", "+","-","*",":"
# NOTE: get_token() sets ttype := "x"
# NOTE: map_token() changes ttype from "x" to "w"
suspend \
    tab(many(xWordChar))
end

# string
procedure m_dollarvar() #====================== # dollar variable # low level parse get_word() suspend \ ="$"||tab(many(DollarNameChar)) | ="${"||tab(many(DollarNameChar))||="}" end # string procedure m_filename() #===================== # high level parse # legal file name suspend \ tab(many(FileNameChar)) end # string procedure m_dotvar() #=================== # dot variable (pronouns) suspend \ ="..." | =".." | ="." end # string procedure m_number() #=================== # must begin with a digit suspend \ ( tab(any(&digits)) ) | ( tab(any(&digits)) || tab(many(NumberChar)) ) end # string procedure m_dquote() #=================== # low level parse # match shortest string "..." # include empty string "" # NOTE: get_word() sets wtype := "Q" # NOTE: get_token() includes "Q" in ttype "x" # NOTE: map_token() changes ttype from "x" to "w" # NOTE: multi-line dquote -- see m_quotemark() in myio.icn static dquote,QChar initial { dquote := "\"" QChar := &cset -- dquote } suspend \ #####( =dquote || tab(upto(dquote)\1) || =dquote ) | ( m_quotemark() || tab(many(QChar)) || m_quotemark() ) | ( =dquote || =dquote ) | # empty dquote ( =dquote ) # isolated dquote end # string procedure m_squote() #=================== # low level parse # match shortest string '...' # include empty string '' # NOTE: apostrophe has no matching squote # NOTE: get_word() sets wtype := "q" # NOTE: get_token() includes "q" in ttype "x" # NOTE: map_token() changes ttype from "x" to "w" static squote,qChar initial { squote := "\'" qChar := &cset -- squote } suspend \ #####( =squote || tab(upto(squote)\1) || =squote ) | ( =squote || tab(many(qChar)) || =squote ) | ( =squote || =squote ) | # empty squote ( =squote ) # apostrophe end # string procedure m_paren() #================== # low level parse # match shortest string (...) 
# include empty string () # NOTE: apostrophe has no matching squote # NOTE: get_word() sets wtype := "b" # NOTE: get_token() includes "b" in ttype "x" # NOTE: map_token() changes ttype from "x" to "w" static lparen,rparen,qChar initial { lparen := "(" rparen := ")" qChar := &cset -- rparen } suspend \ #####( =lparen || tab(upto(rparen)\1) || =rparen ) | ( =lparen || tab(many(qChar)) || =rparen ) | ( =lparen || =rparen ) # empty squote end # string procedure m_angle() #================== # low level parse # match shortest string <...> # include empty string () # NOTE: apostrophe has no matching squote # NOTE: get_word() sets wtype := "a" # NOTE: get_token() includes "a" in ttype "x" # NOTE: map_token() changes ttype from "x" to "a" static langle,rangle,qChar initial { langle := "<" rangle := ">" qChar := &cset -- rangle } suspend \ #####( =langle || tab(upto(rangle)\1) || =rangle ) | ( =langle || tab(many(qChar)) || =rangle ) | ( =langle || =rangle ) # empty squote end # string procedure m_comment(sharp) #========================= # low level parse # comment ::= CommentCharacter not_dollar arb EndOfLine # caution: cyc uses "#$" prefix for constant # NOTE: get_word() sets wtype := "c" or "w" # NOTE: get_token() sets ttype := "c" or "w" # NOTE: map_token() deletes ttype "c" # # NOTE: some comments removed by prompt() in myio.icn # see trimcomment() in word.icn initial { /CommentCharacter := '#' /CommentMode := "mke" /not_dollar := &cset -- '$' } ##/sharp := CommentCharacter sharp := "#" #####case CommentMode of { #####default: { suspend \ #####( =sharp ) | ( =sharp || tab(any(not_dollar)) || tab(0) ) ##### } #####"cyc": { # '#$name' ##### suspend \ ##### ( =sharp || tab(many(WordChar)) ) | ##### ( =sharp ) ##### } #####} end # string procedure m_nvsep() #================== # assignment # for get_token() suspend \ ="+=" | ="-=" | ="*=" | ="=" end # string procedure m_sphrase() #==================== # NOTE: symbol.icn now uses m_lphrase instead of m_sphrase # 
intermediate level parse (whitespace is now "B") # consecutive words excluding MKR words (is,has,do, ...) # use word or qword ??? # NOTE: sphrase isa string (token.icn) # lphrase isa list (symbol.icn) # NOTE: pword defined in symbol.icn (excludes is,has,do,...) # NOTE: blank removed by map_symbol() in symbol.icn # sphrase ::= # pword # pword whitespace sphrase suspend \ ( m_qword() ) | ( m_qword() || ="B" || m_sphrase() ) end # string procedure Blank(s) #================= # "replace" string by whitespace return "B" end #-----------------------------------------------# #-----------------------------------------------# #-----------------------------------------------# procedure init_word() #==================== # called by knit_init() in knit.icn # define global parsing variables # SEPARATOR,WhiteSpace, WEAKSEP,NONSEP # WordChar,xWordChar,NumberChar, FileNameChar # mkr_word,HIDDEN # rdf_word,owl_word,cycl_word # dmoz_word,tap_word,cyc_word # BinaryRelation # KEYWORD,CONTROL,VERB,PREPOSITION,CONJUNCTION,OPERATOR # QUANTIFIER,QUANT,groupQUANT # GTYPE,xxGROUP,EXGROUP,INGROUP # VARIABLE,keVARIABLE,PRONOUN # PARAMETER,COMMAND # special_ctype,special_charname # legal_utype,legal_chartype local sep,ws,word,param,cmd initial { CommentCharacter := '#' } NONSEP := '/' ++ # hierarchy,filename (/) '\\' ++ # hierarchy,filename (/) '~' ++ # logic,filename (~) '&' ++ # logic (&) '|' ++ # logic (|) '?' ++ # question (?) '$' ++ # dollar variable ($) '.' ++ # dot variable,number (.) '!' ++ # sh command (!) 
'+' ++ # set add (+) '-' ++ # set delete (-) '*' ++ # wildcard,set intersection (*) ':' # production,format,view (:) SEPARATOR := ###'$' ++ # dollar variable ($) "\"" ++ # dquote (Q) "\'" ++ # squote (q) ###CommentCharacter ++ # comment (c) '<>' ++ # HTML command (S) '[,]' ++ # comma list (S) '{;}' ++ # semicolon list (S) '()' ++ # precedence (q) '|' # pipeline (|) AssignOp := '=' # assignment (=) WhiteSpace := ' ' ++ # blank (B) '\t' ++ # tab (B) '\v' ++ # vertical tab (B) '\r' ++ # linereturn (B) '\n' ++ # newline (B) '\f' # newpage (B) # low level parse WordChar := &cset -- WhiteSpace -- SEPARATOR -- AssignOp DollarNameChar := WordChar -- NONSEP AssignChar := '+-*' ++ '&|~' ++ ':' # intermediate level parse xWordChar := 'w' ++ 'qQ' # high level parse ListSeparator := "," FileNameChar := &letters ++ &digits ++ '/\\:.~ ' NumberChar := '.' ++ &digits #WordChar := &cset -- WhiteSpace -- '[,];' -- "=" -- '{}' --'<>' -- '()' #-----------------------------------------------------# #-----------------------------------------------------# # token types mkr_word := table() rdf_word := table() owl_word := table() cycl_word := table() dmoz_word := table() tap_word := table() cyc_word := table() BinaryRelation := table() # see binrel.icn #====================# # OpenCyc vocabulary # #====================# # Classes insert(cyc_word,"Thing","w") # existent insert(mkr_word,"Thing","w") # existent # Properties insert(cyc_word,"#$genlMt","R") # Mt1 #$genlMt Mt2 insert(cyc_word,"#$specMt","R") # Mt1 #$specMt Mt2 insert(mkr_word,"genlmt","R") # Mt1 #$genlMt Mt2 insert(mkr_word,"specmt","R") # Mt1 #$specMt Mt2 insert(mkr_word,"genlmt*","R") # Mt1 #$genlMt Mt2 insert(mkr_word,"specmt*","R") # Mt1 #$specMt Mt2 insert(mkr_word,"genlmt**","l") # Mt1 #$genlMt Mt2 insert(mkr_word,"specmt**","l") # Mt1 #$specMt Mt2 insert(mkr_word,"genlmt**?","?") # Mt1 #$genlMt Mt2 insert(mkr_word,"specmt**?","?") # Mt1 #$specMt Mt2 #====================# # RDF/OWL vocabulary # #====================# # 
Classes insert(owl_word,"Thing","w") # existent insert(owl_word,"Nothing","w") # nonexistent insert(owl_word,"owl:Thing","w") # existent insert(owl_word,"owl:Nothing","w") # nonexistent insert(owl_word,"owl:Class","w") # set of concepts insert(owl_word,"owl:DatatypeProperty","w") # attribute insert(owl_word,"owl:ObjectProperty","w") # relation part interaction insert(owl_word,"owl:Ontology","w") # view insert(owl_word,"owl:Restriction","w") # differentia insert(owl_word,"owl:FunctionalProperty","w") # subcharacteristic insert(owl_word,"owl:InverseFunctionalProperty","w") # subcharacteristic insert(owl_word,"owl:SymmetricProperty","w") # subcharacteristic insert(owl_word,"owl:TransitiveProperty","w") # subcharacteristic insert(owl_word,"owl:AllDifferent","w") # insert(owl_word,"owl:DeprecatedClass","w") # insert(owl_word,"owl:DeprecatedProperty","w") # # Properties insert(owl_word,"owl:sameAs","i") # is insert(owl_word,"owl:sameIndividualAs","i") # is insert(owl_word,"owl:equivalentClass","R") # maybe is insert(owl_word,"owl:equivalentProperty","R") # maybe is insert(owl_word,"owl:oneOf","R") # isany insert(owl_word,"owl:unionOf","R") # isunion insert(owl_word,"owl:intersectionOf","R") # isintersection insert(owl_word,"owl:complementOf","R") # is not insert(owl_word,"owl:inverseOf","R") # inverse insert(owl_word,"owl:onProperty","w") # insert(owl_word,"owl:cardinality","w") # insert(owl_word,"owl:maxCardinality","w") # insert(owl_word,"owl:minCardinality","w") # insert(owl_word,"owl:allValuesFrom","w") # insert(owl_word,"owl:someValuesFrom","w") # insert(owl_word,"owl:differentFrom","R") # insert(owl_word,"owl:disjointWith","R") # insert(owl_word,"owl:distinctMembers","R") # insert(owl_word,"owl:backwardCompatibleWith","R") # insert(owl_word,"owl:incompatibleWith","R") # insert(owl_word,"owl:priorVersion","w") # insert(rdf_word,"rdfs:Class","w") # set of concepts insert(rdf_word,"rdfs:Resource","w") # existent insert(rdf_word,"rdf:Property","w") # characteristic 
insert(rdf_word,"rdf:type","R") # has type= <=> isu insert(rdf_word,"rdfs:subClassOf","R") # iss* insert(rdf_word,"rdfs:subPropertyOf","R") # iss* insert(rdf_word,"rdfs:domain","w") # insert(rdf_word,"rdfs:range","w") # #================# # non separators # #================# insert(mkr_word,"?","?") # question insert(mkr_word,"/","/") # hierarchy,filename,HTML insert(mkr_word,"/","/") # hierarchy,filename,HTML #insert(mkr_word,"/","S") # hierarchy,filename,HTML (strong separator) insert(mkr_word,"\\","/") # hierarchy,filename,HTML insert(mkr_word,"$","$") # dollar variable #insert(mkr_word,".",".") # dot variable,number # assign words insert(mkr_word,":=","V") # production insert(mkr_word,"+:=","V") # production insert(mkr_word,"-:=","V") # production insert(mkr_word,"*:=","V") # production insert(mkr_word,"::","V") # named proposition insert(mkr_word,"=","=") # assignment,production insert(mkr_word,"+","+") # assignment,word insert(mkr_word,"-","-") # assignment,word insert(mkr_word,"*","*") # assignment,word, wildcard insert(mkr_word,":",":") # assignment,production, format, view insert(mkr_word,"*","*") # assignment,wildcard insert(mkr_word,"!","D") # sh command insert(mkr_word,"~","~") # assignment,not insert(mkr_word,"&","&") # assignment,and #============# # separators # #============# # quote, comment #insert(mkr_word,"\'","q") # squote for get_word() #insert(mkr_word,"\"","Q") # dquote for get_word() #insert(mkr_word,"(","b") # paren for get_word() #insert(mkr_word,")","b") # paren for get_word() #insert(mkr_word,"<","a") # angle for get_word() #insert(mkr_word,">","a") # angle for get_word() #insert(mkr_word,CommentCharacter,"c") # comment for get_word() # strong separators insert(mkr_word,",","S") # list of phrase insert(mkr_word,";","S") # list of proposition insert(mkr_word,"[","S") # list of phrase insert(mkr_word,"]","S") # list of phrase ###insert(mkr_word,"[","[") # list of phrase ###insert(mkr_word,"]","]") # list of phrase 
insert(mkr_word,"{","S") # list of proposition insert(mkr_word,"}","S") # list of proposition insert(mkr_word,"(","S") # precedence & quote insert(mkr_word,")","S") # precedence & quote insert(mkr_word,"<","S") # HTML & multi-line quote insert(mkr_word,">","S") # HTML & multi-line quote #============# # whitespace # #============# insert(mkr_word," ","B") # blank insert(mkr_word,"\t","B") # tab #insert(mkr_word,"\v","B") # vertical tab insert(mkr_word,"\r","B") # linereturn insert(mkr_word,"\n","B") # newline insert(mkr_word,"\f","B") # newpage #=======# # words # #=======# # generic names for grammar examples insert(mkr_word,"proposition","w") insert(mkr_word,"production","w") insert(mkr_word,"sentence","w") insert(mkr_word,"statement","w") insert(mkr_word,"question","w") insert(mkr_word,"command","w") # do command od arg done; insert(mkr_word,"arg","w") # do command od arg done; ####insert(mkr_word,"relverb","B") # subject relverb object; insert(mkr_word,"subject","w") insert(mkr_word,"verb","w") insert(mkr_word,"object","w") insert(mkr_word,"preposition","P") # action insert(mkr_word,"do","D") # action insert(mkr_word,"ido","D") # interaction insert(mkr_word,"DO","D") # axiomatic level insert(mkr_word,"!","D") # UNIX shell insert(mkr_word,"can","D") # capability ###insert(mkr_word,"do*","D") # capability insert(mkr_word,"vdo","D") # all views insert(mkr_word,"hdo","D") # hierarchy insert(mkr_word,"done","d") # preposition insert(mkr_word,"at","A") # context ==> use "A" #insert(mkr_word,"at","P") # context ==> use "A" insert(mkr_word,"out","P") # product insert(mkr_word,"of","P") # domain (part) insert(mkr_word,"with","P") # modifier (also definition) insert(mkr_word,"od","P") # direct object insert(mkr_word,"from","P") # initial insert(mkr_word,"to","P") # final insert(mkr_word,"in","P") # index # hierarchy insert(mkr_word,"is","i") # alias (also definition) insert(mkr_word,"isu","i") # unit isu genus (also definition) insert(mkr_word,"iss","i") # species iss 
genus (also definition) insert(mkr_word,"isa","i") # unit|species isa genus (also definition) insert(mkr_word,"isp","R") # genus isp unit insert(mkr_word,"isg","R") # genus isg species insert(mkr_word,"isc","R") # genus isc unit|species insert(mkr_word,"isa+","R") # 1 or more levels insert(mkr_word,"isc+","R") # 1 or more levels insert(mkr_word,"is*","i") # 0 or more levels insert(mkr_word,"isa*","R") # 0 or more levels insert(mkr_word,"isc*","R") # 0 or more levels insert(mkr_word,"iss*","R") # 0 or more levels insert(mkr_word,"isg*","R") # 0 or more levels insert(mkr_word,"isu*","R") # 0 or more levels insert(mkr_word,"isp*","R") # 0 or more levels insert(mkr_word,"isa**","l") # followed by integer insert(mkr_word,"isc**","l") # followed by integer insert(mkr_word,"isa**?","?") # question verb insert(mkr_word,"isc**?","?") # question verb # attribute insert(mkr_word,"iswith","R") # differentia insert(mkr_word,"has","H") insert(mkr_word,"HAS","l") # part insert(mkr_word,"haspart","H") insert(mkr_word,"isapart","H") insert(mkr_word,"haspart*","H") insert(mkr_word,"isapart*","H") # relation insert(mkr_word,"rel","L") insert(mkr_word,"nrel","w") insert(mkr_word,"trel","L") insert(mkr_word,"brel","L") insert(mkr_word,"urel","L") insert(mkr_word,"isin","R") # assignment insert(mkr_word,"let","S") insert(mkr_word,"vlet","S") insert(mkr_word,"unlet","S") # NSM concepts insert(mkr_word,"causes","B") # cause-effect insert(mkr_word,"because","B") # NSM effect-cause insert(mkr_word,"like","R") # NSM similarity insert(mkr_word,"happens","D") # NSM happen insert(mkr_word,"happensod","B")# NSM happen insert(mkr_word,"before","B") # NSM time insert(mkr_word,"after","B") # NSM time insert(mkr_word,"above","B") # NSM space insert(mkr_word,"below","B") # NSM space insert(mkr_word,"beside","B") # NSM space insert(mkr_word,"inside","B") # NSM space insert(mkr_word,"outside","B") # NSM space insert(mkr_word,"causes*","B") # cause-effect insert(mkr_word,"because*","B") # effect-cause 
insert(mkr_word,"like*","R") # NSM similarity insert(mkr_word,"happensod*","B")# NSM happen insert(mkr_word,"before*","B") # time insert(mkr_word,"after*","B") # time insert(mkr_word,"above*","B") # space insert(mkr_word,"below*","B") # space insert(mkr_word,"beside*","B") # space insert(mkr_word,"inside*","B") # space insert(mkr_word,"outside*","B") # space # NSM view #insert(mkr_word,"maybe","w") # view # generator verb insert(mkr_word,"in","P") # x in concept list # exgroup, ingroup insert(mkr_word,"isalt","R") # alternative isalt exgroup insert(mkr_word,"isany","R") # exgroup isany alternative insert(mkr_word,"isall","R") # ingroup isall member insert(mkr_word,"ismem","R") # member ismem ingroup # for OWL insert(mkr_word,"isand","R") # intersection (requisite) insert(mkr_word,"ismem","R") # intersection (requisite) insert(mkr_word,"isor","R") # union insert(mkr_word,"isxor","R") # disjoint union insert(mkr_word,"xor","R") # differentFrom insert(mkr_word,"ismem","R") # union insert(mkr_word,"isand","R") # restriction ??? insert(mkr_word,"ismem","R") # restriction ??? 
insert(mkr_word,"isnon","R") # complement wrt genus insert(mkr_word,"isnot","R") # complement wrt existent insert(mkr_word,"inverse","R") # inverse insert(mkr_word,"isalt*","R") # alternative isalt* exgroup insert(mkr_word,"isany*","R") # exgroup isany* alternative insert(mkr_word,"isall*","R") # ingroup isall* member insert(mkr_word,"ismem*","R") # member ismem* ingroup # concept formation insert(mkr_word,"isd","i") # differentiate insert(mkr_word,"isi","i") # integrate insert(mkr_word,"means","R") insert(mkr_word,"isref","R") # is referent of insert(mkr_word,"means*","R") insert(mkr_word,"isref*","R") # groups insert(mkr_word,"begin","Y") # group definition insert(mkr_word,"end","Z") # group definition insert(mkr_word,"hierarchy","h") # hierarchy,lattice insert(mkr_word,"unithierarchy","h") # unithierarchy (backwards compatibility) insert(mkr_word,"relation","r") # relation insert(mkr_word,"group","w") # group,triple,mcf,... # control structure insert(mkr_word,"exit","z") # exit KE insert(mkr_word,"break","z") # exit every,while,until,when insert(mkr_word,"return","S") # return Product=value; insert(mkr_word,"suspend","S") # suspend Product=value; (Unicon generator) insert(mkr_word,"if","I") # conditional insert(mkr_word,"then","T") # conditional insert(mkr_word,"else","E") # conditional insert(mkr_word,"fi","F") # conditional insert(mkr_word,"every","G") # iteration insert(mkr_word,"while","I") # iteration insert(mkr_word,"until","I") # iteration insert(mkr_word,"when","I") # monitor events insert(mkr_word,"iff","J") # conjunction insert(mkr_word,"|","|") # conjunction: pipeline - treat like ";" # other words # NSM words insert(mkr_word,"good","w") # NSM evaluator insert(mkr_word,"bad","w") # NSM evaluator insert(mkr_word,"big","w") # NSM descriptor insert(mkr_word,"small","w") # NSM descriptor insert(mkr_word,"very","Q") # NSM intensifier insert(mkr_word,"more","Q") # NSM augmentor insert(mkr_word,"near","w") # NSM space distance insert(mkr_word,"far","w") # 
NSM space distance insert(mkr_word,"long","w") # NSM time duration insert(mkr_word,"short","w") # NSM time duration #insert(mkr_word,"some","Q") # NSM time duration insert(mkr_word,"and","j") # logic insert(mkr_word,"or","j") # logic insert(mkr_word,"not","N") # complement wrt existent insert(mkr_word,"non","N") # complement wrt genus insert(mkr_word,"a","Q") # quantifier insert(mkr_word,"all","Q") # ingroup quantifier insert(mkr_word,"any","Q") # exgroup quantifier insert(mkr_word,"either","Q") # exgroup quantifier insert(mkr_word,"no","Q") # quantifier insert(mkr_word,"some","Q") # quantifier insert(mkr_word,"the","Q") # quantifier ##insert(mkr_word,"exists","G") # KIF first order logic insert(mkr_word,"forall","G") # KIF first order logic insert(mkr_word,"forany","G") # MKR first order logic insert(mkr_word,"forSome","G") # OWL first order logic insert(mkr_word,"forAll","G") # OWL first order logic insert(mkr_word,"implies","J") # logic #####insert(mkr_word,"|-","J") # implies #insert(mkr_word,"delete","w") # do delete ... 
done # iQ,iG words for reparsing compound statements #insert(mkr_word,"iseither","R") #insert(mkr_word,"isthe","R") # ig,ih,ir words for exgroup and ingroup #insert(mkr_word,"isconcept","R") #insert(mkr_word,"isset","R") #insert(mkr_word,"islist","R") #insert(mkr_word,"issequence","R") #insert(mkr_word,"isrelation","R") #insert(mkr_word,"ishierarchy","R") #insert(mkr_word,"islattice","R") #insert(mkr_word,"isdirectory","R") #insert(mkr_word,"isconcept","R") # ad-hoc additions for parsing English phrases #============================================= insert(mkr_word,"for","j") # conjunction insert(mkr_word,"vs.","j") # conjunction # end mkr_word #-----------------------------------------------------# #-----------------------------------------------------# init_keyword() # token.icn init_gtype() # token.icn init_quantifier() # token.icn init_variable() # token.icn init_parameter() # param.icn init_command() # command.icn init_char() # char.icn # ignore hidden words when dumping concepts every sep := string(!SEPARATOR) do insert(HIDDEN,sep) delete(HIDDEN,".") every ws := !WhiteSpace do insert(HIDDEN,ws) every word := key(mkr_word) do insert(HIDDEN,word) delete(HIDDEN,"let") delete(HIDDEN,"vlet") delete(HIDDEN,"unlet") every param := !PARAMETER do insert(HIDDEN,param) every cmd := ! COMMAND do insert(HIDDEN,cmd) # mental actions - know,believe,... 
# action object is (may be) proposition list
# kaction is action with ktype=kt
# declared by tabrasa.def, user
kaction_set := set()

# identification actions
# <== no longer used
identify_set := set([
   "identify","perceive","classify","measure","define",
   "see","hear","touch","smell","taste"
   ])

# initialize rdf map
#####rdf2mkr("rdf:type")
end

procedure declare_word()
#=======================
# Declare the special words by running every add_* step in a fixed order.
# called by initialize_knit() in knit.icn
   add_separator()      # token.icn
   add_keyword()        # token.icn
   add_quantifier()     # token.icn
   add_gtype()          # token.icn
   add_variable()       # token.icn
   add_parameter()      # param.icn
   add_command()        # command.icn
end

#===================================================================#
#===================================================================#
# KFORMAT == "ku"                                                   #
#===================================================================#

###### TOKEN
#####procedure m_endtoken()
######=====================
###### block end for hierarchy|relation|directory
#####suspend \
#####   TOKEN("Z",="end")   # begin end
#####end

# TOKEN list
procedure m_kulist()
#===================
# kulist ::=
#     kutoken
#     kutoken kulist
# Generator: first offers each single-token list, then (when resumed)
# lists made of one token followed by a recursively matched kulist.
   suspend [m_kutoken()] | ([m_kutoken()] ||| m_kulist())
end

# TOKEN
procedure m_kutoken()
#====================
# intermediate level parse
# Generator over the alternatives below, tried in the order written,
# against the current scanning subject.  get_token() scans a string
# built from WORD wtype codes, so a multi-character match such as "::="
# spans several words (get_token() joins their values afterwards).
# NOTE: "q","Q","w" included in xword ("$" ???)
# NOTE: map_token() changes "x" to "w"
# NOTE: map_token() deletes comment "c"
# NOTE: map_token() deletes whitespace "B"
   suspend TOKEN("?", ="?") |          # question variable
      TOKEN("/", m_slash()) |          # hierarchy, HTML end
      #TOKEN("$", m_dollarvar()) |     # dollar variable
      #TOKEN("$", ( ="$w" )) |         # dollar variable
      #TOKEN("$", ( ="${w}" )) |       # dollar variable
      #TOKEN("$", ( ="$R" )) |         # dollar variable
      TOKEN("$", ="$") |               # dollar variable
      #TOKEN("n", m_number()) |        # integer or real
      #TOKEN(".", m_dotvar()) |        # dot pronoun (included in $variable)
      #TOKEN(".", ( ="..." )) |        # dot pronoun (included in $variable)
      #TOKEN(".", ( =".." )) |         # dot pronoun (included in $variable)
      #TOKEN(".", ( ="." )) |          # dot pronoun (included in $variable)
      ###TOKEN("h", ( ="h" )) |        # HTML command
      TOKEN("c", ="c") |               # comment #...
      TOKEN("B", ="B") |               # whitespace
      TOKEN(",", =",") |               # list
      TOKEN(";", =";") |               # list
      TOKEN("!", ="!") |               # sh command
      TOKEN("S", ="S") |               # strong separators
      ###TOKEN("[", ( ="[" )) |        # strong separators
      ###TOKEN("]", ( ="]" )) |        # strong separators
      TOKEN("<", ="<") |               # strong separators
      TOKEN(">", =">") |               # strong separators
      TOKEN("|", ="|") |               # strong separators
      #TOKEN("=", m_nvsep()) |         # assignment
      TOKEN("V", ="V") |               # production :=
      TOKEN("=", ="::=") |             # BNF production ::=
      TOKEN("=", ="+=") |              # assignment +=
      TOKEN("=", ="-=") |              # assignment -=
      TOKEN("=", ="*=") |              # assignment *=
      TOKEN("=", ="=") |               # assignment =
      TOKEN("x", m_xword()) |          # (wqQ)
      TOKEN("x", ="+") |               # weak separator
      TOKEN("x", ="-") |               # weak separator
      TOKEN("x", ="*") |               # weak separator
      TOKEN("x", =":") |               # weak separator
      TOKEN("q", ="q") |               # squote '...'
      TOKEN("Q", ="Q") |               # dquote "..."
      TOKEN("x", ="b") |               # paren (...)
      TOKEN("x", ="a") |               # angle <...>
      TOKEN("Y", ="Y") |               # begin group
      TOKEN("Z", ="Z") |               # end group
      TOKEN("z", ="z") |               # exit MKE
      TOKEN("x", ="w") |               # word
      TOKEN("w", ="") |                # empty string
      TOKEN("U", m_byte())             # anything else is unknown
end

#===================================================================#
#===================================================================#
# KFORMAT == "ho" | "hounit"
#===================================================================#

# TOKEN list
procedure m_holist()
#===================
# holist ::=
#     hotoken
#     hotoken holist
# Same shape as m_kulist(), built on m_hotoken().
   suspend [m_hotoken()] | ([m_hotoken()] ||| m_holist())
end

# TOKEN
procedure m_hotoken(sep)
#=======================
# hotoken ::=
#     endtoken    <<== obsolete -- in kutoken
#     holevel     <<== obsolete -- do in symbol.icn
#     hophrase    <<== obsolete -- do in symbol.icn
#     kutoken
# sep defaults to dequote(HOSEPARATOR); only the obsolete (commented)
# alternatives below ever used it.
   /sep := dequote(HOSEPARATOR)
   #TOKEN("Z",="end") |
   #TOKEN("/",m_holevel(sep)) |
   #TOKEN("h",m_hophrase(sep)) |
   suspend m_kutoken()
end

#===================================================================#
#===================================================================#
# KFORMAT == "dir"
#===================================================================#

# TOKEN list
procedure m_dirlist()
#====================
# dirlist ::=
#     dirtoken
#     dirtoken dirlist
# Same shape as m_kulist(), built on m_dirtoken().
   suspend [m_dirtoken()] | ([m_dirtoken()] ||| m_dirlist())
end

# TOKEN
procedure m_dirtoken(sep)
#=======================
# dirtoken ::=
#     endtoken
#     dirword
#     kutoken
# sep is accepted but not referenced in this procedure.
   suspend TOKEN("Z", ="end") |
      TOKEN("d", m_dirword()) |
      m_kutoken()
end

# string
procedure m_dirword()
#===================#
# Match the longest run of characters that may appear in a directory
# word: every character except "/" (hierarchy separator) and "!" "="
# (left out so that commands and assignments are still recognized).
static allowed
initial {
   allowed := &cset -- '/' -- '!='
   }
   suspend tab(many(allowed))
end

#===================================================================#
#===================================================================#
# KFORMAT == "nrel"
#===================================================================#

# TOKEN list
procedure m_rellist()
#====================
# rellist ::=
#     reltoken
#     reltoken rellist
# Generator: single-token lists first, then token + recursive rellist.
   suspend [m_reltoken()] | ([m_reltoken()] ||| m_rellist())
end

# TOKEN
procedure m_reltoken()
#=====================
# reltoken ::=
#     endtoken    <<== obsolete -- in kutoken
#     relsep      <<== obsolete -- in kutoken
#     relphrase   <<== obsolete -- do in symbol.icn
#     kutoken
   #TOKEN("Z",="end") |
   #TOKEN(";",m_relsep()) |
   #TOKEN("r",m_relphrase()) |
   suspend m_kutoken()
end

# string
procedure m_relsep(sep)
#======================
# Match the relation separator at the current scanning position.
# sep defaults to dequote(RELSEPARATOR).
   /sep := dequote(RELSEPARATOR)
   suspend =sep
end

#===================================================================#
#===================================================================#
# KFORMAT == "nv"       # now same rules as "ku"
#===================================================================#

#===================================================================#
#===================================================================#
#===================================================================#

procedure init_keyword()
#=======================
# Build the global keyword sets CONTROL, VERB, PREPOSITION, OPERATOR,
# CONJUNCTION and their union KEYWORD.
   CONTROL := set([
      "begin", "end",
      "exit", "break",
      "every","done",
      "while","until",
      "when",
      "forall","exists",
      "if","then","else","fi"
      ])

   # NOTE: no comma after the last element.  In an Icon list constructor
   # an omitted expression is &null, so a trailing comma would put a null
   # member into VERB (and from there into KEYWORD and add_keyword()).
   VERB := set([
      "inverse",
      # for CycL
      "genlmt","genlmt+","genlmt*","genlmt**","genlmt**?",
      "specmt","specmt+","specmt*","specmt**","specmt**?",
      # for OWL
      "isand",      # intersection
      "isor",       # union
      "isxor",      # disjoint union
      "xor",        # disjoint
      # for NSM
      "before","before*",
      "after","after*",
      "above","above*",
      "below","below*",
      "beside","beside*",
      "inside","inside*",
      "outside","outside*",
      "happensod","happensod*",
      "like", "like*",      # NSM
      # MKR
      "IS", "ISA", "ISC", "HAS", "DO",
      "isa","isa+","isa*","isa**","isa**?",
      "isc","isc+","isc*","isc**","isc**?",
      "isu","isu*","isu**","isu**?",
      "isp","isp*","isp**","isp**?",
      "iss","iss*","iss**","iss**?",
      "isg","isg*","isg**","isg**?",
      "ismem","ismem*",
      "isall","isall*",
      "isalt","isalt*",
      "isany","isany*",
      "is","is*","iswith",
      "isd",
      "isi",
      "rel","nrel","trel","brel","urel",
      "has","can",
      "haspart","haspart*",
      "isapart","isapart*",
      "do","vdo","hdo",     # "can" is already listed above
      "ido",
      "happens",
      "let","vlet","unlet",
      "means", "means*",
      "isref", "isref*"
      # iQ words for reparsing compound statements
      #"iseither",
      #"isno",
      #"issome",
      #"isthe"
      ])

   PREPOSITION := set([
      "in", "at",   # context
      "out",        # product
      "of",         # part
      "with",       # characteristic
      "od",         # direct object
      "from",       # initial characteristic
      "to"          # final characteristic
      ])

   OPERATOR := set([
      "=",          # name = value
      "+=",         # name op value
      "-=",         # name op value
      "*=",         # name op value
      ":=",         # product := producer
      "+:=",        # product prodop producer
      "-:=",        # product prodop producer
      "*:=",        # product prodop producer
      "::="         # BNF grammar
      ])

   CONJUNCTION := set([
      "iff",
      "implies",                # "|-"
      "supports",               # "|=" situation theory notation
      "causes", "causes*",      # NSM
      "because", "because*"     # NSM
      ])

   KEYWORD := CONTROL ++ VERB ++ PREPOSITION ++ OPERATOR ++ CONJUNCTION
end

procedure init_quantifier()
#==========================
# Build QUANT, groupQUANT and their union QUANTIFIER.
   QUANT := set([ "a", "no", "some", "the" ])
   groupQUANT := set([ "either", "all", "any" ])
   QUANTIFIER := QUANT ++ groupQUANT
end

procedure init_gtype()
#=====================
# Build the group-type sets and their union GTYPE.
   xxGROUP := set([
      "exgroup","ingroup",
      "concept"
      ])
   INGROUP := set([
      "list",
      "requisite",
      "sequence",
      "set",
      "intersection",   # OWL
      "LATTICE"
      ])
   EXGROUP := set([
      "enum",           # OWL oneOf
      "union"           # OWL
      ])
   LATTICE := set([
      "hierarchy",
      "lattice"
      ])
   GTYPE := xxGROUP ++ INGROUP ++ EXGROUP ++ LATTICE
end

procedure init_variable()
#========================
# context-dependent variable
# Build keVARIABLE, PRONOUN and their union VARIABLE.
   keVARIABLE := set([
      "Sentence",
      #####"Statement",
      "Question",
      "Command",
      "Assignment",
      "If",
      "Every"
      ])
   PRONOUN := set([
      ".","..","...",   # pronoun
      "I","we",
      "you",
      "he","she","they",
      "it",
      "none",
      "someone",        # NSM
      "something",      # NSM
      "people"          # NSM
      ])
   VARIABLE := keVARIABLE ++ PRONOUN
end

procedure add_separator()
#========================
# Declare the "separator" concept and file the named whitespace
# characters and every member of SEPARATOR (except "$" and ".") as
# units of it.
local sep,whitespace
   new_concept("separator",,"separator")
   add_species("separator","symbol")
   whitespace := ["blank","tab","vertical tab","line return","newline","newpage"]
   every sep := !whitespace do {
      add_unit(sep,"separator")
      }
   #####add_alias(" ","blank")
   #####add_alias("\t","tab")
   #####add_alias("\v","vertical tab")
   #####add_alias("\r","line return")
   #####add_alias("\n","newline")
   #####add_alias("\f","newpage")

   # $. are filed as variables, not separators
   every sep := !(SEPARATOR -- '$.') do {
      add_unit(string(sep),"separator")
      }
   #####add_alias("sharp","#")
   #####add_alias("dquote","\"")
   #####add_alias("squote","\'")
   #####add_alias("lparen","(")
   #####add_alias("rparen",")")
   #####merge_alias(set([",","comma"]))
   #####merge_alias(set([";","semicolon"]))
   #####merge_alias(set(["&","and"]))
   #####merge_alias(set(["|","or"]))
   #####merge_alias(set(["~","not"]))  # complement wrt existent
   #####merge_alias(set(["~","non"]))  # complement wrt genus
end

procedure add_keyword()
#======================
# File every keyword as a unit of its word class and add it to HIDDEN.
# OPERATOR members are filed under "separator".
local kw
   add_species("symbol","OBJECT")
   add_species("word","symbol")

   add_species("verb","word")
   every kw := !VERB do {
      add_unit(kw,"verb")
      insert(HIDDEN,kw)
      }
   add_species("conjunction","word")
   every kw := !CONJUNCTION do {
      add_unit(kw,"conjunction")
      insert(HIDDEN,kw)
      }
   add_species("preposition","word")
   every kw := !PREPOSITION do {
      add_unit(kw,"preposition")
      insert(HIDDEN,kw)
      }
   add_species("separator","symbol")
   every kw := !OPERATOR do {
      add_unit(kw,"separator")
      insert(HIDDEN,kw)
      }
   add_species("control","word")
   every kw := !CONTROL do {
      add_unit(kw,"control")
      insert(HIDDEN,kw)
      }
   merge_alias(set(["forall","forAll"]),"forAll")     # use OWL name
   merge_alias(set(["exists","forSome"]),"forSome")   # use OWL name
   #####merge_alias(set(["isc","iseither"]))
   #####merge_alias(set(["|=","supports"]))
   #####merge_alias(set(["|-","implies"]))
end

procedure add_quantifier()
#=========================
# Declare the "quantifier" concept, file each QUANTIFIER member under
# it and add it to HIDDEN.
local kw
   new_concept("quantifier",,"quantifier")
   add_species("quantifier","word")
   every kw := !QUANTIFIER do {
      add_unit(kw,"quantifier")
      insert(HIDDEN,kw)
      }
end

procedure add_gtype()
#====================
# File each group-type word as a species of its group class and add it
# to HIDDEN.
local kw
   every kw := !xxGROUP do {
      add_species(kw,"group")
      insert(HIDDEN,kw)
      }
   every kw := !INGROUP do {
      add_species(kw,"ingroup")
      insert(HIDDEN,kw)
      }
   every kw := !EXGROUP do {
      add_species(kw,"exgroup")
      insert(HIDDEN,kw)
      }
   every kw := !LATTICE do {
      add_species(kw,"LATTICE")
      insert(HIDDEN,kw)
      }
   #####add_alias("enum","enum")
   #####add_alias("req","requisite")
   #####add_alias("seq","sequence")
end

procedure add_variable()
#=======================
# context-dependent variable
# File keVARIABLE members as attributes and PRONOUN members as
# pronouns; add each of them to HIDDEN.
local var,pro
   # declare variables
   every var := !keVARIABLE do {
      add_species(var,"attribute")      # var iss variable
      #put_char("attr","ke",var,[])     # ke has var
      insert(HIDDEN,var)
      }
   add_species("pronoun","attribute")
   every pro := !PRONOUN do {
      add_species(pro,"pronoun")        # pro iss pronoun
      put_ctype(pro,"pronoun")          # pro has ctype=pronoun
      #put_char("attr","ke",pro,[])     # ke has pronoun
      # hide the pronoun itself (this used to re-insert the stale loop
      # variable "var" left over from the loop above)
      insert(HIDDEN,pro)
      }
end

#===================================================================#
#===================================================================#
#===================================================================#

# TOKEN
procedure copy_token(tok)
#========================
# Return a new TOKEN with the same ttype and tvalue as tok.
   return TOKEN(tok.ttype,tok.tvalue)
end

# list of TOKEN
procedure copy_tlist(tok)
#========================
# Return a new list holding a copy_token() copy of every element of tok.
local newtok
   newtok := []
   every put(newtok,copy_token(!tok))
   return newtok
end

#-------------------------------------------------------------

# TOKEN list
procedure get_token(fd,ps,option)
#================================
# called by parse_file() in symbol.icn
# suspend TOKEN list for WORD list
#
# Generator.  For every result of get_word(fd,ps,option):
#   1. collect the WORD record(s) into wordlist
#   2. concatenate the wtype codes into wline, the wvalues into wlist
#   3. parse wline with m_kulist(), requiring the whole string to match
#   4. replace each token's tvalue (matched type codes) by the joined
#      values of the words it covers, then run map_token()
#   5. suspend each token (ParserMode "word") or the list ("line")
# If the parse fails, KERROR is incremented and the single token
# TOKEN("X",line) is suspended instead.
# fd defaults to myin, ps to "ke$ ", option to "".
# Fails on an unexpected get_word() result type or ParserMode.
local m_token
local t
local line,wordlist,wline,wlist
local wtok,tok
local i,j,k,y,z
local ij,tline
local KHRV,VD
local word,token
local prog,info,error,warning,ierror
initial {
   /DOT1 := "existent"
   /VIEW := "tabula rasa"
   /KFORMAT := "ku"
   /HOSEPARATOR := "/"
   /RELSEPARATOR := ","
   /NVSEPARATOR := "="
   /PIPESEPARATOR := "|"
   }
   /fd := myin
   /ps := "ke$ "
   /option := ""
   #------------------------------------------------------------#
   every word := get_word(fd,ps,option) do {
      # Message prefixes are computed first: the type check just below
      # already uses ierror, which was still null on the first pass
      # when these assignments came after it.
      DOTCONCEPT := ".="||DOT1
      VD := VIEW||":"||DOTCONCEPT
      KHRV := KFORMAT||HOSEPARATOR||RELSEPARATOR||NVSEPARATOR
      prog := "get_token("||KHRV||"): "
      info := "INFO: "||prog
      error := "ERROR: "||prog
      warning := "WARNING: "||prog
      ierror := "Internal ERROR: "||prog

      wordlist := []
      case t := type(word) of {
         default: {
            writes_type(myerr,word,ierror||"unexpected type word")
            writes_type(mylog,word,ierror||"unexpected type word")
            fail
            }
         "WORD": { put(wordlist,word) }
         "list": {
            wordlist |||:= word
            if *word = 0 then {
               if DEBUG == "NULL" then
                  writes_type_all(word,ierror||"empty list word")
               #####fail
               }
            }
         } # end case t

      line := unparse(wordlist,"")    # blanks are still in wordlist
      if DEBUG == ("WORD"|"TOKEN"|"PARSE") then {
         writes_type(mybug,line,info||"input line")
         writes_type(mybug,wordlist,info||"input wordlist")
         writes_type(mylog,line,info||"input line")
         writes_type(mylog,wordlist,info||"input wordlist")
         }
      if *line = 0 then next
      #####if *line = 0 then fail
      tok := [TOKEN("X",line)]        # unknown token if no match

      # Every known format now uses the "ku" rules ("ho"/"hounit" used
      # m_holist, "dir" m_dirlist, "group"/"nrel" m_rellist).
      case KFORMAT of {
         ( "list"|"ku"|"cu"|"ho"|"hounit"|"dir"|"group"|"nrel"|
           "nv"|"kb"|"html"|"htm"|"xml"|"rdf"|"owl"|"nt"|"mcf" ): {
            m_token := m_kulist
            }
         default: {
            writes_type(mylog,KFORMAT,warning||"unknown kformat")
            KFORMAT := "ku"
            m_token := m_kulist
            }
         } # end case KFORMAT

      wline := ""
      wlist := []
      every word := !wordlist do {
         if DEBUG == ("WORD"|"TOKEN"|"PARSE") then {
            writes_type_all(word,info||"input word")
            }
         wline ||:= word.wtype
         put(wlist,word.wvalue)
         } # end every word
      if DEBUG == ("WORD"|"TOKEN"|"PARSE") then {
         writes_type_all(wline,info||"input wline")
         #writes_type_all(wlist,info||"input wlist")
         }

      if wline ? { ( wtok <- m_token() ) & pos(0) } then {
         if DEBUG==("WORD"|"TOKEN"|"PARSE") then {
            writes_type(mybug,wtok,info||"parse result wtok")
            writes_type(mylog,wtok,info||"parse result wtok")
            }
         # j walks wlist in step with the type codes each token matched
         j := 0
         every i := 1 to *wtok do {
            j +:= 1
            ij := "(i="||i||",j="||j||")"
            y := wtok[i].tvalue
            z := wlist[j]
            case *y of {
               0: {
                  writes_type(mylog,wline,ierror||"zero length tvalue"||ij)
                  }
               1: { }
               default: {   # composite words
                  if DEBUG==("MAP"|"WORD"|"TOKEN") then {
                     writes_type(mybug,y,info||"composite y"||ij)
                     writes_type(mylog,y,info||"composite y"||ij)
                     }
                  every k := 2 to *y do {
                     j +:= 1
                     z ||:= wlist[j]
                     } # end every k
                  } # end default
               } # end case *y
            wtok[i].tvalue := z
            } # end every i
         tok := map_token(wtok)
         tline := token2string(tok)
         if DEBUG==("WORD"|"TOKEN"|"PARSE") then {
            writes_type(mylog,tok,info||"map result tok")
            writes_type(mylog,tline,info||"map result tline")
            }
         }
      else {
         KERROR +:= 1
         writes_type(myerr,line,error||"unknown token")
         writes_type(mylog,line,error||"unknown token")
         #####OLDcomplete("_UnknownToken_")   # token.icn
         } # end if wline ? { }

      if DEBUG == ("WORD"|"TOKEN"|"PARSE") then {
         writes_type(mybug,tok,info||"output tokenlist tok")
         writes_type(mylog,tok,info||"output tokenlist tok")
         }
      case ParserMode of {
         default: {
            writes_type(myerr,ParserMode,ierror||"unexpected ParserMode")
            writes_type(mylog,ParserMode,ierror||"unexpected ParserMode")
            fail
            }
         "word": { every token := !tok do suspend token }
         "line": { suspend tok }
         }
      } # end every word
   #----------------------------------------------------------#
end

#==========================================================#
#==========================================================#

# integer
procedure NEWcomplete(word)
#==========================
# Track syntactic nesting in static state and return a count.
# NOTE: map_token() no longer calls NEWcomplete()
#       for begin,end (allows immediate processing
#       of relation tuples & hierarchy info.)
#
# Control words:
#   "_count_"       1 if inside begin-end, every or brace; else *nesting
#   "_html_"        html_beginend
#   "_html_reset_"  reset the html state, then continue as an ordinary word
#   "_group_"       BeginEnd (> 0 means in begin end group)
#   "_begin_"       increment BeginEnd and return it
#   "_end_"         decrement BeginEnd and return it
#   "_start_" | "_restart_"   clear nesting and the ku_* counters
# Any other word updates the nesting stack according to KFORMAT and
# returns the resulting stack depth.
static nesting,html_nest
static BeginEnd
static ku_beginend,ku_every,ku_brace
static html_beginend,xml_beginend
static info
initial {
   info := "# INFO: NEWcomplete: "
   BeginEnd := 0
   ku_beginend := 0     # begin gtype gname; ... end gtype gname;
   ku_every := 0        # every sentence; { ... };
   ku_brace := 0        # { ... }
   html_beginend := 1   # < ... >
   xml_beginend := 0    # <tag> content </tag>
   nesting := []        # stack
   html_nest := []      # stack
   }

   # check nesting
   #--------------#
   # called by map_token()  in token.icn   word
   # called by parse_file() in symbol.icn  "_count_"
   # called by map_token()  in token.icn   "_group_"
   # called by map_token()  in token.icn   "_begin_"
   # called by map_token()  in token.icn   "_end_"
   case word of {
      "_count_": {
         if (ku_beginend > 0) | (ku_every > 0) | (ku_brace > 0) then
            return 1
         #else if html_beginend > 0 then
         #   return 1
         #else if xml_beginend > 0 then
         #   return 1
         return *nesting
         }
      "_html_": { return html_beginend }
      "_html_reset_": { html_nest := []; html_beginend := 1 }
      "_group_": { return BeginEnd }    # > 0 means in begin end group
      "_begin_": { BeginEnd +:= 1; return BeginEnd }
      "_end_": { BeginEnd -:= 1; return BeginEnd }
      ("_start_"|"_restart_"): {
         ###BeginEnd := 0    # finish begin-end group
         nesting := []
         ###html_nest := []
         ku_beginend := 0
         ku_every := 0
         ku_brace := 0
         ###html_beginend := 1
         xml_beginend := 0
         return *nesting
         }
      default: { }   # continue below
      } # end case word

   if ku_beginend > 0 then {
      # inside begin-end group: only "end" is processed further
      if word == "end" then { } else
         return *nesting
      }
   #####if html_beginend > 0 then {
   #####   # inside < >
   #####   if word == ">" then { } else
   #####      return *nesting
   #####}

   if DEBUG=="COMPLETE" then {
      writes_type(mybug,nesting,info||"before update: nesting")
      writes_type(mybug,word,info||"word")
      writes_type(mylog,nesting,info||"before update: nesting")
      writes_type(mylog,word,info||"word")
      }

   # update nesting
   #---------------#
   case KFORMAT of {
      "list": { }
      ("ku"|"cu"): {
         case word of {
            #"_start_": { push(nesting,word) }
            #"_restart_": { push(nesting,word) }
            #";": { popcheck("_start_",word,nesting) }
            "if": { push(nesting,word) }
            "fi": { popcheck("if",word,nesting) }
            ( "every"|"while"|"until"|"when"|
              "forall"|"forAll"|
              "exists"|"forSome" ): {
               ku_every +:= 1
               push(nesting,"every")
               }
            "do": { push(nesting,word) }
            "DO": { push(nesting,"do") }
            "can": { push(nesting,"do") }
            "vdo": { push(nesting,"do") }
            "hdo": { push(nesting,"do") }       # hwalk
            "ido": { push(nesting,"do") }       # hwalk
            "!": { push(nesting,"do") }         # shell command (! ... done;)
            "happens": { push(nesting,"do") }   # interaction
            "done": { popcheck("do",word,nesting) }
            "[": { push(nesting,word) }
            "]": { popcheck("[",word,nesting) }
            "{": {
               if ku_every > 0 then {
                  popcheck("every",word,nesting)
                  # popcheck() may have called NEWcomplete("_restart_"),
                  # which zeroes ku_every; do not push it below zero
                  if ku_every > 0 then ku_every -:= 1
                  }
               ku_brace +:= 1
               push(nesting,word)
               }
            "}": {
               ku_brace -:= 1
               popcheck("{",word,nesting)
               }
            "(": { push(nesting,word) }
            ")": { popcheck("(",word,nesting) }
            "<": { push(nesting,word) }
            #####">": { popcheck("</",word,nesting) }
            ">": { popcheck("<",word,nesting) }
            "begin": { ku_beginend +:= 1 }   #; push(nesting,word) }
            "end": { ku_beginend -:= 1 }     #; popcheck("begin",word,nesting) }
            } # end case word
         }
      ("ho"|"hounit"|"nrel"|"nt"|"mcf"): {
         case word of {
            "begin": { ku_beginend +:= 1 }   #; push(nesting,word) }
            "end": { ku_beginend -:= 1 }     #; popcheck("begin",word,nesting) }
            } # end case word
         }
      # "xlm" was a misspelling of "xml"; kept so existing settings still match
      ("html"|"htm"|"xml"|"xlm"|"rdf"|"daml"|"owl"): {
         ##case word of {
         ##"<": { html_beginend +:= 1; push(html_nest,word) }
         ##">": { html_beginend -:= 1; popcheck("</",word,html_nest) }
         ##"/": { html_beginend -:= 1; popcheck("/",word,html_nest) }
         ##} # end case word
         }
      } # end case KFORMAT

   if DEBUG=="COMPLETE" then {
      writes_type(mybug,nesting,info||"after update: nesting")
      writes_type(mylog,nesting,info||"after update: nesting")
      }
   return *nesting
end

procedure popcheck(nestbegin,nestend,nest)
#=========================================
# Pop the top of stack nest and check that it is nestbegin, the opener
# expected for the closer nestend.
#   empty stack: warn (no message for "", "<>", "()", "[]", "{}"),
#                call NEWcomplete("_restart_") and return
#   mismatch:    call NEWcomplete("_restart_") unless the expected
#                pair is "</>"
# No value is produced on the non-empty path; callers in this file
# ignore the result.
local xbegin,found,lookfor
static info,popfail,mismatch
initial {
   info := "# INFO: popcheck: "
   popfail := "# WARNING: popcheck: empty nesting stack: "
   mismatch := "# WARNING: popcheck: mismatch: "
   }
   if DEBUG=="COMPLETE" then {
      writes_type_all(nestbegin,info||"nestbegin")
      writes_type_all(nestend,info||"nestend")
      }
   lookfor := nestbegin||nestend

   if xbegin := pop(nest) then { } else {
      case lookfor of {
         "": { }
         "<>": { }   # rdfs:comment = <...>; ???
         "()": { }
         "[]": { }
         "{}": { }
         default: {
            writes_type_all(lookfor,popfail||"looking for")
            }
         } # end case lookfor
      NEWcomplete("_restart_")
      return
      }

   if DEBUG=="COMPLETE" then {
      writes_type_all(xbegin,info||"xbegin")
      }
   found := xbegin||nestend
   if found == lookfor then { } else {
      if DEBUG=="COMPLETE" then {
         write(myerr,mismatch,"looking for ",lookfor," found "||found)
         write(mylog,mismatch,"looking for ",lookfor," found "||found)
         }
      if lookfor == "</>" then {
         # wait for next HTML group
         }
      else {
         NEWcomplete("_restart_")
         }
      }
end

#==========================================================#

# integer
procedure OLDcomplete(word)
#=======================
# set syntactic group count
#   begin - end
#   if - fi
#   do - done
#   every
#   [ ] { } < > ( )
# Keeps one static counter per construct, resets all of them on the
# listed "_..._" control words, updates them for word according to
# KFORMAT, and returns the resulting count.
# NOTE(review): for KFORMAT "list" gcount is never assigned, so the
# result is null there -- confirm that no caller depends on a number.
local gcount
local iinfo
static info,ierror
static beginend, iffi, dodone, everydo, whendo, whiledo, untildo
static bracket, brace, angle, paren
static htmlgroup
static semicolon
initial {
   info := "INFO: OLDcomplete: "
   ierror := "Internal ERROR: OLDcomplete: "
   beginend := integer(0)
   iffi := integer(0)
   dodone := integer(0)
   everydo := integer(0)
   whendo := integer(0)
   whiledo := integer(0)
   untildo := integer(0)
   bracket := integer(0)
   brace := integer(0)
   angle := integer(0)
   paren := integer(0)
   htmlgroup := integer(0)
   #semicolon := integer(1)
   } # end initial
   iinfo := info||" word <"||word||"> "

   # do initial count
   #-----------------#
   case KFORMAT of {
      default: { gcount := 0 }
      "list": { }
      ("ku"|"cu"): {
         # same formula as the final count below (angle was added twice)
         gcount := iffi +
                   everydo + whendo + whiledo + untildo + dodone +
                   bracket + brace + paren + angle   # + semicolon
         }
      ("ho"|"hounit"|"nrel"): {
         gcount := beginend
         }
      # "xlm" was a misspelling of "xml"; kept so existing settings still match
      ("html"|"htm"|"xml"|"xlm"|"rdf"): {
         gcount := angle
         }
      } # end case KFORMAT
   if DEBUG == "COMPLETE" then {
      writes_type(mybug,gcount,iinfo||"initial gcount")
      writes_type(mylog,gcount,iinfo||"initial gcount")
      }

   # do reset
   #---------#
   case word of {
      default: { }   # continue
      ( "_TabulaRasaComplete_"|       # initialize_tabrasa() in init.icn
        "_InitializationComplete_"|   # main() in ke.icn,ksc.icn,tap2mkr.icn
        "_BeginRead_"|                # command() in command.icn
        "_EndRead_"|                  # command() in command.icn
        "_BeginGroup_"|               # init.icn
        "_EndGroup_"|                 # init.icn
        "_GroupError_"|               # init.icn
        "_SyntaxError_"               # get_symbol() in symbol.icn
        #"_LineComplete_"|            # continue_token(), main()
        #"_UnknownToken_"             # get_token() in token.icn
      ): {
         writes_type(mylog,word,info||"reset word")
         beginend := integer(0)
         iffi := integer(0)
         dodone := integer(0)
         everydo := integer(0)
         whendo := integer(0)
         whiledo := integer(0)
         untildo := integer(0)
         bracket := integer(0)
         brace := integer(0)
         angle := integer(0)
         paren := integer(0)
         htmlgroup := integer(0)
         ##semicolon := integer(1)
         } # end reset
      } # end case

   # update count
   #-------------#
   case KFORMAT of {
      "list": { }
      ("ku"|"cu"): {
         case word of {
            "if": { iffi +:= 1 }
            "fi": { iffi -:= 1 }
            "every": { everydo +:= 1 }
            "when": { whendo +:= 1 }
            "while": { whiledo +:= 1 }
            "until": { untildo +:= 1 }
            ( "do"| "DO"| "!"| "can"|
              "vdo"| "hdo"| "ido" ): { dodone +:= 1 }
            "happens": { dodone +:= 1 }
            "done": { dodone -:= 1 }
            "[": { bracket +:= 1 }
            "]": { bracket -:= 1 }
            "{": {
               brace +:= 1
               if everydo > 0 then everydo -:= 1
               if whendo > 0 then whendo -:= 1
               if whiledo > 0 then whiledo -:= 1
               if untildo > 0 then untildo -:= 1
               }
            "}": { brace -:= 1 }
            "(": { paren +:= 1 }
            ")": { paren -:= 1 }
            ##";": { semicolon -:= 1 }
            "<": { angle +:= 1 }   ##; semicolon := 0 }
            ">": { angle -:= 1 }
            } # end case word
         gcount := iffi +
                   everydo + whendo + whiledo + untildo + dodone +
                   bracket + brace + paren + angle   # + semicolon
         }
      ("ho"|"hounit"|"nrel"): {
         case word of {
            "begin": { beginend +:= 1 }
            "end": { beginend -:= 1 }
            } # end case word
         gcount := beginend
         }
      ("html"|"htm"|"xml"|"xlm"|"rdf"): {
         case word of {
            "<": { angle +:= 1 }
            ">": { angle -:= 1 }
            } # end case word
         gcount := angle
         }
      } # end case KFORMAT

   #####case word of {
   #####"htmlbegin": { htmlgroup +:= 1 }
   #####"htmlend": { htmlgroup -:= 1 }
   #####} # end case word
   #####gcount := htmlgroup

   if DEBUG == "COMPLETE" then {
      writes_type(mybug,gcount,iinfo||"final gcount")
      writes_type(mylog,gcount,iinfo||"final gcount")
      }
   return gcount
end

#==========================================================#
#==========================================================#

# TOKEN
procedure OLDcontinue_token(linetoken,infd,continue)
#================================================
# called from get_token() in token.icn
# automatic continuation for beginning & middle keywords & special characters
#   , [ { ( < \
#   isa,isc, is,has,do, if,then,else, every, ..., with
#   at, of, with, from, to
#   begin, end
#   know, believe, etc.   <== NOT currently reserved
#
# Reads linetoken.line / linetoken.token, decides from the type of the
# last token and OLDcomplete() whether the statement continues, and if
# so prompts for another line and appends it.  infd defaults to myin,
# continue to "yes".
# NOTE(review): the calls get_word(xline) and
#   get_token(xlineword,infd,continue) do not match the
#   get_token(fd,ps,option) signature in this file, and the result is
#   built with TOKEN(...) although it is later read through .line --
#   this looks like retired code; confirm before relying on it.
# NOTE(review): for KFORMAT "list" nocont, cont and icomplete stay
#   unset before they are used below -- confirm that case is unreachable.
# NOTE(review): copy_list() is not defined in the visible part of this
#   file (copy_tlist() is) -- verify it exists elsewhere.
local line,tok,newline,newtok,newlinetoken
local xlineword
local tlast,xline,xlinetoken
local nocont,cont,icomplete
local head,KHRV,cilast
local iinfo,iierror
static info,ierror
initial {
   info := "INFO: OLDcontinue_token: "
   ierror := "Internal ERROR: OLDcontinue_token: "
   }
   /infd := myin
   /continue := "yes"
   KHRV := "("||KFORMAT||HOSEPARATOR||RELSEPARATOR||NVSEPARATOR||")"

   case KFORMAT of {
      "list": { }
      ("ku"|"cu"): {
         nocont := ';' ++ '>'
         cont := '\\' ++ '!,&|~' ++ '[{(<' ++ 'ACDEGHIiJPpQRTWYZ' ++ '='
         icomplete := OLDcomplete("")       # token.icn
         }
      ("ho"|"hounit"): {
         nocont := '>'
         cont := '<'
         icomplete := OLDcomplete("html")   # token.icn
         }
      ("dir"|"nrel"|"nv"): {
         return linetoken
         }
      ("rdf"|"xml"|"html"|"htm"): {
         nocont := '>'
         cont := '<' ++ '='
         icomplete := OLDcomplete("html")   # token.icn
         }
      "nt": {
         nocont := ''
         cont := ''
         icomplete := OLDcomplete("")       # token.icn
         }
      "mcf": {
         nocont := ''
         cont := ''
         icomplete := OLDcomplete("")       # token.icn
         }
      default: {
         writes_type(mylog,KFORMAT,ierror||"unexpected KFORMAT")
         return linetoken
         }
      } # end case KFORMAT

   line := linetoken.line
   tok := linetoken.token
   newline := line
   newtok := copy_list(tok)
   tlast := tok[-1].ttype | fail      # fail when there is no last token
   cilast := "("||continue||icomplete||tlast||")"
   head := KHRV||cilast
   iinfo := info||head
   iierror := ierror||head
   if DEBUG == "CONTINUE" then {
      writes_type(mybug,line,iinfo||"input line")
      writes_type(mylog,line,iinfo||"input line")
      }

   newlinetoken := linetoken
   if continue=="yes" then {
      if upto(tlast,nocont) & (icomplete <= 0) then {
         # no continuation
         if DEBUG=="CONTINUE" then {
            writes_type(mybug,line,iinfo||"no continutaion")
            writes_type(mylog,line,iinfo||"no continutaion")
            OLDcomplete("_LineComplete_")
            }
         }
      else if upto(tlast,cont) | (icomplete > 0) then {
         # continue to next line
         if tlast == "\\" then {
            # delete \
            newtok := newtok[1:-1]
            newline := newline[1:-1]
            }
         if xline := prompt(infd) then {
            xlineword := get_word(xline)
            xlinetoken := get_token(xlineword,infd,continue)
            newline ||:= " "|| xlinetoken.line
            newtok |||:= copy_list(xlinetoken.token)
            newlinetoken := TOKEN(newline,newtok)
            if DEBUG=="CONTINUE" then {
               writes_type(mybug,newline,iinfo||"next line")
               writes_type(mylog,newline,iinfo||"next line")
               }
            } # end if xline
         }
      else {
         if DEBUG=="CONTINUE" then {
            writes_type(mybug,line,iinfo||"no continutaion")
            writes_type(mylog,line,iinfo||"no continutaion")
            OLDcomplete("_LineComplete_")
            }
         } # end if upto
      } # end if continue

   if DEBUG=="CONTINUE" then {
      writes_type(mybug,newlinetoken.line,iinfo||"output line")
      writes_type(mylog,newlinetoken.line,iinfo||"output line")
      }
   return newlinetoken
end

# TOKEN list
procedure map_token(tok)
#=======================
# phase 2 parse
# map ttype for get_symbol()
#
# Walks tok and, for every token:
#   - rewrites ttype (quotes/parens/weak separators become "w";
#     separators take their own value as type; words are looked up in
#     mkr_word, rdf_word and owl_word)
#   - drops comments (unless CommentMode is "cyc") and whitespace
#   - reports nesting words to NEWcomplete()
#   - for html/htm/xml/rdf/owl splits a leading or trailing "/" off a
#     word into its own TOKEN("/","/")
# The tokens of tok are modified in place; the returned list x holds
# the tokens that were kept.
local x,y,word
local kinfo
static info
initial {
   info := "INFO: map_token: "
   }
   kinfo := info||"kformat <"||KFORMAT||"> "
   #DEBUG := "MAP_TOKEN"
   if DEBUG==("MAP_TOKEN"|"MAP"|"PARSE"|"END") then {
      writes_type(mybug,tok,kinfo||"input tok")
      writes_type(mylog,tok,kinfo||"input tok")
      }

   x := []
   every y := !tok do {
      case y.ttype of {
         default: { }
         "q": { y.ttype := "w" }   # squote q => w for get_symbol()
         "Q": { y.ttype := "w" }   # dquote Q => w for get_symbol()
         "a": { y.ttype := "a" }   # angle  a => a for get_symbol()
         "b": { y.ttype := "w" }   # paren  b => w for get_symbol()
         "c": {
            case CommentMode of {
               default: { next }   # delete comment
               "cyc": { y.ttype := "w" }
               }
            }
         ##### "h": {
         #####    # HTML command
         #####    if REMOVEHTML==("yes"|"YES") then {
         #####       do_html(y)
         #####       next   # delete HTML
         #####    } # end if REMOVEHTML
         ##### } # end "h"
         "B": {
            case KFORMAT of {
               default: { next }   # delete whitespace
               #("ho"|"hounit"|"nrel"): { }   # keep whitespace for parsing phrase
               }
            }
         "j": {   # and,or
            case KFORMAT of {
               default: { }   # reserved
               ("ho"|"hounit"|"nrel"): { y.ttype := "w" }   # not reserved
               }
            }
         #"h": { y.tvalue := trimws(y.tvalue) }   # howord  <== obsolete
         #"r": { y.tvalue := trimws(y.tvalue) }   # relword <== obsolete
         #"d": { y.tvalue := trimws(y.tvalue) }   # dirword <== obsolete
         #"A": { y.ttype := "=" }   # assignment,production <== obsolete
         #"D": { y.ttype := "$" }   # $variable <== obsolete
         ",": { y.ttype := y.tvalue }    # list
         ";": { y.ttype := y.tvalue }    # relation,proplist
         #"=": { y.ttype := y.tvalue }   # assignment,production
         "!": { y.ttype := y.tvalue }    # sh command
         #"~": { y.ttype := y.tvalue }   # not       <== nonseparator
         #"?": { y.ttype := y.tvalue }   # question  <== nonseparator
         #"/": { y.ttype := y.tvalue }   # hierarchy <== nonseparator
         #"$": { y.ttype := y.tvalue }   # $variable <== nonseparator
         "W": { y.ttype := "w" }         # weak separators
         #":": { y.ttype := y.tvalue }   #
         #"+": { y.ttype := y.tvalue }   #
         #"-": { y.ttype := y.tvalue }   #
         #"*": { y.ttype := y.tvalue }   #
         "S": {   # strong separators
            case y.tvalue of {
               default: { y.ttype := y.tvalue }
               }
            }
         "w": {   # word includes no separators
            if member(mkr_word,y.tvalue) then {
               y := group_map_token(y)
               #case y.tvalue of {
               #"next": { y.tvalue := ";" }
               #"non": { y.tvalue := "~" }
               #"not": { y.tvalue := "~" }
               #}
               }
            else if member(rdf_word,y.tvalue) then {
               y.ttype := rdf_word[y.tvalue]
               }
            else if member(owl_word,y.tvalue) then {
               y.ttype := owl_word[y.tvalue]
               } # end if
            } # end "w"
         "x": {   # xword includes weak separators W
            if y.tvalue ? {
               ="isa**" | ="isc**" | ="iss**" | ="isg**" |
               ="isu**" | ="isp**" |
               ="genlmt**" | ="specmt**"
               } then {
               # "lw" | "l?"
               y.ttype := "R"
               #} else if upto("*",y.tvalue) then {
               # wildcard pattern
               }
            else if member(mkr_word,y.tvalue) then {
               y := group_map_token(y)
               #case y.tvalue of {
               #"next": { y.tvalue := ";" }
               #"non": { y.tvalue := "~" }
               #"not": { y.tvalue := "~" }
               #}
               }
            else if member(rdf_word,y.tvalue) then {
               y.ttype := rdf_word[y.tvalue]
               }
            else if member(owl_word,y.tvalue) then {
               y.ttype := owl_word[y.tvalue]
               }
            else {
               y.ttype := "w"   # x => w for get_symbol()
               } # end if
            } # end "x"
         } # end case y.ttype

      # set syntactic nesting group count
      word := y.tvalue
      if DEBUG=="COMPLETE" then {
         writes_type_all(word,info||"NEWcomplete")
         }
      case word of {
         "": { }
         "begin": { NEWcomplete("_begin_") }
         "end": { NEWcomplete("_end_") }
         ( "if"|"fi"|
           "every"|"when"|"while"|"until"|
           "do"|"DO"|"!"|"can"|"vdo"|"hdo"|"ido"|"happens"|"done"|
           "<"|">"|"{"|"}"|"["|"]"|"("|")"|
           ";" ): { NEWcomplete(word) }
         } # end case y.tvalue

      # separate leading and trailing "/" in HTML parsing
      case KFORMAT of {
         default: { put(x,y) }
         ("html"|"htm"|"xml"|"rdf"|"owl"): {
            if y.ttype == "w" then {
               if y.tvalue[1] == "/" then {
                  y.tvalue := y.tvalue[2:0]
                  put(x,TOKEN("/","/"),y)
                  }
               else if y.tvalue[-1] == "/" then {
                  y.tvalue := y.tvalue[1:-1]
                  put(x,y,TOKEN("/","/"))
                  }
               else {
                  put(x,y)
                  } # end if == "/"
               }
            else {
               put(x,y)
               } # end if == "w"
            } # end "html"|...
         } # end case KFORMAT
      } # end every y

   if DEBUG==("MAP_TOKEN"|"MAP"|"PARSE"|"END") then {
      writes_type(mybug,x,kinfo||"output x")
      writes_type(mylog,x,kinfo||"output x")
      }
   return x
end

# TOKEN
procedure group_map_token(y)
#===========================
# member(mkr_word,y.tvalue) is true
# allow reserved words in hierarchies & relations
#   => bad idea! everything gets parsed as group_statement
#
# Sets y.ttype from mkr_word[y.tvalue].  For KFORMAT "ho", "hounit"
# and "nrel" only "end", "exit", "break" and "let" keep their reserved
# type; every other word becomes a plain "w".  Returns y.
local kinfo
static info
initial {
   info := "INFO: group_map_token"
   }
   kinfo := info||"("||KFORMAT||"): "
   #DEBUG := "END"
   if DEBUG==("MAP_TOKEN"|"BEGIN"|"END") then {
      writes_type(mybug,y,kinfo||"input token")
      writes_type(mylog,y,kinfo||"input token")
      }

   case KFORMAT of {
      default: { y.ttype := mkr_word[y.tvalue] }
      ("ho"|"hounit"|"nrel"): {
         case y.tvalue of {
            "end": { y.ttype := mkr_word[y.tvalue] }     # "Z"
            "exit": { y.ttype := mkr_word[y.tvalue] }    # "z"
            "break": { y.ttype := mkr_word[y.tvalue] }   # "z"
            "let": { y.ttype := mkr_word[y.tvalue] }     # "S"
            default: { y.ttype := "w" }
            }
         }
      } # end case KFORMAT

   if DEBUG==("MAP_TOKEN"|"BEGIN"|"END") then {
      writes_type(mybug,y,kinfo||"output token")
      writes_type(mylog,y,kinfo||"output token")
      }
   return y
end

# TOKEN list
procedure horel_token(tok)
#=========================
# no special meaning for words in HO and REL
#
# Returns a new list of copies of the tokens in tok.  Every token whose
# value is not one of the exceptions listed below, and whose type is
# not "/" or "\\", gets type "w".  The input tokens are not modified.
local x,y,t,kinfo,kwarning
static info,warning
initial {
   info := "INFO: horel_token"
   warning := "WARNING: horel_token"
   }
   kinfo := info||"("||KFORMAT||"): "
   kwarning := warning||"("||KFORMAT||"): "
   if DEBUG == ("HO"|"REL"|"NREL") then
      writes_type_all(tok,kinfo||"input token list")

   x := []
   every t := !tok do {
      y := copy(t)
      case y.tvalue of {
         default: {
            case y.ttype of {
               "/": { }
               "\\": { }   # should be "/"
               default: { y.ttype := "w" }
               } # end case y.ttype
            }
         # exceptions: group
         "begin": { }
         "end": { }
         "exit": { }
         ";": { }
         # exceptions: hierarchy
         "ho": { }
         "hierarchy": { }
         "unithierarchy": { }
         #####"/": { }
         #####"\\": { }
         #####"isu": { }
         #####"iss": { }
         # exceptions: relation
         "nrel": { }
         "relation": { }
         ",": { }
         "[": { }
         "]": { }
         # exceptions: do read from file done; x has y=z;
         "do": { }
         "read": { }
         "from": { }
         "done": { }
         ##"has": {}
         ##"=": {}
         } # end case y.tvalue
      put(x,y)
      } # end every t

   # same DEBUG selectors as the input trace above ("REL" was missing)
   if DEBUG == ("HO"|"REL"|"NREL") then
      writes_type_all(x,kinfo||"output token list")
   return x
end

#