#
# KEHOME/src/token.icn
#
# Oct/27/2005 Dec/27/2006
# Jan/24/2007 treat (...) like quote - parens
# Jan/25/2007 no NONSEP in DollarNameChar
# Feb/3/2007 treat <...> like quote - angles
# Feb/8/2007 xor ~ daml:disjointWith, owl:differentFrom
# Feb/9/2007 new relation syntax
# multi-line dquote
# Feb/18/2007 purge "variable"
# Feb/25/2007 "HAS" "l"
# Mar/5/2007 "isnon"
# Mar/28/2007 "verb" -- change "R" back to "w"
# Apr/1/2007 not_dollar, # not SEPARATOR
# Apr/3/2007 fix bug: NEWcomplete() x has y = <...>;
# Jun/5/2008 owl:Class in owl_word
# Jun/9/2008 "iswith","rel"
# Jul/25/2008 "suspend" (Unicon generator)
# Jul/26/2008 "unithierarchy" (for backwards compatibility)
# Jul/27/2008 ido
# Jul/28/2008 horel_token() - no special meaning for words in HO and REL
# Aug/1/2008 remove HO and NREL context stuff - "W","X"
# Aug/6/2008 popcheck: no warning message for "<>"
# Aug/7/2008 popcheck: no warning message for "()" "[]" "{}"
# Aug/17/2008 RDF,OWL words - change "p" to "R", "C" to "w"
# Aug/17/2008 get_word: look for "/" and "\\"
# Aug/17/2008 horel_token: no exception for "isu" "iss"
# Aug/24/2008 horel_token: exception for "has" "="
# Aug/25/2008 get_word: empty string
# Aug/26/2008 PIPESEPARATOR := "|"
# Aug/31/2008 process "|" like ";" for now
procedure init_parser()
   # Run the parser initialisers once each, in a fixed order:
   #   init_char, init_word, init_keyword, init_group.
   # init_keyword is believed to install only a minimal keyword set
   # (open question carried over from the original author).
   # Results of the individual initialisers are ignored.
   local setup
   every setup := init_char | init_word | init_keyword | init_group do
      setup()
end
# Parser-wide state shared with other source files:
#   not_dollar        cset used by m_comment() (every character but '$')
#   ParserMode        "word" | "line"; set by init.icn, symbol.icn
#   CommentCharacter  initially '#'
#   WordChar          cset for the current word definition
global not_dollar, ParserMode, CommentCharacter, WordChar
#===================================#
# pattern matching: #
# line -> WORD list -> TOKEN list #
#===================================#
# WORD: one low-level lexical unit produced by get_word().
#   wtype   one-character type code (string)
#   wvalue  matched text (string)
record WORD(wtype, wvalue)
# TOKEN: one token as read by token_type()/token_value().
#   ttype   one-character type code (string)
#   tvalue  token text (string)
record TOKEN(ttype, tvalue)
#=========#
# methods #
#=========#
# get_word(fd,ps,option)
# map_word(word)
# get_token(word)
# map_token(fd,ps,option)
# token_type(x)
# token_value(x)
# token_unparse(x)
# init_parser() # called by interpret_line() in symbol.icn
# init_parser() # called by knit_init() in knit.icn
# init_keyword() # token.icn
# init_command() # command.icn
# init_parameter() # param.icn
# init_quantifier() # token.icn
# init_gtype() # token.icn
# init_variable() # token.icn
# init_char() # char.icn
# declare_word() # token.icn
# m_dquote()
# m_squote()
# m_paren() # treat like quote
# m_angle() # treat like quote
# m_html() # single-line only
# m_comment()
# m_whitespace()
# m_dollarvar()
# m_separator()
# m_assignword()
# m_word()
# --- global parsing state (most of it is filled in by init_word) ---
global XLINE
# character classes
global SEPARATOR, WhiteSpace, NONSEP
global AssignOp, AssignChar
global WordChar, xWordChar, NumberChar
global DollarNameChar, FileNameChar
global ListSeparator
# vocabulary tables and the hidden-word collection
global mkr_word, HIDDEN
global rdf_word, owl_word, cycl_word
global dmoz_word, tap_word, cyc_word
global CONTEXTword
# word categories
global KEYWORD, CONTROL, VERB, PREPOSITION, OPERATOR, CONJUNCTION
global GTYPE, xxGROUP, EXGROUP, INGROUP, LATTICE
global QUANTIFIER, QUANT, groupQUANT
global VARIABLE, keVARIABLE, PRONOUN
global PARAMETER
global COMMAND
#----------------------------------------------------#
# token_unparse(x,sep) -> string
#   x    a TOKEN record or a list of TOKEN records
#   sep  separator handed to unparse() for lists; defaults to " "
# A TOKEN yields its value; a list yields unparse() of the list of
# values.  Any other type of x yields &null.
procedure token_unparse(x,sep)
   local text
   /sep := " "
   if type(x) == "TOKEN" then
      text := token_value(x)
   else if type(x) == "list" then
      text := unparse(token_value(x),sep)
   return text
end
# token_type(x) -> string or list
#   For a TOKEN record: its ttype field.
#   For a list: a new list holding the ttype field of every element,
#   in order.
#   For any other type: &null.
procedure token_type(x)
   local kinds
   if type(x) == "TOKEN" then
      kinds := x.ttype
   else if type(x) == "list" then {
      kinds := []
      every put(kinds, (!x).ttype)
   }
   return kinds
end
# token_value(x) -> string or list
#   For a TOKEN record: its tvalue field.
#   For a list: a new list holding the tvalue field of every element,
#   in order.
#   For any other type: &null.
procedure token_value(x)
   local values
   if type(x) == "TOKEN" then
      values := x.tvalue
   else if type(x) == "list" then {
      values := []
      every put(values, (!x).tvalue)
   }
   return values
end
#===================================================================#
#===================================================================#
# words
# NOTE processing sequence:
# get_word()
# map_word()
# get_token()
# map_token()
# get_symbol()
# map_symbol()
# WORD list
procedure get_word(fd,ps,option)
#===============================
# Low level parse: generator that turns input lines into WORD lists.
#
# called by get_token() in token.icn
# called by parse_list() in word.icn
#
# Parameters:
#   fd      input handed to prompt(); defaults to the global myin
#   ps      defaults to "ke$ "; not otherwise used in this procedure
#   option  defaults to ""; not otherwise used in this procedure
#
# Results (one or more per line generated by prompt(fd)):
#   ParserMode "line": suspends the whole WORD list for the line
#   ParserMode "word": suspends a one-element list for each WORD
#   empty line:        suspends [WORD("w","")]
# Fails (ending the generator) when a line is neither string nor
# integer, or when ParserMode has an unexpected value.
local t,line
local wordlist,word,val
local matchline,mapline
static info,warning,ierror
initial {
# message prefixes for the diagnostic output below
info := "INFO: get_word: "
warning := "WARNING: get_word: "
ierror := "Internal ERROR: get_word: "
}
/fd := myin
/ps := "ke$ "
/option := ""
every line := prompt(fd) do {
if DEBUG==("WORD"|"PARSE") then {
writes_type(mybug,line,info||"input line")
writes_type(mylog,line,info||"input line")
}
# validate the line type; t itself is not used after this case
case t := type(line) of {
default: {
writes_type(myerr,line,ierror||"unexpected type line")
writes_type(mylog,line,ierror||"unexpected type line")
#return []
fail
}
("string"|"integer"): {
if *line = 0 then {
# empty line: hand back a single empty "w" word and go to next line
if DEBUG == "NULL" then
writes_type_all(line,warning||"empty string line")
suspend [WORD("w","")]
next
}
}
} # end case t
wordlist := []
matchline := line
# mapline accumulates the (possibly mapped) word values so that
# changes made by map_word() can be reported below
mapline := ""
matchline ? {
# Try the matchers in the order listed; the first one that succeeds
# at the current position wins.  The loop stops at the first position
# where no matcher succeeds; any remaining text is not consumed
# (the trailing "& tab(0)" check is disabled).
while word := (
WORD("Q",m_dquote()) |
WORD("q",m_squote()) |
WORD("b",m_paren()) |
## WORD("a",m_angle()) |
## WORD("h",m_htmlword()) |
WORD("c",m_comment()) |
WORD("B",m_whitespace()) |
WORD("$",m_dollarvar()) |
WORD("s",m_separator()) |
WORD("=",m_assignword()) |
WORD("V",m_qualword()) | # not currently a separator
WORD("w",m_word())
) #####& tab(0)
do {
# phase 1: map wtype for get_token()
case word.wtype of {
#####"h": { # html
##### if DEBUG==("HTML"|"HTM"|"XML"|"RDF") then {
##### writes_type(mybug,word,info||"html: word")
##### writes_type(mylog,word,info||"html: word")
##### } # end if DEBUG
##### } # end "h"
("q"|"Q"|"a"|"b"): { # quote & angle & paren
val := word.wvalue
# a one-character value is an unmatched quote mark: treat as plain word
if *val = 1 then {
if DEBUG=="QUOTE" then {
writes_type(mybug,val,warning||"isolated quote mark")
writes_type(mylog,val,warning||"isolated quote mark")
}
word.wtype := "w"
} # end if *val
} # end "q"|"Q"|"a"|"b"
"c": { # comment
case CommentMode of {
default: { word.wtype := "c" }
"cyc": { word.wtype := "w" } # CycL constant '#$name'
}
}
"B": { word.wtype := "B" } # white space
"s": {
# other separators
# <> [,] {;} ()
# type code comes from the mkr_word table
word.wtype := mkr_word[word.wvalue] # "S"
}
"=": {
# only the ":=" family is retyped; other assign words keep "="
case word.wvalue of {
":=": { word.wtype := mkr_word[word.wvalue] } # "V"
"+:=": { word.wtype := mkr_word[word.wvalue] } # "V"
"-:=": { word.wtype := mkr_word[word.wvalue] } # "V"
"*:=": { word.wtype := mkr_word[word.wvalue] } # "V"
}
}
"w": {
# recognize end & exit
case word.wvalue of {
"end": { word.wtype := mkr_word[word.wvalue] }
"exit": { word.wtype := mkr_word[word.wvalue] }
}
# recognize question variable & slash
# (decided by the first character of the word)
case word.wvalue[1] of {
default: { }
"?": { word.wtype := mkr_word["?"] }
"/": { word.wtype := mkr_word["/"] }
"\\": { word.wtype := mkr_word["\\"] }
}
# vocabulary mapping may replace the word text
word.wvalue := map_word(word.wvalue) # token.icn
##### don't substitute yet
#####if member(mkr_word,word.wvalue) then
##### word.wtype := mkr_word[word.wvalue]
}
} # end case word.wtype
if DEBUG == "WORD" then
writes_type_all(word,info||"word")
put(wordlist,word)
mapline ||:= word.wvalue
} # end do
} # end line ?
if DEBUG==("WORD"|"PARSE") then {
writes_type(mybug,wordlist,info||"output wordlist")
writes_type(mylog,wordlist,info||"output wordlist")
}
# report (debug only) when the concatenated word values differ from the input
if mapline==line then { } else {
if DEBUG=="MAP_WORD" then {
writes_type(mybug,mapline,warning||"map_word change: mapline")
writes_type(mylog,mapline,warning||"map_word change: mapline")
} # end if DEBUG
} # end if mapline
# deliver the result in the granularity selected by ParserMode
case ParserMode of {
default: {
writes_type(myerr,ParserMode,ierror||"unexpected ParserMode")
writes_type(mylog,ParserMode,ierror||"unexpected ParserMode")
fail
}
"word": { every word := !wordlist do suspend [word] }
"line": { suspend wordlist }
}
} # end every line
end
# map_word(word) -> string
#   Translate a word from one of the external vocabularies (XML, DC,
#   RDF, OWL, MCF) using the matching *2mkr() procedure.
#   The word is returned unchanged unless KMAPWORD is "YES" or "yes",
#   and also when UniqueName is "qualname".
#   A leading "/" is removed before the lookup and put back afterwards.
#   Whenever the result differs from the input, a message is written
#   with writes_all().
procedure map_word(word)
   local mapped,slashed
   static tag
   initial {
      tag := "INFO: map_word: "
      # call each translator once before first use; results are ignored
      dc2mkr("dc:title")
      rdf2mkr("rdfs:Class")
      owl2mkr("owl:Thing")
      mcf2mkr("Node")
      xml2mkr("&owl;")
   }

   # guard clauses: mapping disabled, or qualified names requested
   if not (KMAPWORD === ("YES" | "yes")) then return word
   if UniqueName === "qualname" then return word

   mapped := word
   if word[1] === "/" then {
      slashed := 1
      mapped := word[2:0]
   }

   # first vocabulary containing the word wins
   if member(xml_word,mapped) then
      mapped := xml2mkr(mapped)        # xml.icn
   else if member(words_dc,mapped) then
      mapped := dc2mkr(mapped)         # xml.icn
   else if member(words_rdf,mapped) then
      mapped := rdf2mkr(mapped)        # xml.icn
   else if member(words_owl,mapped) then
      mapped := owl2mkr(mapped)        # xml.icn
   else if member(words_mcf,mapped) then
      mapped := mcf2mkr(mapped)        # xml.icn

   if \slashed then mapped := "/" || mapped

   # reported unconditionally (the DEBUG guard is not active)
   if mapped ~== word then
      writes_all([TypeComment||tag||"word <",word,"> => newword <",mapped,">"])
   return mapped
end
# m_whitespace() -> string
#   Low level matcher used by get_word(): consumes a run of one or more
#   whitespace characters at the current scanning position.
#   get_word() tags the result with wtype "B".
#   The first call (re)sets the global WhiteSpace cset.
procedure m_whitespace()
   initial {
      WhiteSpace := ' ' ++ '\t' ++ '\v' ++ '\r' ++ '\n' ++ '\f'
   }
   suspend tab(many(WhiteSpace))
end
# m_separator() -> string
#   Low level matcher used by get_word(): consumes exactly one
#   separator character at the current scanning position.
#   get_word() replaces the provisional wtype "s" with the type code
#   stored in mkr_word for that character.
#   SEPARATOR is given a default on the first call only if it is
#   still null at that time.
procedure m_separator()
   initial {
      if /SEPARATOR then
         SEPARATOR := '\"\'' ++ '<>[,]{;}()|'
   }
   suspend tab(any(SEPARATOR))
end
###### string
#####procedure m_end()
######================
#####suspend \
##### ="end"
#####end
# m_assignword() -> string
#   Low level matcher used by get_word(): matches an assignment word,
#   i.e. either the bare assignment operator or a run of AssignChar
#   characters immediately followed by the assignment operator
#   (for example "=", "+=", ":=", "+:=").
#   get_word() tags the result with wtype "=".
#   AssignOp and AssignChar get defaults on the first call only if
#   they are still null.
procedure m_assignword()
   initial {
      if /AssignOp then AssignOp := '='
      if /AssignChar then AssignChar := '+-*&|~:'
   }
   suspend =AssignOp | (tab(many(AssignChar)) || =AssignOp)
end
# m_qualword() -> string
#   Low level matcher used by get_word(): matches the two-character
#   qualifier "::" and nothing else.
#   get_word() tags the result with wtype "V".
#   Note: ":" may become a separator in the future.
#   QualOp and QualChar are kept for a more general form that is not
#   active; the current match does not consult them.
procedure m_qualword()
   static QualOp,QualChar
   initial {
      QualOp := ':'
      QualChar := ':'
   }
   suspend ="::"
end
# m_word() -> string
#   Low level matcher: consumes the longest run of WordChar characters
#   at the current scanning position (at least one).
#   get_word() tags the result with wtype "w".
procedure m_word()
   suspend tab(many(WordChar))
end
#===================================================================#
#===================================================================#
# extended words
# m_xword() -> string
#   Intermediate level matcher for extended words: consumes the
#   longest run of xWordChar characters (at least one).
#   Per the original notes, get_token() types the result "x" and
#   map_token() later turns "x" into "w".
procedure m_xword()
   suspend tab(many(xWordChar))
end
# m_dollarvar() -> string
#   Low level matcher used by get_word() for dollar variables.
#   Accepts two spellings, tried in this order:
#     $name      "$" followed by one or more DollarNameChar
#     ${name}    the same name wrapped in braces
#   The matched text includes the "$" (and braces, if present).
procedure m_dollarvar()
   suspend (="$" || tab(many(DollarNameChar))) |
           (="${" || tab(many(DollarNameChar)) || ="}")
end
# m_filename() -> string
#   High level matcher for a legal file name: consumes the longest run
#   of FileNameChar characters (at least one).
procedure m_filename()
   suspend tab(many(FileNameChar))
end
# m_dotvar() -> string
#   Matcher for dot variables (pronouns).  Candidates are tried from
#   longest to shortest: "...", "..", ".".  On resumption the shorter
#   candidates are generated as well.
procedure m_dotvar()
   suspend =("..." | ".." | ".")
end
# m_number() -> string
#   Matcher for a number; the text must begin with a digit.
#   Generates, in this order:
#     1. the leading digit alone
#     2. the leading digit followed by the longest run of NumberChar
#   so a caller that rejects the short form is offered the long one.
procedure m_number()
   suspend tab(any(&digits)) || ("" | tab(many(NumberChar)))
end
# m_dquote() -> string
#   Low level matcher used by get_word() for double-quoted text.
#   Alternatives, in order:
#     1. quote mark, one or more non-'"' characters, quote mark
#        (shortest possible string; both marks come from m_quotemark(),
#        which lives in myio.icn and is where multi-line quotes are
#        handled)
#     2. the empty string ""
#     3. a single isolated '"'
#   get_word() tags the result "Q" and downgrades an isolated mark
#   to "w".
procedure m_dquote()
   static mark,inside
   initial {
      mark := "\""
      inside := &cset -- mark
   }
   suspend (m_quotemark() || tab(many(inside)) || m_quotemark()) |
           (=mark || =mark) |
           =mark
end
# m_squote() -> string
#   Low level matcher used by get_word() for single-quoted text.
#   Alternatives, in order:
#     1. "'", one or more non-"'" characters, "'"  (shortest string)
#     2. the empty string ''
#     3. a single "'" on its own (an apostrophe has no partner)
#   get_word() tags the result "q" and downgrades a lone mark to "w".
procedure m_squote()
   static mark,inside
   initial {
      mark := "\'"
      inside := &cset -- mark
   }
   suspend (=mark || tab(many(inside)) || =mark) |
           (=mark || =mark) |
           =mark
end
# m_paren() -> string
#   Low level matcher used by get_word(); parenthesised text is
#   treated like a quotation.
#   Alternatives, in order:
#     1. "(", one or more characters other than ")", ")"
#        (shortest string; nesting is not tracked)
#     2. the empty pair "()"
#   Unlike the quote matchers there is no single-character
#   alternative: an unmatched "(" makes this matcher fail.
#   get_word() tags the result "b".
procedure m_paren()
   static open,close,inside
   initial {
      open := "("
      close := ")"
      inside := &cset -- close
   }
   suspend (=open || tab(many(inside)) || =close) |
           (=open || =close)
end
# m_angle() -> string
#   Low level matcher; angle-bracketed text is treated like a
#   quotation.  (Its use in get_word() is currently commented out.)
#   Alternatives, in order:
#     1. "<", one or more characters other than ">", ">"
#        (shortest string; nesting is not tracked)
#     2. the empty pair "<>"
#   An unmatched "<" makes this matcher fail.
#   The intended word type for the result is "a".
procedure m_angle()
   static open,close,inside
   initial {
      open := "<"
      close := ">"
      inside := &cset -- close
   }
   suspend (=open || tab(many(inside)) || =close) |
           (=open || =close)
end
# m_comment(sharp) -> string
#   Low level matcher used by get_word():
#     comment ::= "#" <one character other than "$"> <rest of line>
#   The "$" exclusion exists because Cyc writes constants as "#$name".
#   A "#" that is the last character of the subject does not match,
#   because one further character is required.
#   get_word() tags the result "c", or "w" when CommentMode is "cyc".
#   Per the original notes, some comments are already removed by
#   prompt() in myio.icn (see trimcomment() in word.icn).
#
#   sharp: accepted for interface compatibility; the value passed in
#          is overwritten with "#" and CommentCharacter is not used
#          for matching.
#   The first call gives CommentCharacter, CommentMode and not_dollar
#   defaults if they are still null.
procedure m_comment(sharp)
   initial {
      if /CommentCharacter then CommentCharacter := '#'
      if /CommentMode then CommentMode := "mke"
      if /not_dollar then not_dollar := &cset -- '$'
   }
   sharp := "#"
   suspend =sharp || tab(any(not_dollar)) || tab(0)
end
# m_nvsep() -> string
#   Matcher for the name/value separator of an assignment, intended
#   for get_token().  Candidates in order: "+=", "-=", "*=", "=".
procedure m_nvsep()
   suspend =("+=" | "-=" | "*=" | "=")
end
# m_sphrase() -> string
#   Intermediate level matcher for a string phrase (whitespace has
#   already been reduced to the type letter "B" at this level):
#     sphrase ::= qword
#               | qword "B" sphrase
#   All single-word results of m_qword() are generated first, then
#   the recursive multi-word forms.
#   Original notes: symbol.icn now uses m_lphrase (list based) instead
#   of this string based form; MKR words (is, has, do, ...) are meant
#   to be excluded; the blank is removed later by map_symbol().
procedure m_sphrase()
   suspend m_qword() |
           (m_qword() || ="B" || m_sphrase())
end
# Blank(s) -> string
#   Stand-in for "replace this string by whitespace": the argument is
#   ignored and the whitespace type letter "B" is always returned.
procedure Blank(s)
   return "B"
end
#-----------------------------------------------#
#-----------------------------------------------#
#-----------------------------------------------#
procedure init_word()
#====================
# Build the global character classes and vocabulary tables used by the
# low level parser, then run the remaining init_*() procedures and
# fill HIDDEN.
#
# called by knit_init() in knit.icn
# define global parsing variables
# SEPARATOR,WhiteSpace, WEAKSEP,NONSEP
# WordChar,xWordChar,NumberChar, FileNameChar
# mkr_word,HIDDEN
# rdf_word,owl_word,cycl_word
# dmoz_word,tap_word,cyc_word
# BinaryRelation
# KEYWORD,CONTROL,VERB,PREPOSITION,CONJUNCTION,OPERATOR
# QUANTIFIER,QUANT,groupQUANT
# GTYPE,xxGROUP,EXGROUP,INGROUP
# VARIABLE,keVARIABLE,PRONOUN
# PARAMETER,COMMAND
# special_ctype,special_charname
# legal_utype,legal_chartype
#
# No parameters; no explicit return value.
# NOTE(review): only the variables assigned below are set directly by
# this procedure; the others in the list above are presumably set by
# the init_*() calls near the end -- confirm in their source files.
local sep,ws,word,param,cmd
initial {
CommentCharacter := '#'
}
# characters with special meaning that do NOT split words
NONSEP :=
'/' ++ # hierarchy,filename (/)
'\\' ++ # hierarchy,filename (/)
'~' ++ # logic,filename (~)
'&' ++ # logic (&)
'|' ++ # logic (|)
'?' ++ # question (?)
'$' ++ # dollar variable ($)
'.' ++ # dot variable,number (.)
'!' ++ # sh command (!)
'+' ++ # set add (+)
'-' ++ # set delete (-)
'*' ++ # wildcard,set intersection (*)
':' # production,format,view (:)
# characters that always end a word
SEPARATOR :=
###'$' ++ # dollar variable ($)
"\"" ++ # dquote (Q)
"\'" ++ # squote (q)
###CommentCharacter ++ # comment (c)
'<>' ++ # HTML command (S)
'[,]' ++ # comma list (S)
'{;}' ++ # semicolon list (S)
'()' ++ # precedence (q)
'|' # pipeline (|)
AssignOp :=
'=' # assignment (=)
WhiteSpace :=
' ' ++ # blank (B)
'\t' ++ # tab (B)
'\v' ++ # vertical tab (B)
'\r' ++ # linereturn (B)
'\n' ++ # newline (B)
'\f' # newpage (B)
# low level parse
# a word character is anything that is not whitespace, a separator
# or the assignment operator
WordChar := &cset -- WhiteSpace -- SEPARATOR -- AssignOp
DollarNameChar := WordChar -- NONSEP
AssignChar := '+-*' ++ '&|~' ++ ':'
# intermediate level parse
# (operates on type letters, not on input characters)
xWordChar := 'w' ++ 'qQ'
# high level parse
ListSeparator := ","
FileNameChar := &letters ++ &digits ++ '/\\:.~ '
NumberChar := '.' ++ &digits
#WordChar := &cset -- WhiteSpace -- '[,];' -- "=" -- '{}' --'<>' -- '()'
#-----------------------------------------------------#
#-----------------------------------------------------#
# token types
# each table maps a word (string) to a one-character type code
mkr_word := table()
rdf_word := table()
owl_word := table()
cycl_word := table()
dmoz_word := table()
tap_word := table()
cyc_word := table()
BinaryRelation := table() # see binrel.icn
#====================#
# OpenCyc vocabulary #
#====================#
# Classes
insert(cyc_word,"Thing","w") # existent
insert(mkr_word,"Thing","w") # existent
# Properties
insert(cyc_word,"#$genlMt","R") # Mt1 #$genlMt Mt2
insert(cyc_word,"#$specMt","R") # Mt1 #$specMt Mt2
insert(mkr_word,"genlmt","R") # Mt1 #$genlMt Mt2
insert(mkr_word,"specmt","R") # Mt1 #$specMt Mt2
insert(mkr_word,"genlmt*","R") # Mt1 #$genlMt Mt2
insert(mkr_word,"specmt*","R") # Mt1 #$specMt Mt2
insert(mkr_word,"genlmt**","l") # Mt1 #$genlMt Mt2
insert(mkr_word,"specmt**","l") # Mt1 #$specMt Mt2
insert(mkr_word,"genlmt**?","?") # Mt1 #$genlMt Mt2
insert(mkr_word,"specmt**?","?") # Mt1 #$specMt Mt2
#====================#
# RDF/OWL vocabulary #
#====================#
# Classes
insert(owl_word,"Thing","w") # existent
insert(owl_word,"Nothing","w") # nonexistent
insert(owl_word,"owl:Thing","w") # existent
insert(owl_word,"owl:Nothing","w") # nonexistent
insert(owl_word,"owl:Class","w") # set of concepts
insert(owl_word,"owl:DatatypeProperty","w") # attribute
insert(owl_word,"owl:ObjectProperty","w") # relation part interaction
insert(owl_word,"owl:Ontology","w") # view
insert(owl_word,"owl:Restriction","w") # differentia
insert(owl_word,"owl:FunctionalProperty","w") # subcharacteristic
insert(owl_word,"owl:InverseFunctionalProperty","w") # subcharacteristic
insert(owl_word,"owl:SymmetricProperty","w") # subcharacteristic
insert(owl_word,"owl:TransitiveProperty","w") # subcharacteristic
insert(owl_word,"owl:AllDifferent","w") #
insert(owl_word,"owl:DeprecatedClass","w") #
insert(owl_word,"owl:DeprecatedProperty","w") #
# Properties
insert(owl_word,"owl:sameAs","i") # is
insert(owl_word,"owl:sameIndividualAs","i") # is
insert(owl_word,"owl:equivalentClass","R") # maybe is
insert(owl_word,"owl:equivalentProperty","R") # maybe is
insert(owl_word,"owl:oneOf","R") # isany
insert(owl_word,"owl:unionOf","R") # isunion
insert(owl_word,"owl:intersectionOf","R") # isintersection
insert(owl_word,"owl:complementOf","R") # is not
insert(owl_word,"owl:inverseOf","R") # inverse
insert(owl_word,"owl:onProperty","w") #
insert(owl_word,"owl:cardinality","w") #
insert(owl_word,"owl:maxCardinality","w") #
insert(owl_word,"owl:minCardinality","w") #
insert(owl_word,"owl:allValuesFrom","w") #
insert(owl_word,"owl:someValuesFrom","w") #
insert(owl_word,"owl:differentFrom","R") #
insert(owl_word,"owl:disjointWith","R") #
insert(owl_word,"owl:distinctMembers","R") #
insert(owl_word,"owl:backwardCompatibleWith","R") #
insert(owl_word,"owl:incompatibleWith","R") #
insert(owl_word,"owl:priorVersion","w") #
insert(rdf_word,"rdfs:Class","w") # set of concepts
insert(rdf_word,"rdfs:Resource","w") # existent
insert(rdf_word,"rdf:Property","w") # characteristic
insert(rdf_word,"rdf:type","R") # has type= <=> isu
insert(rdf_word,"rdfs:subClassOf","R") # iss*
insert(rdf_word,"rdfs:subPropertyOf","R") # iss*
insert(rdf_word,"rdfs:domain","w") #
insert(rdf_word,"rdfs:range","w") #
#================#
# non separators #
#================#
insert(mkr_word,"?","?") # question
insert(mkr_word,"/","/") # hierarchy,filename,HTML
# NOTE(review): the next insert repeats the previous one (same key, same value)
insert(mkr_word,"/","/") # hierarchy,filename,HTML
#insert(mkr_word,"/","S") # hierarchy,filename,HTML (strong separator)
# backslash is given the same type code as "/"
insert(mkr_word,"\\","/") # hierarchy,filename,HTML
insert(mkr_word,"$","$") # dollar variable
#insert(mkr_word,".",".") # dot variable,number
# assign words
insert(mkr_word,":=","V") # production
insert(mkr_word,"+:=","V") # production
insert(mkr_word,"-:=","V") # production
insert(mkr_word,"*:=","V") # production
insert(mkr_word,"::","V") # named proposition
insert(mkr_word,"=","=") # assignment,production
insert(mkr_word,"+","+") # assignment,word
insert(mkr_word,"-","-") # assignment,word
insert(mkr_word,"*","*") # assignment,word, wildcard
insert(mkr_word,":",":") # assignment,production, format, view
# NOTE(review): "*" was already inserted above with the same value
insert(mkr_word,"*","*") # assignment,wildcard
insert(mkr_word,"!","D") # sh command
insert(mkr_word,"~","~") # assignment,not
insert(mkr_word,"&","&") # assignment,and
#============#
# separators #
#============#
# quote, comment
#insert(mkr_word,"\'","q") # squote for get_word()
#insert(mkr_word,"\"","Q") # dquote for get_word()
#insert(mkr_word,"(","b") # paren for get_word()
#insert(mkr_word,")","b") # paren for get_word()
#insert(mkr_word,"<","a") # angle for get_word()
#insert(mkr_word,">","a") # angle for get_word()
#insert(mkr_word,CommentCharacter,"c") # comment for get_word()
# strong separators
insert(mkr_word,",","S") # list of phrase
insert(mkr_word,";","S") # list of proposition
insert(mkr_word,"[","S") # list of phrase
insert(mkr_word,"]","S") # list of phrase
###insert(mkr_word,"[","[") # list of phrase
###insert(mkr_word,"]","]") # list of phrase
insert(mkr_word,"{","S") # list of proposition
insert(mkr_word,"}","S") # list of proposition
insert(mkr_word,"(","S") # precedence & quote
insert(mkr_word,")","S") # precedence & quote
insert(mkr_word,"<","S") # HTML & multi-line quote
insert(mkr_word,">","S") # HTML & multi-line quote
#============#
# whitespace #
#============#
insert(mkr_word," ","B") # blank
insert(mkr_word,"\t","B") # tab
#insert(mkr_word,"\v","B") # vertical tab
insert(mkr_word,"\r","B") # linereturn
insert(mkr_word,"\n","B") # newline
insert(mkr_word,"\f","B") # newpage
#=======#
# words #
#=======#
# generic names for grammar examples
insert(mkr_word,"proposition","w")
insert(mkr_word,"production","w")
insert(mkr_word,"sentence","w")
insert(mkr_word,"statement","w")
insert(mkr_word,"question","w")
insert(mkr_word,"command","w") # do command od arg done;
insert(mkr_word,"arg","w") # do command od arg done;
####insert(mkr_word,"relverb","B") # subject relverb object;
insert(mkr_word,"subject","w")
insert(mkr_word,"verb","w")
insert(mkr_word,"object","w")
insert(mkr_word,"preposition","P")
# action
insert(mkr_word,"do","D") # action
insert(mkr_word,"ido","D") # interaction
insert(mkr_word,"DO","D") # axiomatic level
# NOTE(review): "!" was already inserted above with the same value
insert(mkr_word,"!","D") # UNIX shell
insert(mkr_word,"can","D") # capability
###insert(mkr_word,"do*","D") # capability
insert(mkr_word,"vdo","D") # all views
insert(mkr_word,"hdo","D") # hierarchy
insert(mkr_word,"done","d")
# preposition
insert(mkr_word,"at","A") # context ==> use "A"
#insert(mkr_word,"at","P") # context ==> use "A"
insert(mkr_word,"out","P") # product
insert(mkr_word,"of","P") # domain (part)
insert(mkr_word,"with","P") # modifier (also definition)
insert(mkr_word,"od","P") # direct object
insert(mkr_word,"from","P") # initial
insert(mkr_word,"to","P") # final
insert(mkr_word,"in","P") # index
# hierarchy
insert(mkr_word,"is","i") # alias (also definition)
insert(mkr_word,"isu","i") # unit isu genus (also definition)
insert(mkr_word,"iss","i") # species iss genus (also definition)
insert(mkr_word,"isa","i") # unit|species isa genus (also definition)
insert(mkr_word,"isp","R") # genus isp unit
insert(mkr_word,"isg","R") # genus isg species
insert(mkr_word,"isc","R") # genus isc unit|species
insert(mkr_word,"isa+","R") # 1 or more levels
insert(mkr_word,"isc+","R") # 1 or more levels
insert(mkr_word,"is*","i") # 0 or more levels
insert(mkr_word,"isa*","R") # 0 or more levels
insert(mkr_word,"isc*","R") # 0 or more levels
insert(mkr_word,"iss*","R") # 0 or more levels
insert(mkr_word,"isg*","R") # 0 or more levels
insert(mkr_word,"isu*","R") # 0 or more levels
insert(mkr_word,"isp*","R") # 0 or more levels
insert(mkr_word,"isa**","l") # followed by integer
insert(mkr_word,"isc**","l") # followed by integer
insert(mkr_word,"isa**?","?") # question verb
insert(mkr_word,"isc**?","?") # question verb
# attribute
insert(mkr_word,"iswith","R") # differentia
insert(mkr_word,"has","H")
insert(mkr_word,"HAS","l")
# part
insert(mkr_word,"haspart","H")
insert(mkr_word,"isapart","H")
insert(mkr_word,"haspart*","H")
insert(mkr_word,"isapart*","H")
# relation
insert(mkr_word,"rel","L")
insert(mkr_word,"nrel","w")
insert(mkr_word,"trel","L")
insert(mkr_word,"brel","L")
insert(mkr_word,"urel","L")
insert(mkr_word,"isin","R")
# assignment
insert(mkr_word,"let","S")
insert(mkr_word,"vlet","S")
insert(mkr_word,"unlet","S")
# NSM concepts
insert(mkr_word,"causes","B") # cause-effect
insert(mkr_word,"because","B") # NSM effect-cause
insert(mkr_word,"like","R") # NSM similarity
insert(mkr_word,"happens","D") # NSM happen
insert(mkr_word,"happensod","B")# NSM happen
insert(mkr_word,"before","B") # NSM time
insert(mkr_word,"after","B") # NSM time
insert(mkr_word,"above","B") # NSM space
insert(mkr_word,"below","B") # NSM space
insert(mkr_word,"beside","B") # NSM space
insert(mkr_word,"inside","B") # NSM space
insert(mkr_word,"outside","B") # NSM space
insert(mkr_word,"causes*","B") # cause-effect
insert(mkr_word,"because*","B") # effect-cause
insert(mkr_word,"like*","R") # NSM similarity
insert(mkr_word,"happensod*","B")# NSM happen
insert(mkr_word,"before*","B") # time
insert(mkr_word,"after*","B") # time
insert(mkr_word,"above*","B") # space
insert(mkr_word,"below*","B") # space
insert(mkr_word,"beside*","B") # space
insert(mkr_word,"inside*","B") # space
insert(mkr_word,"outside*","B") # space
# NSM view
#insert(mkr_word,"maybe","w") # view
# generator verb
# NOTE(review): "in" was already inserted under "preposition" with the same value
insert(mkr_word,"in","P") # x in concept list
# exgroup, ingroup
insert(mkr_word,"isalt","R") # alternative isalt exgroup
insert(mkr_word,"isany","R") # exgroup isany alternative
insert(mkr_word,"isall","R") # ingroup isall member
insert(mkr_word,"ismem","R") # member ismem ingroup
# for OWL
# NOTE(review): "ismem" and "isand" are inserted several times in this
# group, always with the value "R"; the repeats only carry comments
insert(mkr_word,"isand","R") # intersection (requisite)
insert(mkr_word,"ismem","R") # intersection (requisite)
insert(mkr_word,"isor","R") # union
insert(mkr_word,"isxor","R") # disjoint union
insert(mkr_word,"xor","R") # differentFrom
insert(mkr_word,"ismem","R") # union
insert(mkr_word,"isand","R") # restriction ???
insert(mkr_word,"ismem","R") # restriction ???
insert(mkr_word,"isnon","R") # complement wrt genus
insert(mkr_word,"isnot","R") # complement wrt existent
insert(mkr_word,"inverse","R") # inverse
insert(mkr_word,"isalt*","R") # alternative isalt* exgroup
insert(mkr_word,"isany*","R") # exgroup isany* alternative
insert(mkr_word,"isall*","R") # ingroup isall* member
insert(mkr_word,"ismem*","R") # member ismem* ingroup
# concept formation
insert(mkr_word,"isd","i") # differentiate
insert(mkr_word,"isi","i") # integrate
insert(mkr_word,"means","R")
insert(mkr_word,"isref","R") # is referent of
insert(mkr_word,"means*","R")
insert(mkr_word,"isref*","R")
# groups
insert(mkr_word,"begin","Y") # group definition
insert(mkr_word,"end","Z") # group definition
insert(mkr_word,"hierarchy","h") # hierarchy,lattice
insert(mkr_word,"unithierarchy","h") # unithierarchy (backwards compatibility)
insert(mkr_word,"relation","r") # relation
insert(mkr_word,"group","w") # group,triple,mcf,...
# control structure
insert(mkr_word,"exit","z") # exit KE
insert(mkr_word,"break","z") # exit every,while,until,when
insert(mkr_word,"return","S") # return Product=value;
insert(mkr_word,"suspend","S") # suspend Product=value; (Unicon generator)
insert(mkr_word,"if","I") # conditional
insert(mkr_word,"then","T") # conditional
insert(mkr_word,"else","E") # conditional
insert(mkr_word,"fi","F") # conditional
insert(mkr_word,"every","G") # iteration
insert(mkr_word,"while","I") # iteration
insert(mkr_word,"until","I") # iteration
insert(mkr_word,"when","I") # monitor events
insert(mkr_word,"iff","J") # conjunction
insert(mkr_word,"|","|") # conjunction: pipeline - treat like ";"
# other words
# NSM words
insert(mkr_word,"good","w") # NSM evaluator
insert(mkr_word,"bad","w") # NSM evaluator
insert(mkr_word,"big","w") # NSM descriptor
insert(mkr_word,"small","w") # NSM descriptor
insert(mkr_word,"very","Q") # NSM intensifier
insert(mkr_word,"more","Q") # NSM augmentor
insert(mkr_word,"near","w") # NSM space distance
insert(mkr_word,"far","w") # NSM space distance
insert(mkr_word,"long","w") # NSM time duration
insert(mkr_word,"short","w") # NSM time duration
#insert(mkr_word,"some","Q") # NSM time duration
insert(mkr_word,"and","j") # logic
insert(mkr_word,"or","j") # logic
insert(mkr_word,"not","N") # complement wrt existent
insert(mkr_word,"non","N") # complement wrt genus
insert(mkr_word,"a","Q") # quantifier
insert(mkr_word,"all","Q") # ingroup quantifier
insert(mkr_word,"any","Q") # exgroup quantifier
insert(mkr_word,"either","Q") # exgroup quantifier
insert(mkr_word,"no","Q") # quantifier
insert(mkr_word,"some","Q") # quantifier
insert(mkr_word,"the","Q") # quantifier
##insert(mkr_word,"exists","G") # KIF first order logic
insert(mkr_word,"forall","G") # KIF first order logic
insert(mkr_word,"forany","G") # MKR first order logic
insert(mkr_word,"forSome","G") # OWL first order logic
insert(mkr_word,"forAll","G") # OWL first order logic
insert(mkr_word,"implies","J") # logic
#####insert(mkr_word,"|-","J") # implies
#insert(mkr_word,"delete","w") # do delete ... done
# iQ,iG words for reparsing compound statements
#insert(mkr_word,"iseither","R")
#insert(mkr_word,"isthe","R")
# ig,ih,ir words for exgroup and ingroup
#insert(mkr_word,"isconcept","R")
#insert(mkr_word,"isset","R")
#insert(mkr_word,"islist","R")
#insert(mkr_word,"issequence","R")
#insert(mkr_word,"isrelation","R")
#insert(mkr_word,"ishierarchy","R")
#insert(mkr_word,"islattice","R")
#insert(mkr_word,"isdirectory","R")
#insert(mkr_word,"isconcept","R")
# ad-hoc additions for parsing English phrases
#=============================================
insert(mkr_word,"for","j") # conjunction
insert(mkr_word,"vs.","j") # conjunction
# end mkr_word
#-----------------------------------------------------#
#-----------------------------------------------------#
# remaining initialisers; they run after the tables above exist
init_keyword() # token.icn
init_gtype() # token.icn
init_quantifier() # token.icn
init_variable() # token.icn
init_parameter() # param.icn
init_command() # command.icn
init_char() # char.icn
# ignore hidden words when dumping concepts
# NOTE(review): HIDDEN is not created in this procedure; presumably one
# of the init_*() calls above creates it -- confirm
every sep := string(!SEPARATOR) do
insert(HIDDEN,sep)
delete(HIDDEN,".")
every ws := !WhiteSpace do
insert(HIDDEN,ws)
# every mkr_word key is hidden, except the three assignment words
# removed again just below
every word := key(mkr_word) do
insert(HIDDEN,word)
delete(HIDDEN,"let")
delete(HIDDEN,"vlet")
delete(HIDDEN,"unlet")
every param := !PARAMETER do
insert(HIDDEN,param)
every cmd := ! COMMAND do
insert(HIDDEN,cmd)
# mental actions - know,believe,...
# action object is (may be) proposition list
# kaction is action with ktype=kt
# declared by tabrasa.def, user
kaction_set := set()
# identification actions # <== no longer used
identify_set := set([
"identify","perceive","classify","measure","define",
"see","hear","touch","smell","taste"
])
# initialize rdf map
#####rdf2mkr("rdf:type")
end
procedure declare_word()
#=======================
# Declare the parser's special words by invoking each add_* routine in turn.
# The trailing comment on every call names the source file that defines it.
# Called by initialize_knit() in knit.icn.
# NOTE(review): add_quantifier() files "quantifier" under "word", and "word"
# is registered by add_keyword(), so the call order appears to matter --
# confirm before reordering.
add_separator() # token.icn
add_keyword() # token.icn
add_quantifier() # token.icn
add_gtype() # token.icn
add_variable() # token.icn
add_parameter() # param.icn
add_command() # command.icn
end
#===================================================================#
#===================================================================#
# KFORMAT == "ku" #
#===================================================================#
###### TOKEN
#####procedure m_endtoken()
######=====================
###### block end for hierarchy|relation|directory
#####suspend \
##### TOKEN("Z",="end") # begin end
#####end
# TOKEN list
procedure m_kulist()
#===================
# Generator used inside a string-scanning expression: suspends lists of
# TOKEN records matched from the current scan position.
# kulist ::=
# kutoken
# kutoken kulist
# First alternative: a one-element list holding a single kutoken.
# Second alternative: a one-element list concatenated (|||) with the
# result of a recursive m_kulist() call for the rest of the subject.
# The caller is expected to check that the whole subject was consumed
# (get_token() does this with pos(0)) and backtrack here otherwise.
suspend [m_kutoken()] |
[m_kutoken()] ||| m_kulist()
end
# TOKEN
procedure m_kutoken()
#====================
# Generator: match one token at the current scan position and suspend a
# TOKEN(ttype,tvalue) record for it.
# The subject scanned by get_token() is the string of WORD type characters
# (one character per WORD), so most alternatives below match a single type
# letter such as "B" or "w"; get_token() later replaces tvalue with the
# corresponding text.
# Alternatives are tried in the order written; on backtracking the next
# alternative that matches at the same position is produced.
# The \ after "suspend" is Icon's non-null test applied to the first
# alternative; presumably it is there so the expression can continue on
# the following line without automatic semicolon insertion -- confirm.
# intermediate level parse
# NOTE: "q","Q","w" included in xword ("$" ???)
# NOTE: map_token() changes "x" to "w"
# NOTE: map_token() deletes comment "c"
# NOTE: map_token() deletes whitespace "B"
suspend \
TOKEN("?", ( ="?" )) | # question variable
TOKEN("/", m_slash()) | # hierarchy, HTML end
#TOKEN("$", m_dollarvar()) | # dollar variable
#TOKEN("$", ( ="$w" )) | # dollar variable
#TOKEN("$", ( ="${w}" )) | # dollar variable
#TOKEN("$", ( ="$R" )) | # dollar variable
TOKEN("$", ( ="$" )) | # dollar variable
#TOKEN("n", m_number()) | # integer or real
#TOKEN(".", m_dotvar()) | # dot pronoun (included in $variable)
#TOKEN(".", ( ="..." )) | # dot pronoun (included in $variable)
#TOKEN(".", ( =".." )) | # dot pronoun (included in $variable)
#TOKEN(".", ( ="." )) | # dot pronoun (included in $variable)
###TOKEN("h", ( ="h" )) | # HTML command
TOKEN("c", ( ="c" )) | # comment #...
TOKEN("B", ( ="B" )) | # whitespace
TOKEN(",", ( ="," )) | # list
TOKEN(";", ( =";" )) | # list
TOKEN("!", ( ="!" )) | # sh command
TOKEN("S", ( ="S" )) | # strong separators
###TOKEN("[", ( ="[" )) | # strong separators
###TOKEN("]", ( ="]" )) | # strong separators
TOKEN("<", ( ="<" )) | # strong separators
TOKEN(">", ( =">" )) | # strong separators
TOKEN("|", ( ="|" )) | # strong separators
#TOKEN("=", m_nvsep()) | # assignment
TOKEN("V", ( ="V" )) | # production :=
TOKEN("=", ( ="::=" )) | # BNF production ::=
TOKEN("=", ( ="+=" )) | # assignment +=
TOKEN("=", ( ="-=" )) | # assignment -=
TOKEN("=", ( ="*=" )) | # assignment *=
TOKEN("=", ( ="=" )) | # assignment =
TOKEN("x", m_xword()) | # (wqQ)
TOKEN("x", ( ="+" )) | # weak separator
TOKEN("x", ( ="-" )) | # weak separator
TOKEN("x", ( ="*" )) | # weak separator
TOKEN("x", ( =":" )) | # weak separator
TOKEN("q", ( ="q" )) | # squote '...'
TOKEN("Q", ( ="Q" )) | # dquote "..."
TOKEN("x", ( ="b" )) | # paren (...)
TOKEN("x", ( ="a" )) | # angle <...>
TOKEN("Y", ( ="Y" )) | # begin group
TOKEN("Z", ( ="Z" )) | # end group
TOKEN("z", ( ="z" )) | # exit MKE
TOKEN("x", ( ="w" )) | # word
TOKEN("w", ( ="" )) | # empty string (matches without advancing)
TOKEN("U", m_byte()) # anything else is unknown
end
#===================================================================#
#===================================================================#
# KFORMAT == "ho" | "hounit"
#===================================================================#
# TOKEN list
procedure m_holist()
#===================
# Generator: suspends lists of TOKEN records for the "ho"/"hounit" formats.
# Same shape as m_kulist(), but built from m_hotoken().
# NOTE: get_token() currently selects m_kulist for "ho" and "hounit"
# ("was m_holist"), so this procedure is not chosen there.
# holist ::=
# hotoken
# hotoken holist
suspend \
[m_hotoken()] |
[m_hotoken()] ||| m_holist()
end
# TOKEN
procedure m_hotoken(sep)
#=======================
# Generator: match one token for the "ho"/"hounit" formats.
# With the obsolete alternatives commented out, this only delegates to
# m_kutoken().
# sep: hierarchy separator; defaults to dequote(HOSEPARATOR). It is only
# referenced by the commented-out alternatives below.
# hotoken ::=
# endtoken <<== obsolete -- in kutoken
# holevel <<== obsolete -- do in symbol.icn
# hophrase <<== obsolete -- do in symbol.icn
# kutoken
/sep := dequote(HOSEPARATOR)
suspend \
#TOKEN("Z",="end") |
#TOKEN("/",m_holevel(sep)) |
#TOKEN("h",m_hophrase(sep)) |
m_kutoken()
end
#===================================================================#
#===================================================================#
# KFORMAT == "dir"
#===================================================================#
# TOKEN list
procedure m_dirlist()
#====================
# Generator: suspends lists of TOKEN records for the "dir" format.
# Same shape as m_kulist(), but built from m_dirtoken().
# NOTE: get_token() currently selects m_kulist for "dir"
# ("was m_dirlist"), so this procedure is not chosen there.
# dirlist ::=
# dirtoken
# dirtoken dirlist
suspend \
[m_dirtoken()] |
[m_dirtoken()] ||| m_dirlist()
end
# TOKEN
procedure m_dirtoken(sep)
#=======================
# Generator: match one token for the "dir" format.
# Tries, in order: the literal "end" (type "Z"), a directory word as
# matched by m_dirword() (type "d"), then any m_kutoken() alternative.
# sep: accepted but not used in this procedure.
# dirtoken ::=
# endtoken
# dirword
# kutoken
suspend \
TOKEN("Z",="end") |
TOKEN("d",m_dirword()) |
m_kutoken()
end
# string
procedure m_dirword()
#===================#
# Match a directory word at the current scan position: the longest run of
# characters that contains none of "/", "!" or "=".
# Returns the matched string; fails if the next character is excluded
# or the subject is exhausted.
   static dirchars
   initial {
      # "/" is the hierarchy separator; "!" and "=" stay out of the word
      # so that commands and assignments are still seen as such
      dirchars := &cset -- '/!='
   }
   suspend tab(many(dirchars))
end
#===================================================================#
#===================================================================#
# KFORMAT == "nrel"
#===================================================================#
# TOKEN list
procedure m_rellist()
#====================
# Generator: suspends lists of TOKEN records for the "nrel" format.
# Same shape as m_kulist(), but built from m_reltoken().
# NOTE: get_token() currently selects m_kulist for "nrel" and "group"
# ("was m_rellist"), so this procedure is not chosen there.
# rellist ::=
# reltoken
# reltoken rellist
suspend [m_reltoken()] |
[m_reltoken()] ||| m_rellist()
end
# TOKEN
procedure m_reltoken()
#=====================
# Generator: match one token for the "nrel" format.
# With the obsolete alternatives commented out, this only delegates to
# m_kutoken().
# reltoken ::=
# endtoken <<== obsolete -- in kutoken
# relsep <<== obsolete -- in kutoken
# relphrase <<== obsolete -- do in symbol.icn
# kutoken
suspend \
#TOKEN("Z",="end") |
#TOKEN(";",m_relsep()) |
#TOKEN("r",m_relphrase()) |
m_kutoken()
end
# string
procedure m_relsep(sep)
#======================
# Match the relation separator at the current scan position and return it.
# sep: separator string; when omitted, dequote(RELSEPARATOR) is used.
   if /sep then
      sep := dequote(RELSEPARATOR)
   suspend tab(match(sep))
end
#===================================================================#
#===================================================================#
# KFORMAT == "nv"
# now same rules as "ku"
#===================================================================#
#===================================================================#
#===================================================================#
#===================================================================#
procedure init_keyword()
#=======================
# Build the reserved-word sets used by the tokenizer and by add_keyword():
#   CONTROL      control-flow words
#   VERB         relation and action verbs (CycL, OWL, NSM, MKR)
#   PREPOSITION  prepositions
#   OPERATOR     assignment and production operators
#   CONJUNCTION  sentence-level conjunctions
#   KEYWORD      union of all of the above
# FIX: the VERB list used to end with a trailing comma in front of "]".
# In an Icon list constructor an omitted element is &null, so VERB (and
# therefore KEYWORD) contained a null member that add_keyword() would
# then try to declare and hide. The trailing comma is removed.
# FIX: "can" was listed twice in VERB; the duplicate is dropped (a set
# holds it only once, so the membership is unchanged).
   CONTROL := set([
      "begin",
      "end",
      "exit",
      "break",
      "every","done",
      "while","until",
      "when",
      "forall","exists",
      "if","then","else","fi"
      ])
   VERB := set([
      "inverse",
      # for CycL
      "genlmt","genlmt+","genlmt*","genlmt**","genlmt**?",
      "specmt","specmt+","specmt*","specmt**","specmt**?",
      # for OWL
      "isand", # intersection
      "isor", # union
      "isxor", # disjoint union
      "xor", # disjoint
      # for NSM
      "before","before*",
      "after","after*",
      "above","above*",
      "below","below*",
      "beside","beside*",
      "inside","inside*",
      "outside","outside*",
      "happensod","happensod*",
      "like", "like*", # NSM
      # MKR
      "IS",
      "ISA",
      "ISC",
      "HAS",
      "DO",
      "isa","isa+","isa*","isa**","isa**?",
      "isc","isc+","isc*","isc**","isc**?",
      "isu","isu*","isu**","isu**?",
      "isp","isp*","isp**","isp**?",
      "iss","iss*","iss**","iss**?",
      "isg","isg*","isg**","isg**?",
      "ismem","ismem*",
      "isall","isall*",
      "isalt","isalt*",
      "isany","isany*",
      "is","is*","iswith",
      "isd",
      "isi",
      "rel","nrel","trel","brel","urel",
      "has","can",
      "haspart","haspart*",
      "isapart","isapart*",
      "do","vdo","hdo", "ido",
      "happens",
      "let","vlet","unlet",
      "means", "means*",
      "isref", "isref*"
      # iQ words for reparsing compound statements
      # (when re-enabling these, put a comma after "isref*" and none
      #  after the last entry)
      #"iseither",
      #"isno",
      #"issome",
      #"isthe"
      ])
   PREPOSITION := set([
      "in",
      "at", # context
      "out", # product
      "of", # part
      "with", # characteristic
      "od", # direct object
      "from", # initial characteristic
      "to" # final characteristic
      ])
   OPERATOR := set([
      "=", # name = value
      "+=", # name op value
      "-=", # name op value
      "*=", # name op value
      ":=", # product := producer
      "+:=", # product prodop producer
      "-:=", # product prodop producer
      "*:=", # product prodop producer
      "::=" # BNF grammar
      ])
   CONJUNCTION := set([
      "iff",
      "implies", # "|-"
      "supports", # "|=" situation theory notation
      "causes", "causes*", # NSM
      "because", "because*" # NSM
      ])
   KEYWORD := CONTROL ++ VERB ++ PREPOSITION ++ OPERATOR ++ CONJUNCTION
end
procedure init_quantifier()
#==========================
# Build the quantifier sets:
#   QUANT       quantifiers "a", "no", "some", "the"
#   groupQUANT  group quantifiers "either", "all", "any"
#   QUANTIFIER  union of both
   QUANT := set()
   every insert(QUANT, "a" | "no" | "some" | "the")
   groupQUANT := set()
   every insert(groupQUANT, "either" | "all" | "any")
   QUANTIFIER := QUANT ++ groupQUANT
end
procedure init_gtype()
#=====================
# Build the group-type name sets:
#   xxGROUP  "exgroup", "ingroup", "concept"
#   INGROUP  ingroup kinds (including the name "LATTICE")
#   EXGROUP  exgroup kinds
#   LATTICE  lattice kinds
#   GTYPE    union of all four
   xxGROUP := set()
   every insert(xxGROUP, "exgroup" | "ingroup" | "concept")
   INGROUP := set()
   every insert(INGROUP,
      "list" |
      "requisite" |
      "sequence" |
      "set" |
      "intersection" | # OWL
      "LATTICE")
   EXGROUP := set()
   every insert(EXGROUP,
      "enum" | # OWL oneOf
      "union") # OWL
   LATTICE := set()
   every insert(LATTICE, "hierarchy" | "lattice")
   GTYPE := xxGROUP ++ INGROUP ++ EXGROUP ++ LATTICE
end
procedure init_variable()
#========================
# Build the context-dependent variable sets:
#   keVARIABLE  "Sentence", "Question", "Command", "Assignment", "If", "Every"
#               ("Statement" is deliberately left out)
#   PRONOUN     dot pronouns, personal pronouns and NSM pronouns
#   VARIABLE    union of both
   keVARIABLE := set()
   every insert(keVARIABLE,
      "Sentence" |
      #####"Statement" |
      "Question" |
      "Command" |
      "Assignment" |
      "If" |
      "Every")
   PRONOUN := set()
   every insert(PRONOUN,
      "." | ".." | "..." | # pronoun
      "I" | "we" |
      "you" |
      "he" | "she" | "they" |
      "it" |
      "none" |
      "someone" | # NSM
      "something" | # NSM
      "people") # NSM
   VARIABLE := keVARIABLE ++ PRONOUN
end
procedure add_separator()
#========================
# Declare the concept "separator" as a species of "symbol", then file the
# named whitespace characters and every SEPARATOR character (except "$"
# and ".") as units of "separator".
   local name,ch
   new_concept("separator",,"separator")
   add_species("separator","symbol")
   # whitespace is declared by name
   every name := !["blank","tab","vertical tab","line return","newline","newpage"] do
      add_unit(name,"separator")
   #####add_alias(" ","blank")
   #####add_alias("\t","tab")
   #####add_alias("\v","vertical tab")
   #####add_alias("\r","line return")
   #####add_alias("\n","newline")
   #####add_alias("\f","newpage")
   # $. are filed as variables, not separators
   every ch := !(SEPARATOR -- '$.') do
      add_unit(string(ch),"separator")
   #####add_alias("sharp","#")
   #####add_alias("dquote","\"")
   #####add_alias("squote","\'")
   #####add_alias("lparen","(")
   #####add_alias("rparen",")")
   #####merge_alias(set([",","comma"]))
   #####merge_alias(set([";","semicolon"]))
   #####merge_alias(set(["&","and"]))
   #####merge_alias(set(["|","or"]))
   #####merge_alias(set(["~","not"])) # complement wrt existent
   #####merge_alias(set(["~","non"])) # complement wrt genus
end
procedure add_keyword()
#======================
# Declare the keyword hierarchy and hide every keyword from concept dumps.
# "symbol" is filed under "OBJECT" and "word" under "symbol"; then, for
# each category in the table below, the category is filed under its
# parent and every member of the associated set becomes a unit of the
# category and is inserted into HIDDEN.
# Finally the KIF names "forall"/"exists" are merged with the OWL names.
   local kw,plan,step
   add_species("symbol","OBJECT")
   add_species("word","symbol")
   # category, parent species, member set -- processed in this order
   plan := [
      ["verb","word",VERB],
      ["conjunction","word",CONJUNCTION],
      ["preposition","word",PREPOSITION],
      ["separator","symbol",OPERATOR],
      ["control","word",CONTROL]
      ]
   every step := !plan do {
      add_species(step[1],step[2])
      every kw := !step[3] do {
         add_unit(kw,step[1])
         insert(HIDDEN,kw)
      }
   }
   merge_alias(set(["forall","forAll"]),"forAll") # use OWL name
   merge_alias(set(["exists","forSome"]),"forSome") # use OWL name
   #####merge_alias(set(["isc","iseither"]))
   #####merge_alias(set(["|=","supports"]))
   #####merge_alias(set(["|-","implies"]))
end
procedure add_quantifier()
#=========================
# Declare the concept "quantifier" as a species of "word", file every
# member of QUANTIFIER as one of its units, and hide each of them from
# concept dumps.
   local quant
   new_concept("quantifier",,"quantifier")
   add_species("quantifier","word")
   every quant := !QUANTIFIER do {
      add_unit(quant,"quantifier")
      insert(HIDDEN,quant)
   }
end
procedure add_gtype()
#====================
# File every group-type name as a species of its parent group and hide
# it from concept dumps. The table pairs each name set with the parent
# species its members are filed under.
   local kw,plan,step
   plan := [
      [xxGROUP,"group"],
      [INGROUP,"ingroup"],
      [EXGROUP,"exgroup"],
      [LATTICE,"LATTICE"]
      ]
   every step := !plan do
      every kw := !step[1] do {
         add_species(kw,step[2])
         insert(HIDDEN,kw)
      }
   #####add_alias("enum","enum")
   #####add_alias("req","requisite")
   #####add_alias("seq","sequence")
end
procedure add_variable()
#=======================
# Declare the context-dependent variables and the pronouns.
# Every member of keVARIABLE becomes a species of "attribute"; "pronoun"
# is filed under "attribute"; every member of PRONOUN becomes a species
# of "pronoun" with ctype "pronoun". All of these names are inserted
# into HIDDEN so they are ignored when concepts are dumped.
# FIX: the pronoun loop used to insert "var" (left over from the first
# loop) into HIDDEN, so no pronoun was ever hidden; it now inserts the
# pronoun itself.
   local var,pro
   # declare variables
   every var := ! keVARIABLE do {
      add_species(var,"attribute") # var iss variable
      #put_char("attr","ke",var,[]) # ke has var
      insert(HIDDEN,var)
   }
   # declare pronouns
   add_species("pronoun","attribute")
   every pro := ! PRONOUN do {
      add_species(pro,"pronoun") # pro iss pronoun
      put_ctype(pro,"pronoun") # pro has ctype=pronoun
      #put_char("attr","ke",pro,[]) # ke has pronoun
      insert(HIDDEN,pro)
   }
end
#===================================================================#
#===================================================================#
#===================================================================#
# TOKEN
procedure copy_token(tok)
#========================
# Return a new TOKEN record carrying the same ttype and tvalue as tok.
   return TOKEN(tok.ttype,tok.tvalue)
end
# list of TOKEN
procedure copy_tlist(tok)
#========================
# Return a new list holding a copy_token() copy of every element of the
# TOKEN list tok, in the same order.
   local copies,item
   copies := []
   every item := !tok do
      put(copies,copy_token(item))
   return copies
end
#-------------------------------------------------------------
# TOKEN list
procedure get_token(fd,ps,option)
#================================
# Generator: turn the WORD lists produced by get_word() into TOKENs.
# called by parse_file() in symbol.icn
#   fd      input source passed to get_word(); defaults to myin
#   ps      prompt string passed to get_word(); defaults to "ke$ "
#   option  passed through to get_word(); defaults to ""
# For every result of get_word():
#   1. collect it into wordlist (a single WORD or a list of WORDs)
#   2. build wline, the string of word types, and wlist, the word values
#   3. scan wline with the token grammar; the whole string must match
#   4. put the original text back into each token (a token whose matched
#      type string is n characters long absorbs n consecutive words)
#   5. run map_token() over the result
# If the scan fails, KERROR is incremented, the line is reported and the
# result is the single token TOKEN("X",line).
# Depending on ParserMode the tokens are suspended one by one ("word")
# or as a list ("line").
# FIX: the message prefixes (info, error, warning, ierror) are now built
# before the type check of the word. They used to be assigned after it,
# so on the first word ierror was still null when concatenated into the
# "unexpected type word" message, which is a run-time error in Icon.
   local m_token
   local t
   local line,wordlist,wline,wlist
   local wtok,tok
   local i,j,k,y,z
   local ij,tline
   local KHRV,VD
   local word,token
   local prog,info,error,warning,ierror
   initial {
      /DOT1 := "existent"
      /VIEW := "tabula rasa"
      /KFORMAT := "ku"
      /HOSEPARATOR := "/"
      /RELSEPARATOR := ","
      /NVSEPARATOR := "="
      /PIPESEPARATOR := "|"
   }
   /fd := myin
   /ps := "ke$ "
   /option := ""
   #------------------------------------------------------------#
   every word := get_word(fd,ps,option) do {
      # message prefixes -- needed by the type check below
      KHRV := KFORMAT||HOSEPARATOR||RELSEPARATOR||NVSEPARATOR
      prog := "get_token("||KHRV||"): "
      info := "INFO: "||prog
      error := "ERROR: "||prog
      warning := "WARNING: "||prog
      ierror := "Internal ERROR: "||prog
      wordlist := []
      case t := type(word) of {
         default: {
            writes_type(myerr,word,ierror||"unexpected type word")
            writes_type(mylog,word,ierror||"unexpected type word")
            fail
         }
         "WORD": { put(wordlist,word) }
         "list": {
            wordlist |||:= word
            if *word = 0 then {
               if DEBUG == "NULL" then
                  writes_type_all(word,ierror||"empty list word")
               #####fail
            }
         }
      } # end case t
      DOTCONCEPT := ".="||DOT1
      VD := VIEW||":"||DOTCONCEPT
      line := unparse(wordlist,"") # blanks are still in wordlist
      if DEBUG == ("WORD"|"TOKEN"|"PARSE") then {
         writes_type(mybug,line,info||"input line")
         writes_type(mybug,wordlist,info||"input wordlist")
         writes_type(mylog,line,info||"input line")
         writes_type(mylog,wordlist,info||"input wordlist")
      }
      if *line = 0 then next
      #####if *line = 0 then fail
      tok := [TOKEN("X",line)] # unknown token if no match
      # every known format currently uses the "ku" token grammar
      case KFORMAT of {
         "list": { m_token := m_kulist }
         "ku": { m_token := m_kulist }
         "cu": { m_token := m_kulist }
         "ho": { m_token := m_kulist } # was m_holist
         "hounit":{ m_token := m_kulist } # was m_holist
         "dir": { m_token := m_kulist } # was m_dirlist
         "group": { m_token := m_kulist } # was m_rellist
         "nrel": { m_token := m_kulist } # was m_rellist
         "nv": { m_token := m_kulist }
         "kb": { m_token := m_kulist }
         "html": { m_token := m_kulist }
         "htm": { m_token := m_kulist }
         "xml": { m_token := m_kulist }
         "rdf": { m_token := m_kulist }
         "owl": { m_token := m_kulist }
         "nt": { m_token := m_kulist }
         "mcf": { m_token := m_kulist }
         default: {
            writes_type(mylog,KFORMAT,warning||"unknown kformat")
            KFORMAT := "ku"
            m_token := m_kulist
         }
      } # end case KFORMAT
      # wline: one type character per word; wlist: the matching values
      wline := ""
      wlist := []
      every word := !wordlist do {
         if DEBUG == ("WORD"|"TOKEN"|"PARSE") then {
            writes_type_all(word,info||"input word")
         }
         wline ||:= word.wtype
         put(wlist,word.wvalue)
      } # end every word
      if DEBUG == ("WORD"|"TOKEN"|"PARSE") then {
         writes_type_all(wline,info||"input wline")
         #writes_type_all(wlist,info||"input wlist")
      }
      # the grammar must consume all of wline
      if wline ? { ( wtok <- m_token() ) & pos(0) }
      then {
         if DEBUG==("WORD"|"TOKEN"|"PARSE") then {
            writes_type(mybug,wtok,info||"parse result wtok")
            writes_type(mylog,wtok,info||"parse result wtok")
         }
         # replace each token's matched type string by the word text;
         # j indexes wlist, i indexes wtok
         j := 0
         every i := 1 to *wtok do {
            j +:= 1
            ij := "(i="||i||",j="||j||")"
            y := wtok[i].tvalue
            z := wlist[j]
            case *y of {
               0: { writes_type(mylog,wline,ierror||"zero length tvalue"||ij) }
               1: { }
               default: {
                  # composite words
                  if DEBUG==("MAP"|"WORD"|"TOKEN") then {
                     writes_type(mybug,y,info||"composite y"||ij)
                     writes_type(mylog,y,info||"composite y"||ij)
                  }
                  every k := 2 to *y do {
                     j +:= 1
                     z ||:= wlist[j]
                  } # end every k
               } # end default
            } # end case *y
            wtok[i].tvalue := z
         } # end every i
         tok := map_token(wtok)
         tline := token2string(tok)
         if DEBUG==("WORD"|"TOKEN"|"PARSE") then {
            writes_type(mylog,tok,info||"map result tok")
            writes_type(mylog,tline,info||"map result tline")
         }
      } else {
         KERROR +:= 1
         writes_type(myerr,line,error||"unknown token")
         writes_type(mylog,line,error||"unknown token")
         #####OLDcomplete("_UnknownToken_") # token.icn
      } # end if wline ? { }
      if DEBUG == ("WORD"|"TOKEN"|"PARSE") then {
         writes_type(mybug,tok,info||"output tokenlist tok")
         writes_type(mylog,tok,info||"output tokenlist tok")
      }
      case ParserMode of {
         default: {
            writes_type(myerr,ParserMode,ierror||"unexpected ParserMode")
            writes_type(mylog,ParserMode,ierror||"unexpected ParserMode")
            fail
         }
         "word": { every token := !tok do suspend token }
         "line": { suspend tok }
      }
   } # end every word
   #----------------------------------------------------------#
end
#==========================================================#
#==========================================================#
# integer
procedure NEWcomplete(word)
#==========================
# Track syntactic nesting across calls and return an integer for it.
# The state lives in static variables that are initialized on the first call.
# word is either a control request or an ordinary token value:
#   "_count_"       return 1 if inside a begin-end group, an open "every"
#                   header or an open brace; otherwise the stack depth
#   "_html_"        return html_beginend
#   "_html_reset_"  reset html_nest and html_beginend; there is no return
#                   in this branch, so processing continues below
#   "_group_"       return BeginEnd (> 0 means inside a begin-end group)
#   "_begin_"       increment BeginEnd and return it
#   "_end_"         decrement BeginEnd and return it
#   "_start_" | "_restart_"
#                   clear the nesting stack and the ku_* counters, return 0
#   anything else   update the nesting stack according to KFORMAT and
#                   return its depth (*nesting)
# While ku_beginend > 0 every word except "end" returns the current
# depth without updating the stack.
# NOTE: map_token() no longer calls NEWcomplete()
# for begin,end (allows immediate processing
# of relation tuples & hierarchy info.)
local x
static nesting,html_nest
static BeginEnd
static ku_beginend,ku_every,ku_brace
static html_beginend,xml_beginend
static info
initial {
info := "# INFO: NEWcomplete: "
BeginEnd := 0
ku_beginend := 0 # begin gtype gname; ... end gtype gname;
ku_every := 0 # every sentence; { ... };
ku_brace := 0 # { ... }
html_beginend := 1 # < ... >
xml_beginend := 0 # content
nesting := [] # stack
html_nest := [] # stack
}
# check nesting
#--------------#
# called by map_token() in token.icn word
# called by parse_file() in symbol.icn "_count_"
# called by map_token() in token.icn "_group_"
# called by map_token() in token.icn "_begin_"
# called by map_token() in token.icn "_end_"
case word of {
"_count_": {
if ku_beginend > 0 then
return 1
else
if ku_every > 0 then
return 1
else if ku_brace > 0 then
return 1
#else if html_beginend > 0 then
# return 1
#else if xml_beginend > 0 then
# return 1
else
return *nesting
}
"_html_": { return html_beginend }
"_html_reset_": { html_nest := []; html_beginend := 1 }
"_group_": { return BeginEnd }
# > 0 means in begin end group
"_begin_": { BeginEnd +:= 1; return BeginEnd }
"_end_": { BeginEnd -:= 1; return BeginEnd }
("_start_"|"_restart_"): {
###BeginEnd := 0 # finish begin-end group
nesting := []
###html_nest := []
ku_beginend := 0
ku_every := 0
ku_brace := 0
###html_beginend := 1
xml_beginend := 0
return *nesting
}
default: { } # continue below
} # end case word
if ku_beginend > 0 then {
# inside begin-end group
if word == "end" then { } else
return *nesting
}
#####if html_beginend > 0 then {
##### # inside < >
##### if word == ">" then { } else
##### return *nesting
#####}
if DEBUG=="COMPLETE" then {
writes_type(mybug,nesting,info||"before update: nesting")
writes_type(mybug,word,info||"word")
writes_type(mylog,nesting,info||"before update: nesting")
writes_type(mylog,word,info||"word")
}
# update nesting
#---------------#
case KFORMAT of {
"list": { }
("ku"|"cu"): {
case word of {
#"_start_": { push(nesting,word) }
#"_restart_": { push(nesting,word) }
#";": { popcheck("_start_",word,nesting) }
"if": { push(nesting,word) }
"fi": { popcheck("if",word,nesting) }
# all iteration and quantifier headers are pushed as "every"
(
"every"|"while"|"until"|"when"|
"forall"|"forAll"|
"exists"|"forSome"
): {
ku_every +:= 1
push(nesting,"every")
}
# all action verbs are pushed as "do" and closed by "done"
"do": { push(nesting,word) }
"DO": { push(nesting,"do") }
"can": { push(nesting,"do") }
"vdo": { push(nesting,"do") }
"hdo": { push(nesting,"do") } # hwalk
"ido": { push(nesting,"do") } # hwalk
"!": { push(nesting,"do") } # shell command (! ... done;)
"happens": { push(nesting,"do") } # interaction
"done": { popcheck("do",word,nesting) }
"[": { push(nesting,word) }
"]": { popcheck("[",word,nesting) }
"{": {
# an opening brace closes a pending "every" header
if ku_every > 0 then {
popcheck("every",word,nesting)
ku_every -:= 1
}
ku_brace +:= 1
push(nesting,word)
}
"}": {
ku_brace -:= 1
popcheck("{",word,nesting)
}
"(": { push(nesting,word) }
")": { popcheck("(",word,nesting) }
"<": { push(nesting,word) }
#####">": { popcheck("",word,nesting) }
">": { popcheck("<",word,nesting) }
# begin/end only adjust the counter; the stack is left alone
"begin": { ku_beginend +:= 1 } #; push(nesting,word) }
"end": { ku_beginend -:= 1 } #; popcheck("begin",word,nesting) }
} # end case word
}
("ho"|"hounit"|"nrel"|"nt"|"mcf"): {
case word of {
"begin": { ku_beginend +:= 1 } #; push(nesting,word) }
"end": { ku_beginend -:= 1 } #; popcheck("begin",word,nesting) }
} # end case word
}
# NOTE(review): "xlm" below looks like a typo for "xml"; the branch body
# is entirely commented out, so it currently has no effect either way
("html"|"htm"|"xlm"|"rdf"|"daml"|"owl"): {
##case word of {
##"<": { html_beginend +:= 1; push(html_nest,word) }
##">": { html_beginend -:= 1; popcheck("",word,html_nest) }
##"/": { html_beginend -:= 1; popcheck("/",word,html_nest) }
##} # end case word
}
} # end case KFORMAT
if DEBUG=="COMPLETE" then {
writes_type(mybug,nesting,info||"after update: nesting")
writes_type(mylog,nesting,info||"after update: nesting")
}
return *nesting
end
procedure popcheck(nestbegin,nestend,nest)
#=========================================
# Pop the nesting stack for a closing word and check that the popped
# opener is the expected one.
#   nestbegin  opener expected on top of the stack
#   nestend    closing word that triggered the check
#   nest       stack (list) to pop from
# An empty stack, or a mismatch whose expected pair is not ">", resets
# the nesting state through NEWcomplete("_restart_").
# The empty-stack warning is suppressed for "", "<>", "()", "[]" and "{}".
# NOTE(review): the expected pair is nestbegin||nestend, so it equals
# ">" only when nestbegin is empty -- confirm whether "<>" was intended.
   local opener,expected,actual
   static info,popfail,mismatch
   initial {
      info := "# INFO: popcheck: "
      popfail := "# WARNING: popcheck: empty nesting stack: "
      mismatch := "# WARNING: popcheck: mismatch: "
   }
   if DEBUG=="COMPLETE" then {
      writes_type_all(nestbegin,info||"nestbegin")
      writes_type_all(nestend,info||"nestend")
   }
   expected := nestbegin||nestend
   # nothing to pop: warn unless the pair is one of the quiet ones
   if not (opener := pop(nest)) then {
      if not (expected == ("" | "<>" | "()" | "[]" | "{}")) then
         writes_type_all(expected,popfail||"looking for")
      NEWcomplete("_restart_")
      return
   }
   if DEBUG=="COMPLETE" then
      writes_type_all(opener,info||"xbegin")
   actual := opener||nestend
   if actual ~== expected then {
      if DEBUG=="COMPLETE" then {
         write(myerr,mismatch,"looking for ",expected," found "||actual)
         write(mylog,mismatch,"looking for ",expected," found "||actual)
      }
      # for ">" wait for the next HTML group instead of resetting
      if expected ~== ">" then
         NEWcomplete("_restart_")
   }
end
#==========================================================#
# integer
procedure OLDcomplete(word)
#=======================
# Maintain counters of open syntactic groups and return the number that
# are still open for the current KFORMAT.
# set syntactic group count
# begin - end
# if - fi
# do - done
# every
# [ ] { } < > ( )
#   word  token value to account for, or one of the reset requests
#         listed under "do reset", which zero every counter
# Returns gcount: the sum of the counters relevant to KFORMAT. Formats
# not handled under "update count" keep the initial value (0 by
# default; "list" leaves it unset).
# FIX: the XML format was spelled "xlm" in both KFORMAT cases, so
# KFORMAT "xml" never counted "<" and ">"; it is now "xml", matching the
# spelling used by the other procedures in this file.
# FIX: the initial ku/cu count added "angle" twice; it now uses the same
# sum as the final count.
   local gcount
   local iinfo
   static info,ierror
   static beginend, iffi, dodone, everydo, whendo, whiledo, untildo
   static bracket, brace, angle, paren
   static htmlgroup
   static semicolon
   initial {
      info := "INFO: OLDcomplete: "
      ierror := "Internal ERROR: OLDcomplete: "
      beginend := integer(0)
      iffi := integer(0)
      dodone := integer(0)
      everydo := integer(0)
      whendo := integer(0)
      whiledo := integer(0)
      untildo := integer(0)
      bracket := integer(0)
      brace := integer(0)
      angle := integer(0)
      paren := integer(0)
      htmlgroup := integer(0)
      #semicolon := integer(1)
   } # end initial
   iinfo := info||" word <"||word||"> "
   # do initial count
   #-----------------#
   case KFORMAT of {
      default: { gcount := 0 }
      "list": { }
      ("ku"|"cu"): {
         gcount := iffi + everydo + whendo + whiledo + untildo + dodone +
            bracket + brace + paren +
            angle # + semicolon
      }
      ("ho"|"hounit"|"nrel"): {
         gcount := beginend
      }
      ("html"|"htm"|"xml"|"rdf"): {
         gcount := angle
      }
   } # end case KFORMAT
   if DEBUG == "COMPLETE" then {
      writes_type(mybug,gcount,iinfo||"initial gcount")
      writes_type(mylog,gcount,iinfo||"initial gcount")
   }
   # do reset
   #---------#
   case word of {
      default: { } # continue
      (
      "_TabulaRasaComplete_"| # initialize_tabrasa() in init.icn
      "_InitializationComplete_"| # main() in ke.icn,ksc.icn,tap2mkr.icn
      "_BeginRead_"| # command() in command.icn
      "_EndRead_"| # command() in command.icn
      "_BeginGroup_"| # init.icn
      "_EndGroup_"| # init.icn
      "_GroupError_"| # init.icn
      "_SyntaxError_" # get_symbol() in symbol.icn
      #"_LineComplete_"| # continue_token(), main()
      #"_UnknownToken_" # get_token() in token.icn
      ): {
         writes_type(mylog,word,info||"reset word")
         beginend := integer(0)
         iffi := integer(0)
         dodone := integer(0)
         everydo := integer(0)
         whendo := integer(0)
         whiledo := integer(0)
         untildo := integer(0)
         bracket := integer(0)
         brace := integer(0)
         angle := integer(0)
         paren := integer(0)
         htmlgroup := integer(0)
         ##semicolon := integer(1)
      } # end reset
   } # end case
   # update count
   #-------------#
   case KFORMAT of {
      "list": { }
      ("ku"|"cu"): {
         case word of {
            "if": { iffi +:= 1 }
            "fi": { iffi -:= 1 }
            "every": { everydo +:= 1 }
            "when": { whendo +:= 1 }
            "while": { whiledo +:= 1 }
            "until": { untildo +:= 1 }
            ("do"|
            "DO"|
            "!"|
            "can"|
            "vdo"|
            "hdo"|
            "ido"): { dodone +:= 1 }
            "happens": { dodone +:= 1 }
            "done": { dodone -:= 1 }
            "[": { bracket +:= 1 }
            "]": { bracket -:= 1 }
            # an opening brace closes one pending loop header of each kind
            "{": { brace +:= 1
               if everydo > 0 then everydo -:= 1
               if whendo > 0 then whendo -:= 1
               if whiledo > 0 then whiledo -:= 1
               if untildo > 0 then untildo -:= 1
            }
            "}": { brace -:= 1 }
            "(": { paren +:= 1 }
            ")": { paren -:= 1 }
            ##";": { semicolon -:= 1 }
            "<": { angle +:= 1 } ##; semicolon := 0 }
            ">": { angle -:= 1 }
         } # end case word
         gcount := iffi + everydo + whendo + whiledo + untildo + dodone +
            bracket + brace + paren +
            angle # + semicolon
      }
      ("ho"|"hounit"|"nrel"): {
         case word of {
            "begin": { beginend +:= 1 }
            "end": { beginend -:= 1 }
         } # end case word
         gcount := beginend
      }
      ("html"|"htm"|"xml"|"rdf"): {
         case word of {
            "<": { angle +:= 1 }
            ">": { angle -:= 1 }
         } # end case word
         gcount := angle
      }
   } # end case KFORMAT
   #####case word of {
   #####"htmlbegin": { htmlgroup +:= 1 }
   #####"htmlend": { htmlgroup -:= 1 }
   #####} # end case word
   #####gcount := htmlgroup
   if DEBUG == "COMPLETE" then {
      writes_type(mybug,gcount,iinfo||"final gcount")
      writes_type(mylog,gcount,iinfo||"final gcount")
   }
   return gcount
end
#==========================================================#
#==========================================================#
# TOKEN
procedure OLDcontinue_token(linetoken,infd,continue)
#================================================
# Legacy line-continuation logic: decide from the type of the last token
# and from OLDcomplete()'s open-group count whether the statement goes on
# to the next input line; if so, read and tokenize that line and append it.
#   linetoken  record read through its fields "line" and "token"
#   infd       input source for the continuation line; defaults to myin
#   continue   "yes" (default) enables continuation
# Returns linetoken unchanged, or a new record built from the joined
# line and token list.
# NOTE(review): the TOKEN record declared at the top of this file has the
# fields ttype and tvalue, not line and token -- confirm which record
# type callers pass in.
# NOTE(review): get_token() in this file takes (fd,ps,option) and
# get_word() is called there as get_word(fd,ps,option); the calls below
# use different argument lists -- confirm before re-enabling this code.
# NOTE(review): copy_list() is not defined in this part of the file
# (copy_tlist() is) -- confirm it exists elsewhere.
# called from get_token() in token.icn
# automatic continuation for beginning & middle keywords & special characters
# , [ { ( < \
# isa,isc, is,has,do, if,then,else, every, ..., with
# at, of, with, from, to
# begin, end
# know, believe, etc. <== NOT currently reserved
local line,tok,newline,newtok,newlinetoken
local xlineword
local tlast,xline,xlinetoken
local nocont,cont,icomplete
local head,KHRV,cilast
local iinfo,iierror
static info,ierror
initial {
info := "INFO: OLDcontinue_token: "
ierror := "Internal ERROR: OLDcontinue_token: "
}
/infd := myin
/continue := "yes"
KHRV := "("||KFORMAT||HOSEPARATOR||RELSEPARATOR||NVSEPARATOR||")"
# per format: nocont = last-token types that end a statement,
# cont = last-token types that force continuation,
# icomplete = open-group count reported by OLDcomplete()
case KFORMAT of {
"list": { }
("ku"|"cu"): {
nocont := ';' ++ '>'
cont := '\\' ++ '!,&|~' ++ '[{(<' ++ 'ACDEGHIiJPpQRTWYZ' ++ '='
icomplete := OLDcomplete("") # token.icn
}
("ho"|"hounit"): {
nocont := '>'
cont := '<'
icomplete := OLDcomplete("html") # token.icn
}
("dir"|"nrel"|"nv"): {
return linetoken
}
("rdf"|"xml"|"html"|"htm"): {
nocont := '>'
cont := '<' ++ '='
icomplete := OLDcomplete("html") # token.icn
}
"nt": {
nocont := ''
cont := ''
icomplete := OLDcomplete("") # token.icn
}
"mcf": {
nocont := ''
cont := ''
icomplete := OLDcomplete("") # token.icn
}
default: {
writes_type(mylog,KFORMAT,ierror||"unexpected KFORMAT")
return linetoken
}
} # end case KFORMAT
line := linetoken.line
tok := linetoken.token
newline := line
newtok := copy_list(tok)
# type of the last token; the procedure fails if there is none
tlast := tok[-1].ttype | fail
cilast := "("||continue||icomplete||tlast||")"
head := KHRV||cilast
iinfo := info||head
iierror := ierror||head
if DEBUG == "CONTINUE" then {
writes_type(mybug,line,iinfo||"input line")
writes_type(mylog,line,iinfo||"input line")
}
newlinetoken := linetoken
if continue=="yes" then {
if upto(tlast,nocont) & (icomplete <= 0) then {
# no continuation
# NOTE: OLDcomplete("_LineComplete_") is only called when DEBUG is
# "CONTINUE", and "_LineComplete_" is commented out of OLDcomplete()'s
# reset list
if DEBUG=="CONTINUE" then {
writes_type(mybug,line,iinfo||"no continutaion")
writes_type(mylog,line,iinfo||"no continutaion")
OLDcomplete("_LineComplete_")
}
} else if upto(tlast,cont) | (icomplete > 0) then {
# continue to next line
if tlast == "\\" then { # delete \
newtok := newtok[1:-1]
newline := newline[1:-1]
}
if xline := prompt(infd) then {
xlineword := get_word(xline)
xlinetoken := get_token(xlineword,infd,continue)
newline ||:= " "|| xlinetoken.line
newtok |||:= copy_list(xlinetoken.token)
newlinetoken := TOKEN(newline,newtok)
if DEBUG=="CONTINUE" then {
writes_type(mybug,newline,iinfo||"next line")
writes_type(mylog,newline,iinfo||"next line")
}
} # end if xline
} else {
if DEBUG=="CONTINUE" then {
writes_type(mybug,line,iinfo||"no continutaion")
writes_type(mylog,line,iinfo||"no continutaion")
OLDcomplete("_LineComplete_")
}
} # end if upto
} # end if continue
if DEBUG=="CONTINUE" then {
writes_type(mybug,newlinetoken.line,iinfo||"output line")
writes_type(mylog,newlinetoken.line,iinfo||"output line")
}
return newlinetoken
end
# TOKEN list
procedure map_token(tok)
#=======================
# phase 2 parse
# map ttype for get_symbol()
local x,y,word
local kinfo
static info
initial {
info := "INFO: map_token: "
}
kinfo := info||"kformat <"||KFORMAT||"> "
#DEBUG := "MAP_TOKEN"
if DEBUG==("MAP_TOKEN"|"MAP"|"PARSE"|"END") then {
writes_type(mybug,tok,kinfo||"input tok")
writes_type(mylog,tok,kinfo||"input tok")
}
x := []
every y := ! tok do {
case y.ttype of {
default:{ }
"q": { y.ttype := "w" } # squote q => w for get_symbol()
"Q": { y.ttype := "w" } # dquote Q => w for get_symbol()
"a": { y.ttype := "a" } # angle a => a for get_symbol()
"b": { y.ttype := "w" } # paren b => w for get_symbol()
"c": {
case CommentMode of {
default: { next } # delete comment
"cyc": { y.ttype := "w" }
}
}
##### "h": {
##### # HTML command
##### if REMOVEHTML==("yes"|"YES") then {
##### do_html(y)
##### next # delete HTML
##### } # end if REMOVEHTML
##### } # end "h"
"B": {
case KFORMAT of {
default: { next } # delete whitespace
#("ho"|"hounit"|"nrel"): { } # keep whitespace for parsing phrase
}
}
"j": { # and,or
case KFORMAT of {
default: { } # reserved
("ho"|"hounit"|"nrel"): { y.ttype := "w" } # not reserved
}
}
#"h": { y.tvalue := trimws(y.tvalue) } # howord <== obsolete
#"r": { y.tvalue := trimws(y.tvalue) } # relword <== obsolete
#"d": { y.tvalue := trimws(y.tvalue) } # dirword <== obsolete
#"A": { y.ttype := "=" } # assignment,production <== obsolete
#"D": { y.ttype := "$" } # $variable <== obsolete
",": { y.ttype := y.tvalue } # list
";": { y.ttype := y.tvalue } # relation,proplist
#"=": { y.ttype := y.tvalue } # assignment,production
"!": { y.ttype := y.tvalue } # sh command
#"~": { y.ttype := y.tvalue } # not <== nonseparator
#"?": { y.ttype := y.tvalue } # question <== nonseparator
#"/": { y.ttype := y.tvalue } # hierarchy <== nonseparator
#"$": { y.ttype := y.tvalue } # $variable <== nonseparator
"W": { y.ttype := "w" } # weak separators
#":": { y.ttype := y.tvalue } #
#"+": { y.ttype := y.tvalue } #
#"-": { y.ttype := y.tvalue } #
#"*": { y.ttype := y.tvalue } #
"S": { # strong separators
case y.tvalue of {
default: { y.ttype := y.tvalue }
}
}
"w": {
# word inlcudes no separators
if member(mkr_word,y.tvalue) then {
y := group_map_token(y)
#case y.tvalue of {
#"next": { y.tvalue := ";" }
#"non": { y.tvalue := "~" }
#"not": { y.tvalue := "~" }
#}
} else if member(rdf_word,y.tvalue) then {
y.ttype := rdf_word[y.tvalue]
} else if member(owl_word,y.tvalue) then {
y.ttype := owl_word[y.tvalue]
} # end if
} # end "w"
"x": {
# xword includes weak separators W
if y.tvalue ? { ="isa**" | ="isc**" |
="iss**" | ="isg**" |
="isu**" | ="isp**" |
="genlmt**" | ="specmt**"
} then {
# "lw" | "l?"
y.ttype := "R"
#} else if upto("*",y.tvalue) then {
# wildcard pattern
} else if member(mkr_word,y.tvalue) then {
y := group_map_token(y)
#case y.tvalue of {
#"next": { y.tvalue := ";" }
#"non": { y.tvalue := "~" }
#"not": { y.tvalue := "~" }
#}
} else if member(rdf_word,y.tvalue) then {
y.ttype := rdf_word[y.tvalue]
} else if member(owl_word,y.tvalue) then {
y.ttype := owl_word[y.tvalue]
} else {
y.ttype := "w" # x => w for get_symbol()
} # end if
} # end "x"
} # end case y.ttype
# set syntactic nesting group count
word := y.tvalue
if DEBUG=="COMPLETE" then {
writes_type_all(word,info||"NEWcomplete")
}
case word of {
"": { }
"begin": { NEWcomplete("_begin_") }
"end": { NEWcomplete("_end_") }
"if": { NEWcomplete(word) }
"fi": { NEWcomplete(word) }
"every": { NEWcomplete(word) }
"when": { NEWcomplete(word) }
"while": { NEWcomplete(word) }
"until": { NEWcomplete(word) }
"do": { NEWcomplete(word) }
"DO": { NEWcomplete(word) }
"!": { NEWcomplete(word) }
"can": { NEWcomplete(word) }
"vdo": { NEWcomplete(word) }
"hdo": { NEWcomplete(word) }
"ido": { NEWcomplete(word) }
"happens":{ NEWcomplete(word) }
"done": { NEWcomplete(word) }
("<"|">"):{ NEWcomplete(word) }
("{"|"}"):{ NEWcomplete(word) }
("["|"]"):{ NEWcomplete(word) }
("("|")"):{ NEWcomplete(word) }
";": { NEWcomplete(word) }
} # end case y.tvalue
# separate leading and trailing "/" in HTML parsing
case KFORMAT of {
default : { put(x,y) }
("html"|"htm"|"xml"|"rdf"|"owl"): {
if y.ttype == "w" then {
if y.tvalue[1] == "/" then {
y.tvalue := y.tvalue[2:0]
put(x,TOKEN("/","/"),y)
} else if y.tvalue[-1] == "/" then {
y.tvalue := y.tvalue[1:-1]
put(x,y,TOKEN("/","/"))
} else {
put(x,y)
} # end if == "/"
} else {
put(x,y)
} # end if == "w"
} # end "html"|...
} # end case KFORMAT
} # end every y
if DEBUG==("MAP_TOKEN"|"MAP"|"PARSE"|"END") then {
writes_type(mybug,x,kinfo||"output x")
writes_type(mylog,x,kinfo||"output x")
}
return x
end
# TOKEN
procedure group_map_token(y)
#===========================
# Precondition: member(mkr_word,y.tvalue) is true.
#
# Sets y.ttype in place, depending on the global KFORMAT, and returns y:
#   KFORMAT "ho" | "hounit" | "nrel":
#       "end", "exit", "break", "let" take their type from mkr_word;
#       every other word is typed "w"
#   any other KFORMAT:
#       y.ttype := mkr_word[y.tvalue]
#
# Original design note:
#   allow reserved words in hierarchies & relations
#   => bad idea! everything gets parsed as group_statement
#
# When DEBUG is "MAP_TOKEN", "BEGIN" or "END" the token is dumped to
# mybug and mylog before and after the mapping.
local tag, msg
static prefix
initial prefix := "INFO: group_map_token"

tag := prefix || "(" || KFORMAT || "): "
#DEBUG := "END"
if DEBUG == ("MAP_TOKEN"|"BEGIN"|"END") then {
   msg := tag || "input token"
   writes_type(mybug, y, msg)
   writes_type(mylog, y, msg)
   }

if KFORMAT === ("ho"|"hounit"|"nrel") then {
   # hierarchy/relation formats: only these four words keep their
   # mkr_word type (noted in the original as "Z", "z", "z", "S")
   if y.tvalue === ("end"|"exit"|"break"|"let") then
      y.ttype := mkr_word[y.tvalue]
   else
      y.ttype := "w"
   }
else {
   # all other formats: every mkr word takes its mkr_word type
   y.ttype := mkr_word[y.tvalue]
   }

if DEBUG == ("MAP_TOKEN"|"BEGIN"|"END") then {
   msg := tag || "output token"
   writes_type(mybug, y, msg)
   writes_type(mylog, y, msg)
   }
return y
end
# TOKEN list
procedure horel_token(tok)
#=========================
# no special meaning for words in HO and REL
#
# Argument: tok - list of TOKEN records
# Returns:  a new list with one copy of each token, in the same order.
#           The records in tok are not modified (each one is copy()'d
#           before its ttype is touched).
#
# For each copy y:
#   - if y.tvalue is one of the exception words listed in "keep",
#     y.ttype is left unchanged
#   - otherwise, if y.ttype is "/" or "\\", it is left unchanged
#   - otherwise y.ttype is set to "w"
#
# When DEBUG is "HO", "REL" or "NREL" the input and output token lists
# are dumped with writes_type_all().
local x,y,t,kinfo
static info,keep
initial {
   info := "INFO: horel_token"
   # token values whose type is never overridden
   keep := set([
      # exceptions: group
      "begin", "end", "exit", ";",
      # exceptions: hierarchy
      "ho", "hierarchy", "unithierarchy",
      #####"/", "\\", "isu", "iss",       (disabled)
      # exceptions: relation
      "nrel", "relation", ",", "[", "]",
      # exceptions: do read from file done; x has y=z;
      ##"has", "=",                        (disabled)
      "do", "read", "from", "done"])
   }
kinfo := info||"("||KFORMAT||"): "
if DEBUG == ("HO"|"REL"|"NREL") then
   writes_type_all(tok,kinfo||"input token list")
x := []
every t := !tok do {
   y := copy(t)
   if not member(keep,y.tvalue) then {
      # not an exception word: keep only the hierarchy separator types
      # ("\\" should be "/")
      if not (y.ttype === ("/"|"\\")) then
         y.ttype := "w"
      }
   put(x,y)
   } # end every t
# same DEBUG gate as the input dump, so "REL" shows both sides
if DEBUG == ("HO"|"REL"|"NREL") then
   writes_type_all(x,kinfo||"output token list")
return x
end
#