#
# KEHOME/src/token.icn
# Oct/27/2005 Jul/20/2009
$include "keparam.h"
procedure init_parser()
#======================
# Run the parser start-up procedures, in this fixed order:
#   init_char, init_word, init_keyword, init_group
# (original note on init_keyword: "only a minimal set of keywords ?")
# The procedure falls off its end, so the call produces no result.
local initializer
every initializer := init_char | init_word | init_keyword | init_group do
    initializer()    # do-clause is bounded: each procedure is called once
end
global not_dollar
global ParserMode # word|line set by init.icn, symbol.icn
# CommentCharacter # initially '#'
global WordChar # for current word definition
#===================================#
# pattern matching: #
# line -> WORD list -> TOKEN list #
#===================================#
# WORD: one lexical unit as built by get_word()
#   wtype  -- type code (character)
#   wvalue -- matched text (string)
record WORD(wtype, wvalue)
# TOKEN: one token, as read by token_type() / token_value()
#   ttype  -- type code (character)
#   tvalue -- token text (string)
record TOKEN(ttype, tvalue)
#=========#
# methods #
#=========#
# get_word(fd,gtype,ps)
# map_word(word)
# get_token(word,gtype,ps)
# map_token(fd,option,ps)
# token_type(x)
# token_value(x)
# token_unparse(x)
# init_parser() # called by interpret_line() in symbol.icn
# init_parser() # called by knit_init() in knit.icn
# init_keyword() # token.icn
# init_command() # command.icn
# init_parameter() # param.icn
# init_quantifier() # token.icn
# init_gtype() # token.icn
# init_variable() # token.icn
# init_char() # char.icn
# declare_word() # token.icn
# m_dquote()
# m_squote()
# m_paren() # treat like quote
# m_angle() # treat like quote
###### m_slash() # for hierarchy
# m_comment()
# m_whitespace()
# m_dollarvar()
# m_separator()
# m_assign_word()
# m_product_word()
# m_word()
global XLINE
global SEPARATOR,WhiteSpace,NONSEP
global AssignOp,AssignChar
global WordChar,xWordChar,NumberChar
global DollarNameChar,FileNameChar
global CycConstantChar
global ListSeparator
global mkr_word,HIDDEN
global rdf_word,owl_word,cycl_word
global dmoz_word,tap_word,cyc_word
global CONTEXTword
global KEYWORD,CONTROL,VERB,PREPOSITION,OPERATOR,CONJUNCTION
global GTYPE,xxGROUP,EXGROUP,INGROUP,LATTICE
global QUANTIFIER,QUANT,groupQUANT
global VARIABLE,keVARIABLE,PRONOUN
global PARAMETER
global COMMAND
#----------------------------------------------------#
# string
procedure token_unparse(x,sep)
#=============================
# Render a TOKEN, or a list of TOKENs, as text.
#   x   -- TOKEN record or list of TOKEN records
#   sep -- separator handed to unparse() in the list case (default " ")
# Returns token_value(x) for a TOKEN, unparse(token_value(x),sep) for a
# list, and &null for any other type of x.
local text
/sep := " "
if type(x) == "TOKEN" then
    text := token_value(x)
else if type(x) == "list" then
    text := unparse(token_value(x),sep)
return text
end
# string or list
procedure token_type(x)
#======================
# Extract the type code(s) of a token or token list.
#   x -- TOKEN record or list of TOKEN records
# Returns x.ttype for a TOKEN, a new list of the ttype fields for a list,
# and &null for any other type of x.
local result,tok
if type(x) == "TOKEN" then
    result := x.ttype
else if type(x) == "list" then {
    result := []
    every tok := !x do
        put(result,tok.ttype)
}
# returning the local (not the field itself) hands back a plain value
return result
end
# string or list
procedure token_value(x)
#=======================
# Extract the text value(s) of a token or token list.
#   x -- TOKEN record or list of TOKEN records
# Returns x.tvalue for a TOKEN, a new list of the tvalue fields for a list,
# and &null for any other type of x.
local result,tok
if type(x) == "TOKEN" then
    result := x.tvalue
else if type(x) == "list" then {
    result := []
    every tok := !x do
        put(result,tok.tvalue)
}
# returning the local (not the field itself) hands back a plain value
return result
end
#===================================================================#
#===================================================================#
# words
# NOTE processing sequence:
# get_word()
# map_word()
# get_token()
# map_token()
# get_symbol()
# map_symbol()
# WORD list
procedure get_word(fd,pgtype,ps)
#==============================
# Low level parse: read lines through prompt(fd) and split each line
# into a list of WORD records.
#
#   fd     ::= file | string | list of string   (default: myin)
#   pgtype ::= "ku" | "ho" | "nrel"              (default: "ku")
#              selects the token type table used to re-tag words;
#              any other value uses the "ku" table
#   ps     ::= "ke" | "ksc"                      (default: "ke$ ")
#              only defaulted here; not referenced further in this procedure
#
# called by get_token() in token.icn
# called by parse_list() in word.icn
#
# Generates, for every line delivered by prompt():
#   empty line            -> [WORD("w","")]
#   ParserMode == "line"  -> the whole WORD list of the line
#   ParserMode == "word"  -> one single-element list per WORD
# Fails (ending the generator) on a line of unexpected type or on an
# unknown ParserMode.
#
# KFORMAT is pushed on SaveStack at entry and popped back when the
# procedure ends.  The two `fail' exits pop it as well, so a failing
# call does not leave an orphan entry on the static stack.
local ttype,wtype
local t,line
local wordlist,word,val
local matchline,mapline
static info,warning,ierror
static SaveStack
initial {
    /SaveStack := []
    info := "INFO: get_word: "
    warning := "WARNING: get_word: "
    ierror := "Internal ERROR: get_word: "
}
push(SaveStack,KFORMAT)
/fd := myin
/pgtype := "ku"
/ps := "ke$ "
# wtype is only referenced by the commented-out substitutions below
case pgtype of {
    "ku": { ttype := mkr_ttype; wtype := mkr_wtype }
    "ho": { ttype := ho_ttype; wtype := mkr_wtype }
    "nrel": { ttype := nrel_ttype; wtype := mkr_wtype }
    default:{ ttype := mkr_ttype; wtype := mkr_wtype }
} # end case pgtype
every line := prompt(fd) do {
    if DEBUG==("WORD"|"PARSE") then {
        writes_type_all(line,info||"input line")
    }
    case t := type(line) of {
        default: {
            writes_type_all(line,ierror||"unexpected type line")
            #return []
            KFORMAT := pop(SaveStack)   # restore before leaving early
            fail
        }
        ("string"|"integer"): {
            if *line = 0 then {
                if DEBUG == "NULL" then
                    writes_type_all(line,warning||"empty string line")
                suspend [WORD("w","")]
                next
            }
        }
    } # end case t
    wordlist := []
    matchline := line
    mapline := ""
    matchline ? {
        # the order of the alternatives matters: first match wins
        while word := (
            WORD("B",m_whitespace()) |
            WORD("w",m_cyc_constant()) | # must precede m_comment()
            WORD("c",m_comment()) |
            WORD("Q",m_dquote()) |
            WORD("q",m_squote()) |
            WORD("w",m_kuword()) |
            WORD("s",m_separator()) |
            WORD("a",m_angle()) |
            WORD("b",m_paren()) |
            #####WORD("h",m_htmlword()) |
            WORD("$",m_dollarvar()) |
            WORD("R",m_product_word()) | # must precede m_dcolon()
            WORD("S",m_assign_word()) |
            WORD("L",m_dcolon()) #| # : not currently a separator
            ####### WORD("/",m_slash()) | # for hierarchy
            ) #####& tab(0)
        do {
            # phase 1: map wtype for get_token()
            case word.wtype of {
                #####"h": { # html
                #####   if DEBUG==("HTML"|"HTM"|"XML"|"RDF") then {
                #####       writes_type(mybug,word,info||"html: word")
                #####       writes_type(mylog,word,info||"html: word")
                #####   } # end if DEBUG
                #####   } # end "h"
                ("q"|"Q"): { # quote
                    val := word.wvalue
                    # a lone quote mark is treated as an ordinary word
                    if *val = 1 then {
                        if DEBUG=="QUOTE" then {
                            writes_type(mybug,val,warning||"isolated quote mark")
                            writes_type(mylog,val,warning||"isolated quote mark")
                        }
                        word.wtype := "w"
                    } # end if *val
                } # end "q"|"Q"
                ("a"|"b"): { } # angle & paren
                "c": { # comment
                    if DEBUG=="COMMENT" then
                        writes_type_all(word,info||"parsed comment word")
                    #####case CommentMode of {
                    #####default: { word.wtype := "c" }
                    #####"cyc": { word.wtype := "w" } # CycL constant '#$name'
                    #####}
                }
                "B": { } # white space
                "R": { } # production operator
                "S": { } # assignment operator
                "L": { } # dcolon
                "W": { } # WordNet =>
                "X": { } # WordNet --
                "s": {
                    # other separators
                    # [,] {;} () <>
                    word.wtype := ttype[word.wvalue]
                }
                #####"=": {
                #####   case word.wvalue of {
                #####   }
                #####   }
                "w": {
                    # recognize end & exit
                    case word.wvalue of {
                        "end": { word.wtype := ttype[word.wvalue] } # "Z"
                        "exit": { word.wtype := ttype[word.wvalue] } # "z"
                        #####"::": { word.wtype := wtype[word.wvalue] } # "L"
                        #####":=": { word.wtype := wtype[word.wvalue] } # "R"
                        #####"+:=": { word.wtype := wtype[word.wvalue] } # "R"
                        #####"-:=": { word.wtype := wtype[word.wvalue] } # "R"
                        #####"*:=": { word.wtype := wtype[word.wvalue] } # "R"
                        #####"=>": { word.wtype := ttype[word.wvalue] } # "W"
                        #####"--": { word.wtype := ttype[word.wvalue] } # "X"
                    }
                    # recognize question variable & slash
                    case word.wvalue[1] of {
                        default: { }
                        "?": { word.wtype := ttype[word.wvalue] }
                        "/": { word.wtype := ttype[word.wvalue] }
                        "\\":{ word.wtype := ttype[word.wvalue] }
                    }
                    word.wvalue := map_word(word.wvalue) # token.icn
                    ##### don't substitute yet
                    #####if member(wtype,word.wvalue) then
                    #####   word.wtype := wtype[word.wvalue]
                }
            } # end case word.wtype
            if DEBUG == "WORD" then
                writes_type_all(word,info||"word")
            put(wordlist,word)
            # mapline re-assembles the (possibly mapped) text of the line
            mapline ||:= word.wvalue
        } # end do
    } # end line ?
    if DEBUG==("WORD"|"PARSE") then {
        writes_type(mybug,wordlist,info||"output wordlist")
        writes_type(mylog,wordlist,info||"output wordlist")
    }
    if mapline==line then { } else {
        if DEBUG=="MAP_WORD" then {
            writes_type(mybug,mapline,warning||"map_word change: mapline")
            writes_type(mylog,mapline,warning||"map_word change: mapline")
        } # end if DEBUG
    } # end if mapline
    # logged unconditionally: the DEBUG guard below is commented out
    #if DEBUG==("READ"|"LINE"|"WORD"|"TOKEN"|"SYMBOL") then
    writes_type(mylog,wordlist,info||"wordlist")
    case ParserMode of {
        default: {
            writes_type(myerr,ParserMode,ierror||"unexpected ParserMode")
            writes_type(mylog,ParserMode,ierror||"unexpected ParserMode")
            KFORMAT := pop(SaveStack)   # restore before leaving early
            fail
        }
        "word": { every word := !wordlist do suspend [word] }
        "line": { suspend wordlist }
    }
} # end every line
KFORMAT := pop(SaveStack)
end
# string
procedure map_word(word)
#=======================
# Map a word to its MKR spelling.
#   word -- string as matched by get_word()
# Returns word unchanged unless KMAPWORD is "YES"/"yes" and UniqueName
# is not "qualname".  In the remaining case a leading "/" is taken off
# and put back around the (currently disabled) vocabulary translation,
# so with the translation commented out the result equals the input.
local mapped
static info
initial info := "INFO: map_word: "
# mapping is switched off for every KMAPWORD value except YES/yes
if not (KMAPWORD === ("YES"|"yes")) then
    return word
# qualified names are never mapped
if UniqueName === "qualname" then
    return word
mapped := word
if word[1] === "/" then {
    # strip the leading "/" ...
    mapped := word[2:0]
    # (disabled: xml2mkr / dc2mkr / rdf2mkr / owl2mkr / mcf2mkr
    #  translation of the stripped word, see xml.icn)
    # ... and restore it
    mapped := "/"||mapped
}
if mapped ~== word then
    writes_all([TypeComment||info||"word <",word,"> => newword <",mapped,">"])
return mapped
end
# string
procedure m_whitespace()
#=======================
# Matching procedure (low level parse): consume a run of one or more
# WhiteSpace characters at the current scanning position and produce it.
# The first call (re)sets the global WhiteSpace to blank, tab, vertical
# tab, return, newline and formfeed.
# get_word() wraps the match as WORD("B",...).
# Original notes (get_token()/map_token() are not visible here):
#   get_token() sets ttype := "B"; map_token() deletes ttype "B"
initial WhiteSpace := ' \t\v\r\n\f'
suspend tab(many(WhiteSpace))
end
# string
procedure m_separator()
#======================
# Matching procedure (low level parse): consume exactly one SEPARATOR
# character at the current scanning position and produce it.
# If SEPARATOR is still null on the first call it is set to the quote
# marks plus [,] {;} () and | ; angle brackets are left out.
# get_word() wraps the match as WORD("s",...) and then replaces the
# wtype by the token-table entry for the character.
# Original note (map_token() is not visible here):
#   map_token() changes ttype from "S" to the individual character
initial /SEPARATOR := "\"\'" ++ '[,]' ++ '{;}' ++ '()' ++ '|' ##### ++ '<>'
suspend tab(any(SEPARATOR))
end
###### string
#####procedure m_end()
######================
#####suspend \
##### ="end"
#####end
# string
procedure m_assign_word()
#========================
# Matching procedure (low level parse): match an assignment operator,
# trying "+=", "-=", "*=" before the bare "=".
# get_word() wraps the match as WORD("S",...).
# The operator text and prefix set are private statics of this
# procedure; the globals AssignOp/AssignChar are not consulted here.
# Original notes (get_token()/map_token() are not visible here):
#   get_token() sets ttype := "="; map_token() leaves ttype := "="
static equals,prefix
initial {
    equals := "="
    prefix := '+-*'
}
suspend (tab(any(prefix)) || =equals) | =equals
end
# string
procedure m_product_word()
#=========================
# Matching procedure (low level parse): match a production operator,
# trying "+:=", "-:=", "*:=" before the bare ":=".
# get_word() wraps the match as WORD("R",...) and tries this matcher
# before m_dcolon(), which would otherwise take the ":".
# Original notes (get_token()/map_token() are not visible here):
#   get_token() sets ttype := "R"; map_token() leaves ttype := "R"
static arrow,prefix
initial {
    arrow := ":="
    prefix := "+-*"
}
suspend (tab(any(prefix)) || =arrow) | =arrow
end
# string
procedure m_dcolon()
#===================
# Matching procedure (low level parse): consume a run of one or more
# ":" characters at the current scanning position and produce it.
# get_word() wraps the match as WORD("L",...).
# Original notes (get_token()/map_token() are not visible here):
#   get_token() sets ttype := "L"; map_token() leaves ttype := "L"
suspend tab(many(':'))
end
###### string
#####procedure m_word()
######=================
###### low level parse
###### NOTE: get_word() sets wtype := "w"
###### NOTE: get_token() sets ttype := "w"
###### NOTE: map_token() leaves ttype := "w"
#####suspend \
##### tab(many(WordChar))
#####end
#===================================================================#
#===================================================================#
# extended words
# string
procedure m_xword()
#==================
# Matching procedure (intermediate level parse): consume a run of one
# or more characters of the global cset xWordChar and produce it.
# Original notes (get_token()/map_token() are not visible here):
#   m_xword() includes "w", "q","Q", "+","-","*",":"
#   get_token() sets ttype := "x"
#   map_token() changes ttype from "x" to "w"
suspend tab(many(xWordChar))
end
# string
procedure m_dollarvar()
#======================
# Matching procedure (low level parse): match a dollar variable,
# either  $name  or  ${name} , where name is a run of one or more
# DollarNameChar characters.  The unbraced form is tried first.
# get_word() wraps the match as WORD("$",...).
suspend ="$" || tab(many(DollarNameChar))
suspend ="${" || tab(many(DollarNameChar)) || ="}"
end
# string
procedure m_cyc_constant()
#=========================
# Matching procedure (low level parse): match a Cyc constant, i.e. the
# prefix "#$" followed by one or more CycConstantChar characters.
# get_word() wraps the match as WORD("w",...) and tries this matcher
# before m_comment().
suspend ="#$" || tab(many(CycConstantChar))
end
# string
procedure m_filename()
#=====================
# Matching procedure (high level parse): consume a run of one or more
# characters of the global cset FileNameChar (legal file name
# characters) and produce it.
suspend tab(many(FileNameChar))
end
# string
procedure m_dotvar()
#===================
# Matching procedure: match a dot variable (pronoun).
# Alternatives are produced longest first: "...", then "..", then ".".
suspend =("..." | ".." | ".")
end
# string
procedure m_number()
#===================
# Matching procedure: match a number at the current scanning position.
# A number must begin with a digit and may continue with characters of
# the global cset NumberChar.
# Alternatives are produced longest first, like the other matchers in
# this file: the digit plus its NumberChar continuation, then (on
# resumption, or when there is no continuation) the leading digit alone.
# A bounded call on "123" therefore yields "123" rather than "1".
suspend \
( tab(any(&digits)) || tab(many(NumberChar)) ) |
( tab(any(&digits)) )
end
# string
procedure m_dquote()
#===================
# Matching procedure (low level parse): match a double-quoted string.
# Alternatives, in order:
#   1. m_quotemark(), one or more non-dquote characters, m_quotemark()
#   2. the empty string ""
#   3. an isolated dquote
# Because the body excludes the dquote character, the match stops at
# the first closing mark.
# get_word() wraps the match as WORD("Q",...); a one-character result
# (isolated mark) is re-tagged there as "w".
# Original notes (get_token()/map_token()/myio.icn are not visible here):
#   get_token() includes "Q" in ttype "x"
#   map_token() changes ttype from "x" to "w"
#   multi-line dquote -- see m_quotemark() in myio.icn
static mark,body
initial {
    mark := "\""
    body := &cset -- mark
}
suspend m_quotemark() || tab(many(body)) || m_quotemark()
suspend =mark || =mark    # empty dquote
suspend =mark             # isolated dquote
end
# string
procedure m_squote()
#===================
# Matching procedure (low level parse): match a single-quoted string.
# Alternatives, in order:
#   1. squote, one or more non-squote characters, squote
#   2. the empty string ''
#   3. a lone squote (apostrophe: it has no matching squote)
# get_word() wraps the match as WORD("q",...); a one-character result
# is re-tagged there as "w".
# Original notes (get_token()/map_token() are not visible here):
#   get_token() includes "q" in ttype "x"
#   map_token() changes ttype from "x" to "w"
static mark,body
initial {
    mark := "\'"
    body := &cset -- mark
}
suspend =mark || tab(many(body)) || =mark
suspend =mark || =mark    # empty squote
suspend =mark             # apostrophe
end
# string
procedure m_paren()
#==================
# Matching procedure (low level parse): match a parenthesised string,
# treated like a quote.
# Alternatives, in order:
#   1. "(", one or more characters other than ")", ")"
#   2. the empty pair ()
# The body excludes only ")", so the match ends at the first ")" and
# nested "(" are taken as ordinary text.
# get_word() wraps the match as WORD("b",...).
# Original notes (get_token()/map_token() are not visible here):
#   get_token() includes "b" in ttype "x"
#   map_token() changes ttype from "x" to "w"
static open,close,body
initial {
    open := "("
    close := ")"
    body := &cset -- close
}
suspend =open || tab(many(body)) || =close
suspend =open || =close   # empty parentheses
end
# string
procedure m_angle()
#==================
# Matching procedure (low level parse): match an angle-bracketed
# string, treated like a quote.
# Alternatives, in order:
#   1. "<", one or more characters other than "<" and ">", ">"
#   2. the empty pair <>
# get_word() wraps the match as WORD("a",...).
# Original notes (get_token()/map_token() are not visible here):
#   get_token() includes "a" in ttype "x"
#   map_token() changes ttype from "x" to "a"
static open,close,body
initial {
    open := "<"
    close := ">"
    body := &cset -- open -- close
}
suspend =open || tab(many(body)) || =close
suspend =open || =close   # empty angle
end
# string
procedure m_comment(sharp)
#=========================
# Matching procedure (low level parse): match a comment running to the
# end of the subject:
#   comment ::= "#" not_dollar arb EndOfLine
# The character after "#" must exist and must not be "$", because cyc
# uses the "#$" prefix for constants (see m_cyc_constant()).
#   sharp -- ignored: the comment character is always "#" here
#            (defaulting from CommentCharacter is commented out)
# The first call defaults CommentCharacter to '#' and not_dollar to
# every character except '$' when they are still null.
# get_word() wraps the match as WORD("c",...).
# Original notes (other files are not visible here):
#   get_token() sets ttype := "c" or "w"; map_token() deletes ttype "c"
#   some comments removed by prompt() in myio.icn
#   see trimcomment() in word.icn
initial {
    /CommentCharacter := '#'
    #####/CommentMode := "mke"
    /not_dollar := &cset -- '$'
}
##/sharp := CommentCharacter
sharp := "#"
suspend =sharp || tab(any(not_dollar)) || tab(0)
end
# string
procedure m_nvsep()
#==================
# Matching procedure for get_token(): match an assignment separator.
# Alternatives are produced in this order: "+=", "-=", "*=", "=".
suspend =("+=" | "-=" | "*=" | "=")
end
# string
procedure m_sphrase()
#====================
# Matching procedure (intermediate level parse, whitespace is "B"):
#   sphrase ::= qword
#             | qword "B" sphrase
# Results of m_qword() alone come first (null results are skipped),
# followed by the recursive, longer phrases.
# Original notes (symbol.icn is not visible here):
#   symbol.icn now uses m_lphrase instead of m_sphrase
#   consecutive words excluding MKR words (is,has,do, ...)
#   use word or qword ???
#   sphrase isa string (token.icn); lphrase isa list (symbol.icn)
#   pword defined in symbol.icn (excludes is,has,do,...)
#   blank removed by map_symbol() in symbol.icn
suspend \m_qword()
suspend m_qword() || ="B" || m_sphrase()
end
# string
procedure Blank(s)
#=================
# "Replace" a string by the whitespace marker.
#   s -- ignored
# Always returns "B", the type code used for whitespace in this file.
local marker
marker := "B"
return marker
end
# string
procedure m_slash()
#==================
# Matching procedure (for hierarchy): consume a run of one or more "/"
# characters, or else a run of one or more "\" characters, at the
# current scanning position and produce it.
suspend tab(many("/" | "\\"))
end
#-----------------------------------------------#
#-----------------------------------------------#
#-----------------------------------------------#
procedure init_word()
#====================
# Define the global character sets and word tables used by the parser,
# then run the dependent init_* procedures and fill the HIDDEN set.
# Takes no arguments; falls off its end, so the call produces no result.
#
# Steps, in order:
#   1. character sets: NONSEP, SEPARATOR, AssignOp, WhiteSpace, WordChar,
#      DollarNameChar, CycConstantChar, AssignChar, xWordChar,
#      ListSeparator, FileNameChar, NumberChar
#   2. fresh tables mkr_word, rdf_word, owl_word, cycl_word, dmoz_word,
#      tap_word, cyc_word, BinaryRelation
#   3. word -> type-code entries for cyc_word, owl_word, rdf_word, mkr_word
#   4. init_keyword, init_gtype, init_quantifier, init_variable,
#      init_parameter, init_command, init_char
#   5. HIDDEN gets every separator, whitespace character, PARAMETER
#      element and COMMAND element
#   6. kaction_set (empty) and identify_set
#
# NOTE(review): BinaryRelation, kaction_set, identify_set and
# CommentCharacter have no `global' declaration in the visible part of
# this file -- confirm they are declared global elsewhere, otherwise
# the assignments below are local to this procedure.
# NOTE(review): several insert() calls below repeat an earlier key with
# the same value; the repeats have no effect on the resulting table.
#
# called by knit_init() in knit.icn
# define global parsing variables
# SEPARATOR,WhiteSpace, WEAKSEP,NONSEP
# WordChar,xWordChar,NumberChar, FileNameChar
# mkr_word,HIDDEN
# rdf_word,owl_word,cycl_word
# dmoz_word,tap_word,cyc_word
# BinaryRelation
# KEYWORD,CONTROL,VERB,PREPOSITION,CONJUNCTION,OPERATOR
# QUANTIFIER,QUANT,groupQUANT
# GTYPE,xxGROUP,EXGROUP,INGROUP
# VARIABLE,keVARIABLE,PRONOUN
# PARAMETER,COMMAND
# special_ctype,special_charname
# legal_utype,legal_chartype
# (the local `word' is only used by commented-out code)
local sep,ws,word,param,cmd
initial {
CommentCharacter := '#'
}
NONSEP :=
'/' ++ # hierarchy,filename (/)
'\\' ++ # hierarchy,filename (/)
'~' ++ # logic,filename (~)
'&' ++ # logic (&)
'|' ++ # logic (|)
'?' ++ # question (?)
'$' ++ # dollar variable ($)
'.' ++ # dot variable,number (.)
'!' ++ # sh command (!)
'+' ++ # set add (+)
'-' ++ # set delete (-)
'*' ++ # wildcard,set intersection (*)
':' # production,format,view (:)
SEPARATOR :=
###'$' ++ # dollar variable ($)
"\"" ++ # dquote (Q)
"\'" ++ # squote (q)
###CommentCharacter ++ # comment (c)
###'<>' ++ # HTML command (S)
'[,]' ++ # comma list (S)
'{;}' ++ # semicolon list (S)
'()' ++ # precedence (q)
'|' # pipeline (|)
AssignOp :=
'=' # assignment (=)
WhiteSpace :=
' ' ++ # blank (B)
'\t' ++ # tab (B)
'\v' ++ # vertical tab (B)
'\r' ++ # linereturn (B)
'\n' ++ # newline (B)
'\f' # newpage (B)
# low level parse
# a word character is anything that is not whitespace, a separator or "="
WordChar := &cset -- WhiteSpace -- SEPARATOR -- AssignOp
DollarNameChar := WordChar -- NONSEP
CycConstantChar := WordChar -- NONSEP
AssignChar := '+-*' ++ '&|~' ++ ':'
# intermediate level parse
xWordChar := 'w' ++ 'qQ'
# high level parse
ListSeparator := ","
FileNameChar := &letters ++ &digits ++ '/\\:.~ '
NumberChar := '.' ++ &digits
#WordChar := &cset -- WhiteSpace -- '[,];' -- "=" -- '{}' --'<>' -- '()'
#-----------------------------------------------------#
#-----------------------------------------------------#
# token types
mkr_word := table()
rdf_word := table()
owl_word := table()
cycl_word := table()
dmoz_word := table()
tap_word := table()
cyc_word := table()
BinaryRelation := table() # see binrel.icn
#====================#
# OpenCyc vocabulary #
#====================#
# Classes
insert(cyc_word,"Thing","w") # existent
insert(mkr_word,"Thing","w") # existent
# Properties
insert(cyc_word,"#$genlMt","V") # Mt1 #$genlMt Mt2
insert(cyc_word,"#$specMt","V") # Mt1 #$specMt Mt2
insert(mkr_word,"genlmt","V") # Mt1 #$genlMt Mt2
insert(mkr_word,"specmt","V") # Mt1 #$specMt Mt2
insert(mkr_word,"genlmt*","V") # Mt1 #$genlMt Mt2
insert(mkr_word,"specmt*","V") # Mt1 #$specMt Mt2
insert(mkr_word,"genlmt**","V") # Mt1 #$genlMt Mt2
insert(mkr_word,"specmt**","V") # Mt1 #$specMt Mt2
insert(mkr_word,"genlmt**?","?") # Mt1 #$genlMt Mt2
insert(mkr_word,"specmt**?","?") # Mt1 #$specMt Mt2
#====================#
# RDF/OWL vocabulary #
#====================#
# Classes
insert(owl_word,"Thing","w") # existent
insert(owl_word,"Nothing","w") # nonexistent
insert(owl_word,"owl:Thing","w") # existent
insert(owl_word,"owl:Nothing","w") # nonexistent
insert(owl_word,"owl:Class","w") # set of concepts
insert(owl_word,"owl:DatatypeProperty","w") # attribute
insert(owl_word,"owl:ObjectProperty","w") # relation part interaction
insert(owl_word,"owl:Ontology","w") # view
insert(owl_word,"owl:Restriction","w") # differentia
insert(owl_word,"owl:FunctionalProperty","w") # subcharacteristic
insert(owl_word,"owl:InverseFunctionalProperty","w") # subcharacteristic
insert(owl_word,"owl:SymmetricProperty","w") # subcharacteristic
insert(owl_word,"owl:TransitiveProperty","w") # subcharacteristic
insert(owl_word,"owl:AllDifferent","w") #
insert(owl_word,"owl:DeprecatedClass","w") #
insert(owl_word,"owl:DeprecatedProperty","w") #
# Properties
insert(owl_word,"owl:sameAs","i") # is
insert(owl_word,"owl:sameIndividualAs","i") # is
insert(owl_word,"owl:equivalentClass","V") # maybe is
insert(owl_word,"owl:equivalentProperty","V") # maybe is
insert(owl_word,"owl:oneOf","V") # isany
insert(owl_word,"owl:unionOf","V") # isunion
insert(owl_word,"owl:intersectionOf","V") # isintersection
insert(owl_word,"owl:complementOf","V") # is not
insert(owl_word,"owl:inverseOf","V") # inverse
insert(owl_word,"owl:onProperty","w") #
insert(owl_word,"owl:cardinality","w") #
insert(owl_word,"owl:maxCardinality","w") #
insert(owl_word,"owl:minCardinality","w") #
insert(owl_word,"owl:allValuesFrom","w") #
insert(owl_word,"owl:someValuesFrom","w") #
insert(owl_word,"owl:differentFrom","V") #
insert(owl_word,"owl:disjointWith","V") #
insert(owl_word,"owl:distinctMembers","V") #
insert(owl_word,"owl:backwardCompatibleWith","V") #
insert(owl_word,"owl:incompatibleWith","V") #
insert(owl_word,"owl:priorVersion","w") #
insert(rdf_word,"rdfs:Class","w") # set of concepts
insert(rdf_word,"rdfs:Resource","w") # existent
insert(rdf_word,"rdf:Property","w") # characteristic
insert(rdf_word,"rdf:type","V") # has type= <=> isu
insert(rdf_word,"rdfs:subClassOf","V") # iss*
insert(rdf_word,"rdfs:subPropertyOf","V") # iss*
insert(rdf_word,"rdfs:domain","w") #
insert(rdf_word,"rdfs:range","w") #
#================#
# non separators #
#================#
insert(mkr_word,"?","?") # question
insert(mkr_word,"/","/") # hierarchy,filename,HTML
insert(mkr_word,"/","/") # hierarchy,filename,HTML (repeat of the previous line)
#insert(mkr_word,"/","S") # hierarchy,filename,HTML (strong separator)
insert(mkr_word,"\\","/") # hierarchy,filename,HTML
insert(mkr_word,"$","$") # dollar variable
#insert(mkr_word,".",".") # dot variable,number
insert(mkr_word,"::","L") # named proposition
# assign words
insert(mkr_word,":=","R") # production
insert(mkr_word,"+:=","R") # production
insert(mkr_word,"-:=","R") # production
insert(mkr_word,"*:=","R") # production
insert(mkr_word,"=","=") # assignment,production
insert(mkr_word,"+","+") # assignment,word
insert(mkr_word,"-","-") # assignment,word
insert(mkr_word,"*","*") # assignment,word, wildcard
insert(mkr_word,":",":") # assignment,production, format, view
insert(mkr_word,"*","*") # assignment,wildcard (repeat of the "*" entry above)
insert(mkr_word,"!","D") # sh command
insert(mkr_word,"~","~") # assignment,not
insert(mkr_word,"&","&") # assignment,and
#============#
# separators #
#============#
# quote, comment
#insert(mkr_word,"\'","q") # squote for get_word()
#insert(mkr_word,"\"","Q") # dquote for get_word()
#insert(mkr_word,"(","b") # paren for get_word()
#insert(mkr_word,")","b") # paren for get_word()
#insert(mkr_word,"<","a") # angle for get_word()
#insert(mkr_word,">","a") # angle for get_word()
#insert(mkr_word,CommentCharacter,"c") # comment for get_word()
# strong separators
insert(mkr_word,",","S") # list of phrase
insert(mkr_word,";","S") # list of proposition
insert(mkr_word,"[","S") # list of phrase
insert(mkr_word,"]","S") # list of phrase
###insert(mkr_word,"[","[") # list of phrase
###insert(mkr_word,"]","]") # list of phrase
insert(mkr_word,"{","S") # list of proposition
insert(mkr_word,"}","S") # list of proposition
insert(mkr_word,"(","S") # precedence & quote
insert(mkr_word,")","S") # precedence & quote
insert(mkr_word,"<","S") # HTML & multi-line quote
insert(mkr_word,">","S") # HTML & multi-line quote
#============#
# whitespace #
#============#
insert(mkr_word," ","B") # blank
insert(mkr_word,"\t","B") # tab
#insert(mkr_word,"\v","B") # vertical tab
insert(mkr_word,"\r","B") # linereturn
insert(mkr_word,"\n","B") # newline
insert(mkr_word,"\f","B") # newpage
#=======#
# words #
#=======#
# generic names for grammar examples
#####insert(mkr_word,"proposition","w")
#####insert(mkr_word,"production","w")
#####insert(mkr_word,"sentence","w")
#####insert(mkr_word,"statement","w")
#####insert(mkr_word,"question","w")
#####insert(mkr_word,"command","w") # do command od arg done;
#####insert(mkr_word,"arg","w") # do command od arg done;
#####insert(mkr_word,"relverb","B") # subject relverb object;
#####insert(mkr_word,"subject","w")
#####insert(mkr_word,"verb","w")
#####insert(mkr_word,"object","w")
#####insert(mkr_word,"preposition","P")
# action
insert(mkr_word,"do","D") # action
insert(mkr_word,"ido","D") # interaction
insert(mkr_word,"DO","D") # axiomatic level
insert(mkr_word,"!","D") # UNIX shell (repeat of the "!" entry above)
insert(mkr_word,"can","w") # capability
###insert(mkr_word,"can","D") # capability
###insert(mkr_word,"do*","D") # capability
insert(mkr_word,"vdo","D") # all views
insert(mkr_word,"hdo","D") # hierarchy
insert(mkr_word,"done","d")
# preposition
insert(mkr_word,"at","A") # context ==> use "A"
#insert(mkr_word,"at","P") # context ==> use "A"
insert(mkr_word,"out","P") # product
insert(mkr_word,"of","P") # domain (part)
insert(mkr_word,"with","P") # modifier (also definition)
insert(mkr_word,"od","P") # direct object
insert(mkr_word,"from","P") # initial
insert(mkr_word,"to","P") # final
insert(mkr_word,"in","P") # array (index)
# hierarchy
insert(mkr_word,"is","V") # alias (also definition)
insert(mkr_word,"syn","V") # synonym
insert(mkr_word,"isu","V") # unit isu genus (also definition)
insert(mkr_word,"iss","V") # species iss genus (also definition)
insert(mkr_word,"isa","V") # unit|species isa genus (also definition)
insert(mkr_word,"isp","V") # genus isp unit
insert(mkr_word,"isg","V") # genus isg species
insert(mkr_word,"isc","V") # genus isc unit|species
insert(mkr_word,"isa+","V") # 1 or more levels
insert(mkr_word,"isc+","V") # 1 or more levels
insert(mkr_word,"is*","V") # 0 or more levels
insert(mkr_word,"isa*","V") # 0 or more levels
insert(mkr_word,"isc*","V") # 0 or more levels
insert(mkr_word,"iss*","V") # 0 or more levels
insert(mkr_word,"isg*","V") # 0 or more levels
insert(mkr_word,"isu*","V") # 0 or more levels
insert(mkr_word,"isp*","V") # 0 or more levels
insert(mkr_word,"isa**","l") # followed by integer
insert(mkr_word,"isc**","l") # followed by integer
insert(mkr_word,"isa**?","?") # question verb
insert(mkr_word,"isc**?","?") # question verb
# attribute
insert(mkr_word,"iswith","V") # differentia
insert(mkr_word,"has","V")
insert(mkr_word,"HAS","V")
# part
insert(mkr_word,"haspart","V")
insert(mkr_word,"isapart","V")
insert(mkr_word,"haspart*","V")
insert(mkr_word,"isapart*","V")
# relation
insert(mkr_word,"rel","V")
##insert(mkr_word,"nrel","V")
##insert(mkr_word,"trel","V")
##insert(mkr_word,"brel","V")
##insert(mkr_word,"urel","V")
insert(mkr_word,"isin","V")
# assignment
insert(mkr_word,"let","r")
insert(mkr_word,"vlet","r")
insert(mkr_word,"unlet","r")
# NSM concepts
insert(mkr_word,"causes","V") # cause-effect
insert(mkr_word,"because","V") # NSM effect-cause
insert(mkr_word,"like","V") # NSM similarity
insert(mkr_word,"happens","D") # NSM happen
insert(mkr_word,"happensod","B")# NSM happen
insert(mkr_word,"before","V") # NSM time
insert(mkr_word,"after","V") # NSM time
insert(mkr_word,"above","V") # NSM space
insert(mkr_word,"below","V") # NSM space
insert(mkr_word,"beside","V") # NSM space
insert(mkr_word,"inside","V") # NSM space
insert(mkr_word,"outside","V") # NSM space
insert(mkr_word,"causes*","V") # cause-effect
insert(mkr_word,"because*","V") # effect-cause
insert(mkr_word,"like*","V") # NSM similarity
insert(mkr_word,"happensod*","B")# NSM happen
insert(mkr_word,"before*","V") # time
insert(mkr_word,"after*","V") # time
insert(mkr_word,"above*","V") # space
insert(mkr_word,"below*","V") # space
insert(mkr_word,"beside*","V") # space
insert(mkr_word,"inside*","V") # space
insert(mkr_word,"outside*","V") # space
# NSM view
#insert(mkr_word,"maybe","w") # view
# generator verb
insert(mkr_word,"in","P") # x in concept list (repeat of the "in" preposition entry)
# exgroup, ingroup
insert(mkr_word,"isalt","V") # alternative isalt exgroup
insert(mkr_word,"isany","V") # exgroup isany alternative
insert(mkr_word,"isall","V") # ingroup isall member
insert(mkr_word,"ismem","V") # member ismem ingroup
# for OWL
# ("ismem" and "isand" are inserted more than once below, same value each time)
insert(mkr_word,"isand","V") # intersection (requisite)
insert(mkr_word,"ismem","V") # intersection (requisite)
insert(mkr_word,"isor","V") # union
insert(mkr_word,"isxor","V") # disjoint union
insert(mkr_word,"xor","V") # differentFrom
insert(mkr_word,"ismem","V") # union
insert(mkr_word,"isand","V") # restriction ???
insert(mkr_word,"ismem","V") # restriction ???
insert(mkr_word,"isnon","V") # complement wrt genus
insert(mkr_word,"isnot","V") # complement wrt existent
insert(mkr_word,"inverse","V") # inverse
insert(mkr_word,"isalt*","V") # alternative isalt* exgroup
insert(mkr_word,"isany*","V") # exgroup isany* alternative
insert(mkr_word,"isall*","V") # ingroup isall* member
insert(mkr_word,"ismem*","V") # member ismem* ingroup
# concept formation
insert(mkr_word,"isd","V") # differentiate
insert(mkr_word,"isi","V") # integrate
insert(mkr_word,"means","V")
insert(mkr_word,"isref","V") # is referent of
insert(mkr_word,"means*","V")
insert(mkr_word,"isref*","V")
# groups
insert(mkr_word,"begin","Y") # group definition
insert(mkr_word,"end","Z") # group definition
insert(mkr_word,"hierarchy","w") # hierarchy,lattice
insert(mkr_word,"unithierarchy","w") # unithierarchy (backwards compatibility)
insert(mkr_word,"relation","w") # relation
insert(mkr_word,"group","w") # group,triple,mcf,...
# control structure
insert(mkr_word,"exit","z") # exit KE
insert(mkr_word,"break","z") # exit every,while,until,when
insert(mkr_word,"return","r") # return Product=value;
insert(mkr_word,"suspend","r") # suspend Product=value; (Unicon generator)
insert(mkr_word,"if","I") # conditional
insert(mkr_word,"then","T") # conditional
insert(mkr_word,"else","E") # conditional
insert(mkr_word,"fi","F") # conditional
insert(mkr_word,"iff","J") # conjunction
insert(mkr_word,"|","|") # conjunction: pipeline - treat like ";"
insert(mkr_word,"for","G") # iteration
insert(mkr_word,"every","G") # iteration
insert(mkr_word,"while","I") # iteration
insert(mkr_word,"until","I") # iteration
insert(mkr_word,"when","I") # monitor events
# other words
# NSM words
insert(mkr_word,"good","w") # NSM evaluator
insert(mkr_word,"bad","w") # NSM evaluator
insert(mkr_word,"big","w") # NSM descriptor
insert(mkr_word,"small","w") # NSM descriptor
insert(mkr_word,"very","Q") # NSM intensifier
insert(mkr_word,"more","Q") # NSM augmentor
insert(mkr_word,"near","w") # NSM space distance
insert(mkr_word,"far","w") # NSM space distance
insert(mkr_word,"long","w") # NSM time duration
insert(mkr_word,"short","w") # NSM time duration
#insert(mkr_word,"some","Q") # NSM time duration
insert(mkr_word,"and","j") # logic
insert(mkr_word,"or","j") # logic
insert(mkr_word,"not","N") # complement wrt existent
insert(mkr_word,"non","N") # complement wrt genus
insert(mkr_word,"a","Q") # quantifier
insert(mkr_word,"all","Q") # ingroup quantifier
insert(mkr_word,"any","Q") # exgroup quantifier
insert(mkr_word,"either","Q") # exgroup quantifier
insert(mkr_word,"no","Q") # quantifier
insert(mkr_word,"some","Q") # quantifier
insert(mkr_word,"the","Q") # quantifier
##insert(mkr_word,"exists","G") # KIF first order logic
#####insert(mkr_word,"forall","G") # KIF first order logic
#####insert(mkr_word,"forany","G") # MKR first order logic
#####insert(mkr_word,"forSome","G") # OWL first order logic
#####insert(mkr_word,"forAll","G") # OWL first order logic
insert(mkr_word,"implies","J") # logic
#####insert(mkr_word,"|-","J") # implies
#insert(mkr_word,"delete","w") # do delete ... done
# iQ,iG words for reparsing compound statements
#insert(mkr_word,"iseither","V")
#insert(mkr_word,"isthe","V")
# ig,ih,ir words for exgroup and ingroup
#insert(mkr_word,"isconcept","V")
#insert(mkr_word,"isset","V")
#insert(mkr_word,"islist","V")
#insert(mkr_word,"issequence","V")
#insert(mkr_word,"isrelation","V")
#insert(mkr_word,"ishierarchy","V")
#insert(mkr_word,"islattice","V")
#insert(mkr_word,"isdirectory","V")
#insert(mkr_word,"isconcept","V")
# ad-hoc additions for parsing English phrases
#=============================================
#####insert(mkr_word,"for","j") # conjunction
insert(mkr_word,"vs.","j") # conjunction
# end mkr_word
#-----------------------------------------------------#
#-----------------------------------------------------#
init_keyword() # token.icn
init_gtype() # token.icn
init_quantifier() # token.icn
init_variable() # token.icn
init_parameter() # param.icn
init_command() # command.icn
init_char() # char.icn
# ignore hidden words when dumping concepts
# NOTE(review): HIDDEN is not created in this procedure; presumably one
# of the init_* calls above builds it -- TODO confirm
every sep := string(!SEPARATOR) do
insert(HIDDEN,sep)
delete(HIDDEN,".")
every ws := !WhiteSpace do
insert(HIDDEN,ws)
#####every word := key(mkr_word) do
##### insert(HIDDEN,word)
delete(HIDDEN,"let")
delete(HIDDEN,"vlet")
delete(HIDDEN,"unlet")
every param := !PARAMETER do
insert(HIDDEN,param)
every cmd := ! COMMAND do
insert(HIDDEN,cmd)
# mental actions - know,believe,...
# action object is (may be) proposition list
# kaction is action with ktype=kt
# declared by tabrasa.def, user
kaction_set := set()
# identification actions # <== no longer used
identify_set := set([
"identify","perceive","classify","measure","define",
"see","hear","touch","smell","taste"
])
# initialize rdf map
#####rdf2mkr("rdf:type")
end
procedure declare_word()
#=======================
# Declare the special words by running each registration step in turn.
# called by initialize_knit() in knit.icn
# The steps run strictly in the order listed; the outcome of each step
# is ignored, and the procedure itself produces no result.
   local steps, action
   steps := [
      add_separator,    # token.icn
      add_keyword,      # token.icn
      add_quantifier,   # token.icn
      add_gtype,        # token.icn
      add_variable,     # token.icn
      add_parameter,    # param.icn
      add_command       # command.icn
   ]
   every action := !steps do
      action()
end
#===================================================================#
#===================================================================#
# KFORMAT == "ku" #
#===================================================================#
###### TOKEN
#####procedure m_endtoken()
######=====================
###### block end for hierarchy|relation|directory
#####suspend \
##### TOKEN("Z",="end") # begin end
#####end
# TOKEN list
procedure m_kulist()
#===================
# kulist ::=
#     kutoken
#     kutoken kulist
# Generates TOKEN lists at the current scanning position:
# first every one-token list, then every longer list made of a
# leading token followed by a recursively matched kulist.
   suspend [m_kutoken()]
   suspend [m_kutoken()] ||| m_kulist()
end
# TOKEN
procedure m_kutoken()
#====================
# Matching procedure: generates one TOKEN record per alternative that
# matches at the current scanning position.  The alternatives are tried
# strictly in the order written below, so earlier entries win.
# When reached through get_token() the scanning subject is the string of
# word-type codes (one character per WORD), which is why most entries
# match a single code letter such as "c", "B" or "w" with the = operator.
# m_slash(), m_xword() and m_byte() are matching procedures defined
# elsewhere.
# The backslash after "suspend" keeps the expression going onto the next
# line; as a prefix operator it only requires the first TOKEN to be
# non-null, which a record always is.
# intermediate level parse
# NOTE: "q","Q","w" included in xword ("$" ???)
# NOTE: map_token() changes "x" to "w"
# NOTE: map_token() deletes comment "c"
# NOTE: map_token() deletes whitespace "B"
suspend \
TOKEN("?", ( ="?" )) | # question variable
TOKEN("/", m_slash()) | # hierarchy, HTML end
#TOKEN("$", m_dollarvar()) | # dollar variable
#TOKEN("$", ( ="$w" )) | # dollar variable
#TOKEN("$", ( ="${w}" )) | # dollar variable
#TOKEN("$", ( ="$R" )) | # dollar variable
TOKEN("$", ( ="$" )) | # dollar variable
#TOKEN("n", m_number()) | # integer or real
#TOKEN(".", m_dotvar()) | # dot pronoun (included in $variable)
#TOKEN(".", ( ="..." )) | # dot pronoun (included in $variable)
#TOKEN(".", ( =".." )) | # dot pronoun (included in $variable)
#TOKEN(".", ( ="." )) | # dot pronoun (included in $variable)
###TOKEN("h", ( ="h" )) | # HTML command
TOKEN("c", ( ="c" )) | # comment #...
TOKEN("B", ( ="B" )) | # whitespace
TOKEN(",", ( ="," )) | # list
TOKEN(";", ( =";" )) | # list
TOKEN("!", ( ="!" )) | # sh command
TOKEN("{", ( ="{" )) | # strong separators
TOKEN("}", ( ="}" )) | # strong separators
TOKEN("[", ( ="[" )) | # strong separators
TOKEN("]", ( ="]" )) | # strong separators
TOKEN("(", ( ="(" )) | # strong separators
TOKEN(")", ( =")" )) | # strong separators
TOKEN("<", ( ="<" )) | # strong separators
TOKEN(">", ( =">" )) | # strong separators
TOKEN("|", ( ="|" )) | # strong separators
TOKEN("L", ( ="L" )) | # propname ::
#TOKEN("L", ( ="::" )) | # propname ::
TOKEN("W", ( ="W" )) | # WordNet =>
TOKEN("X", ( ="X" )) | # WordNet --
##TOKEN("R", ( ="::=" )) | # BNF production ::=
TOKEN("R", ( ="R" )) | # production :=
#TOKEN("R", ( =":=" )) | # production :=
#TOKEN("R", ( ="+:=" )) | # production :=
#TOKEN("R", ( ="-:=" )) | # production :=
#TOKEN("R", ( ="*:=" )) | # production :=
TOKEN("S", ( ="S" )) | # assignment
#TOKEN("=", m_nvsep()) | # assignment =
#TOKEN("=", ( ="+=" )) | # assignment +=
#TOKEN("=", ( ="-=" )) | # assignment -=
#TOKEN("=", ( ="*=" )) | # assignment *=
##TOKEN("=", ( ="=" )) | # assignment =
TOKEN("x", m_xword()) | # (wqQ)
TOKEN("x", ( ="+" )) | # weak separator
TOKEN("x", ( ="-" )) | # weak separator
TOKEN("x", ( ="*" )) | # weak separator
TOKEN("x", ( =":" )) | # weak separator
TOKEN("q", ( ="q" )) | # squote '...'
TOKEN("Q", ( ="Q" )) | # dquote "..."
TOKEN("a", ( ="a" )) | # angle <...>
TOKEN("b", ( ="b" )) | # paren (...)
TOKEN("Y", ( ="Y" )) | # begin group
TOKEN("Z", ( ="Z" )) | # end group
TOKEN("z", ( ="z" )) | # exit MKE
TOKEN("x", ( ="w" )) | # word
# NOTE(review): the next alternative matches the empty string, so it
# succeeds at every position without consuming input -- confirm intended.
TOKEN("w", ( ="" )) | # empty string
TOKEN("U", m_byte()) # anything else is unknown
end
#===================================================================#
#===================================================================#
# KFORMAT == "ho" | "hounit"
#===================================================================#
# TOKEN list
procedure m_holist()
#===================
# holist ::=
#     hotoken
#     hotoken holist
# Generates TOKEN lists: all one-token lists first, then each leading
# hotoken concatenated with a recursively matched holist.
   suspend [m_hotoken()]
   suspend [m_hotoken()] ||| m_holist()
end
# TOKEN
procedure m_hotoken(sep)
#=======================
# hotoken ::=
#     kutoken
# The former endtoken / holevel / hophrase alternatives are obsolete
# (now handled in kutoken or in symbol.icn), so this simply generates
# whatever m_kutoken() generates.
# sep defaults to dequote(HOSEPARATOR); it is not used by the remaining
# alternative.
   if /sep then
      sep := dequote(HOSEPARATOR)
   suspend m_kutoken()
end
#===================================================================#
#===================================================================#
# KFORMAT == "dir"
#===================================================================#
# TOKEN list
procedure m_dirlist()
#====================
# dirlist ::=
#     dirtoken
#     dirtoken dirlist
# Generates TOKEN lists: all one-token lists first, then each leading
# dirtoken concatenated with a recursively matched dirlist.
   suspend [m_dirtoken()]
   suspend [m_dirtoken()] ||| m_dirlist()
end
# TOKEN
procedure m_dirtoken(sep)
#=======================
# dirtoken ::=
#     endtoken
#     dirword
#     kutoken
# Generates, in this order: a "Z" token for the literal "end", a "d"
# token for each m_dirword() match, then every m_kutoken() result.
# The sep parameter is accepted but not used.
   suspend TOKEN("Z", ="end")
   suspend TOKEN("d", m_dirword())
   suspend m_kutoken()
end
# string
procedure m_dirword()
#===================#
# Matches the longest run of directory-word characters at the current
# scanning position and produces the matched string.
# Every character is a word character except:
#     /     hierarchy separator
#     ! =   left out so that commands and assignments can be recognized
   static allowed
   initial allowed := &cset -- '/!='
   suspend tab(many(allowed))
end
#===================================================================#
#===================================================================#
# KFORMAT == "nrel"
#===================================================================#
# TOKEN list
procedure m_rellist()
#====================
# rellist ::=
#     reltoken
#     reltoken rellist
# Generates TOKEN lists: all one-token lists first, then each leading
# reltoken concatenated with a recursively matched rellist.
   suspend [m_reltoken()]
   suspend [m_reltoken()] ||| m_rellist()
end
# TOKEN
procedure m_reltoken()
#=====================
# reltoken ::=
#     kutoken
# The former endtoken / relsep / relphrase alternatives are obsolete
# (now handled in kutoken or in symbol.icn); only kutoken remains.
   suspend m_kutoken()
end
# string
procedure m_relsep(sep)
#======================
# Matches the relation separator at the current scanning position and
# produces it.  When sep is omitted it defaults to
# dequote(RELSEPARATOR).
   if /sep then
      sep := dequote(RELSEPARATOR)
   suspend =sep
end
#===================================================================#
#===================================================================#
# KFORMAT == "nv"
# now same rules as "ku"
#===================================================================#
#===================================================================#
#===================================================================#
#===================================================================#
procedure init_keyword()
#=======================
# Build the reserved-word sets and their union:
#     CONTROL, VERB, PREPOSITION, OPERATOR, CONJUNCTION  -> KEYWORD
# All of them are sets of strings held in global variables.
#
# FIX: the VERB list used to end with a trailing comma (left behind when
# the last entries were commented out).  In an Icon list constructor an
# omitted element is &null, so VERB and KEYWORD contained a null member
# that add_keyword() then passed to add_unit() and insert(HIDDEN,...).
# The trailing comma is removed; the duplicated "can" entry is also
# dropped (a set ignores duplicates, so that part changes nothing).
   CONTROL := set([
      "begin",
      "end",
      "exit",
      "break",
      "for",
      "every","done",
      "while","until",
      "when",
      "forall","exists",
      "if","then","else","fi"
      ])
   VERB := set([
      "inverse",
      # for CycL
      "genlmt","genlmt+","genlmt*","genlmt**","genlmt**?",
      "specmt","specmt+","specmt*","specmt**","specmt**?",
      # for OWL
      "isand",   # intersection
      "isor",    # union
      "isxor",   # disjoint union
      "xor",     # disjoint
      # for NSM
      "before","before*",
      "after","after*",
      "above","above*",
      "below","below*",
      "beside","beside*",
      "inside","inside*",
      "outside","outside*",
      "happensod","happensod*",
      "like", "like*",   # NSM
      # MKR
      "IS",
      "ISA",
      "ISC",
      "HAS",
      "DO",
      "isa","isa+","isa*","isa**","isa**?",
      "isc","isc+","isc*","isc**","isc**?",
      "isu","isu*","isu**","isu**?",
      "isp","isp*","isp**","isp**?",
      "iss","iss*","iss**","iss**?",
      "isg","isg*","isg**","isg**?",
      "ismem","ismem*",
      "isall","isall*",
      "isalt","isalt*",
      "isany","isany*",
      "is","is*","iswith",
      "isd",
      "isi",
      "rel","nrel","trel","brel","urel",
      "has","can",
      "haspart","haspart*",
      "isapart","isapart*",
      "do","vdo","hdo", "ido",
      "happens",
      "let","vlet","unlet",
      "means", "means*",
      "isref", "isref*"
      # iQ words for reparsing compound statements (disabled):
      #"iseither",
      #"isno",
      #"issome",
      #"isthe"
      ])
   PREPOSITION := set([
      "in",
      "at",     # context
      "out",    # product
      "of",     # part
      "with",   # characteristic
      "od",     # direct object
      "from",   # initial characteristic
      "to"      # final characteristic
      ])
   OPERATOR := set([
      "=",      # name = value
      "+=",     # name op value
      "-=",     # name op value
      "*=",     # name op value
      ":=",     # product := producer
      "+:=",    # product prodop producer
      "-:=",    # product prodop producer
      "*:=",    # product prodop producer
      "::="     # BNF grammar
      ])
   CONJUNCTION := set([
      "iff",
      "implies",              # "|-"
      "supports",             # "|=" situation theory notation
      "causes", "causes*",    # NSM
      "because", "because*"   # NSM
      ])
   KEYWORD := CONTROL ++ VERB ++ PREPOSITION ++ OPERATOR ++ CONJUNCTION
end
procedure init_quantifier()
#==========================
# Fill the global quantifier sets:
#     QUANT       plain quantifiers
#     groupQUANT  quantifiers that apply to groups
#     QUANTIFIER  union of the two
   QUANT := set()
   every insert(QUANT, "a" | "no" | "some" | "the")
   groupQUANT := set()
   every insert(groupQUANT, "either" | "all" | "any")
   QUANTIFIER := QUANT ++ groupQUANT
end
procedure init_gtype()
#=====================
# Fill the global group-type sets xxGROUP, INGROUP, EXGROUP and LATTICE,
# then combine them into GTYPE.
   xxGROUP := set()
   every insert(xxGROUP, "exgroup" | "ingroup" | "concept")
   INGROUP := set()
   every insert(INGROUP,
      "list" |
      "requisite" |
      "sequence" |
      "set" |
      "intersection" |   # OWL
      "LATTICE")
   EXGROUP := set()
   every insert(EXGROUP,
      "enum" |           # OWL oneOf
      "union")           # OWL
   LATTICE := set()
   every insert(LATTICE, "hierarchy" | "lattice")
   GTYPE := xxGROUP ++ INGROUP ++ EXGROUP ++ LATTICE
end
procedure init_variable()
#========================
# context-dependent variable
# Fill the global sets keVARIABLE and PRONOUN and combine them into
# VARIABLE.  ("Statement" is deliberately not a keVARIABLE.)
   keVARIABLE := set()
   every insert(keVARIABLE,
      "Sentence" |
      "Question" |
      "Command" |
      "Assignment" |
      "If" |
      "Every")
   PRONOUN := set()
   every insert(PRONOUN,
      "." | ".." | "..." |      # pronoun
      "I" | "we" |
      "you" |
      "he" | "she" | "they" |
      "it" |
      "none" |
      "someone" |               # NSM
      "something" |             # NSM
      "people")                 # NSM
   VARIABLE := keVARIABLE ++ PRONOUN
end
procedure add_separator()
#========================
# Declare the "separator" concept (a species of "symbol") and file the
# named whitespace separators and the SEPARATOR characters as its units.
   local name, ch
   new_concept("separator",,"separator")
   add_species("separator","symbol")
   # named whitespace separators
   every name := "blank" | "tab" | "vertical tab" |
                 "line return" | "newline" | "newpage" do
      add_unit(name,"separator")
   #####add_alias(" ","blank")
   #####add_alias("\t","tab")
   #####add_alias("\v","vertical tab")
   #####add_alias("\r","line return")
   #####add_alias("\n","newline")
   #####add_alias("\f","newpage")
   # $. are filed as variables, not separators
   every ch := !(SEPARATOR -- '$.') do
      add_unit(string(ch),"separator")
   #####add_alias("sharp","#")
   #####add_alias("dquote","\"")
   #####add_alias("squote","\'")
   #####add_alias("lparen","(")
   #####add_alias("rparen",")")
   #####merge_alias(set([",","comma"]))
   #####merge_alias(set([";","semicolon"]))
   #####merge_alias(set(["&","and"]))
   #####merge_alias(set(["|","or"]))
   #####merge_alias(set(["~","not"])) # complement wrt existent
   #####merge_alias(set(["~","non"])) # complement wrt genus
end
procedure add_keyword()
#======================
# File every keyword as a unit of its word class and hide it from
# concept dumps.  Each row of the table below is
#     [ species, parent species, set of keywords ]
# and the rows are processed in order: the species is declared first,
# then each keyword is added as a unit of it and inserted into HIDDEN.
   local classes, row, kw
   add_species("symbol","OBJECT")
   add_species("word","symbol")
   classes := [
      ["verb",        "word",   VERB],
      ["conjunction", "word",   CONJUNCTION],
      ["preposition", "word",   PREPOSITION],
      ["separator",   "symbol", OPERATOR],
      ["control",     "word",   CONTROL]
      ]
   every row := !classes do {
      add_species(row[1],row[2])
      every kw := !row[3] do {
         add_unit(kw,row[1])
         insert(HIDDEN,kw)
      }
   }
   merge_alias(set(["forall","forAll"]),"forAll")    # use OWL name
   merge_alias(set(["exists","forSome"]),"forSome")  # use OWL name
   #####merge_alias(set(["isc","iseither"]))
   #####merge_alias(set(["|=","supports"]))
   #####merge_alias(set(["|-","implies"]))
end
procedure add_quantifier()
#=========================
# Declare the "quantifier" concept as a species of "word", then file
# each member of QUANTIFIER as one of its units and hide it.
   local quant
   new_concept("quantifier",,"quantifier")
   add_species("quantifier","word")
   every quant := !QUANTIFIER do {
      add_unit(quant,"quantifier")
      insert(HIDDEN,quant)
   }
end
procedure add_gtype()
#====================
# File each group type as a species of its family and hide it.
# Each row is [ set of group types, parent species ]; rows and members
# are processed in order.
   local families, row, gt
   families := [
      [xxGROUP, "group"],
      [INGROUP, "ingroup"],
      [EXGROUP, "exgroup"],
      [LATTICE, "LATTICE"]
      ]
   every row := !families do
      every gt := !row[1] do {
         add_species(gt,row[2])
         insert(HIDDEN,gt)
      }
   #####add_alias("enum","enum")
   #####add_alias("req","requisite")
   #####add_alias("seq","sequence")
end
procedure add_variable()
#=======================
# context-dependent variable
# Declare each keVARIABLE as a species of "variable" and each PRONOUN as
# a species of "pronoun" (with ctype "pronoun"), and insert every one of
# them into HIDDEN.
#
# FIX: the pronoun loop used to call insert(HIDDEN,var), which re-inserted
# whatever value the first loop left in var and never hid the pronouns
# themselves.  It now inserts the pronoun being processed.
   local var,pro
   # declare variables
   every var := !keVARIABLE do {
      add_species(var,"variable")      # var iss variable
      #put_char("attr","ke",var,[])    # ke has var
      insert(HIDDEN,var)
   }
   # declare pronouns
   add_species("pronoun","attribute")
   every pro := !PRONOUN do {
      add_species(pro,"pronoun")       # pro iss pronoun
      put_ctype(pro,"pronoun")         # pro has ctype=pronoun
      #put_char("attr","ke",pro,[])    # ke has pronoun
      insert(HIDDEN,pro)
   }
end
#===================================================================#
#===================================================================#
#===================================================================#
# TOKEN
procedure copy_token(tok)
#========================
# Returns a new TOKEN record carrying the same ttype and tvalue as tok.
   return TOKEN(tok.ttype,tok.tvalue)
end
# list of TOKEN
procedure copy_tlist(tok)
#========================
# Returns a new list holding a copy_token() copy of each element of
# tok, in the same order.
   local copies, item
   copies := []
   every item := !tok do
      put(copies,copy_token(item))
   return copies
end
#-------------------------------------------------------------
# TOKEN list
procedure get_token(fd,pgtype,ps)
#================================
# Generator: reads WORD results from get_word(fd,pgtype,ps), converts each
# one into a TOKEN list, and suspends the result according to ParserMode:
#     "word"  suspends each TOKEN separately
#     "line"  suspends the whole TOKEN list
#     other   logs an internal error, restores KFORMAT/PGTYPE and fails
# Parameters:
#     fd      input source, defaults to myin
#     pgtype  selects the token-type map, defaults to "ku"
#     ps      passed through to get_word()
# The globals PGTYPE and KFORMAT are pushed on entry and restored when the
# generator runs to completion (or on the ParserMode error path).
# fd ::= file | string | list of string
# ttype ::= mkr_word | ho_ttype | nrel_ttype
# ps ::= "ke" | "ksc"
# called by parse_file() in symbol.icn
# called by get_prop() in proplist.icn
# suspend TOKEN list for WORD list
local m_token
local ttype,wtype
local line,wordlist,wline,wlist
local wtok,tok
local i,j,k,y,z
local ij,tline
local KHRV,VD
local word,token
local prog,info,error,warning,ierror
static SaveStack
initial {
# first call only: create the save stack and default any unset globals
/SaveStack := []
/DOT1 := "existent"
/VIEW := "tabula rasa"
/KFORMAT := "ku"
/HOSEPARATOR := "/"
/RELSEPARATOR := ","
/NVSEPARATOR := "="
/PIPESEPARATOR := "|"
}
# save the globals that are restored on exit (popped in reverse order)
push(SaveStack,PGTYPE)
push(SaveStack,KFORMAT)
/fd := myin
/pgtype := "ku"
# DOTCONCEPT := ".="||DOT1
# VD := VIEW||":"||DOTCONCEPT
# KHRV := KFORMAT||HOSEPARATOR||RELSEPARATOR||NVSEPARATOR
# message prefixes used for logging below
prog := "get_token("||pgtype||"): "
info := "INFO: "||prog
error := "ERROR: "||prog
ierror := "Internal ERROR: "||prog
warning := "WARNING: "||prog
# choose the token-type map handed to map_token(); wtype is set but not
# used again in this procedure
case pgtype of {
"ku": { ttype := mkr_ttype; wtype := mkr_wtype }
"ho": { ttype := ho_ttype; wtype := mkr_wtype }
"nrel": { ttype := nrel_ttype; wtype := mkr_wtype }
default:{ ttype := mkr_ttype; wtype := mkr_wtype }
} # end case pgtype
#------------------------------------------------------------#
every word := get_word(fd,pgtype,ps) do {
# normalize the get_word() result (a WORD or a list of WORD) to a list
wordlist := []
wordlist := word_put(wordlist,word)
line := unparse(wordlist,"") # blanks are still in wordlist
if DEBUG == ("WORD"|"TOKEN"|"PARSE") then {
writes_type_all(wordlist,info||"input wordlist")
##writes_type_all(line,info||"input word line")
}
if *line = 0 then next
#####if *line = 0 then fail
# fallback result used when the pattern match below fails
# NOTE(review): m_kutoken() uses "U" for unknown and "X" for WordNet "--";
# confirm that "X" is the intended type here.
tok := [TOKEN("X",line)] # unknown token if no match
# every known KFORMAT currently uses the same matcher, m_kulist
case KFORMAT of {
"list": { m_token := m_kulist }
"ku": { m_token := m_kulist }
"cu": { m_token := m_kulist }
"ho": { m_token := m_kulist } # was m_holist
"hounit":{ m_token := m_kulist } # was m_holist
"dir": { m_token := m_kulist } # was m_dirlist
"group": { m_token := m_kulist } # was m_rellist
"nrel": { m_token := m_kulist } # was m_rellist
"nv": { m_token := m_kulist }
"kb": { m_token := m_kulist }
"html": { m_token := m_kulist }
"htm": { m_token := m_kulist }
"xml": { m_token := m_kulist }
"rdf": { m_token := m_kulist }
"owl": { m_token := m_kulist }
"nt": { m_token := m_kulist }
"mcf": { m_token := m_kulist }
default: {
writes_type(mylog,KFORMAT,warning||"unknown kformat")
KFORMAT := "ku"
m_token := m_kulist
}
} # end case KFORMAT
# build wline (one wtype character per word) and wlist (the parallel
# list of word values); a missing wtype defaults to "w"
wline := ""
wlist := []
every word := !wordlist do {
if DEBUG == ("WORD"|"TOKEN"|"PARSE") then {
writes_type_all(word,info||"input word")
}
/word.wtype := "w"
wline ||:= word.wtype
put(wlist,word.wvalue)
} # end every word
if DEBUG == ("WORD"|"TOKEN"|"PARSE") then {
writes_type_all(wline,info||"input wline")
#writes_type_all(wlist,info||"input wlist")
}
# match the whole type string; pos(0) requires the match to reach its end
if wline ? { ( wtok <- m_token() ) & pos(0) }
then {
if DEBUG==("WORD"|"TOKEN"|"PARSE") then {
writes_type_all(wtok,info||"parse result wtok")
}
# replace each matched type string by the word values it covers:
# i indexes tokens, j indexes words; a token whose tvalue has n characters
# consumes n consecutive words, whose values are concatenated
j := 0
every i := 1 to *wtok do {
j +:= 1
ij := "(i="||i||",j="||j||")"
y := wtok[i].tvalue
z := wlist[j]
case *y of {
0: { writes_type(mylog,wline,ierror||"zero length tvalue"||ij) }
1: { }
default: {
# composite words
if DEBUG==("MAP"|"WORD"|"TOKEN") then {
writes_type_all(y,info||"composite y"||ij)
}
every k := 2 to *y do {
j +:= 1
z ||:= wlist[j]
} # end every k
} # end default
} # end case *y
wtok[i].tvalue := z
} # end every i
tok := map_token(wtok,ttype)
tline := token2string(tok)
if DEBUG==("WORD"|"TOKEN"|"PARSE") then {
writes_type(mylog,tok,info||"map result tok")
writes_type(mylog,tline,info||"map result tline")
}
} else {
# no match: count the error and keep the fallback tok set above
KERROR +:= 1
writes_type_all(line,error||"unknown token")
#####OLDcomplete("_UnknownToken_") # token.icn
} # end if wline ? { }
if DEBUG == ("TOKEN") then
writes_type_all(tok,info||"tokenlist")
case ParserMode of {
default: {
writes_type_all(ParserMode,ierror||"unexpected ParserMode")
KFORMAT := pop(SaveStack)
PGTYPE := pop(SaveStack)
fail
}
"word": { every token := !tok do suspend token }
"line": { suspend tok }
}
} # end every word
#----------------------------------------------------------#
# input exhausted: restore the saved globals
# NOTE(review): if the caller never resumes this generator to completion,
# these pops do not run and the saved values stay on SaveStack -- confirm
# that callers always drain it.
KFORMAT := pop(SaveStack)
PGTYPE := pop(SaveStack)
end
#==========================================================#
#==========================================================#
# integer
procedure NEWcomplete(word)
#==========================
# Tracks syntactic nesting across calls in static variables and returns an
# integer describing the current state.
# word is either a control request (handled by the first case below) or an
# input word that may open or close a group (handled by the KFORMAT case).
# Control requests:
#     "_count_"             1 if any ku_* counter is positive, else *nesting
#     "_html_"              returns html_beginend
#     "_html_reset_"        clears html_nest, sets html_beginend to 1
#     "_group_"             returns BeginEnd
#     "_begin_" / "_end_"   increments / decrements BeginEnd and returns it
#     "_<x>_reset_"         zeroes one ku_* counter, returns *nesting
#     "_start_"|"_restart_" empties nesting, zeroes the ku_* counters and
#                           xml_beginend, returns *nesting
# For ordinary words the result is *nesting, the depth of the nesting stack.
# popcheck() calls back into this procedure when it finds a mismatch.
# NOTE: map_token() no longer calls NEWcomplete()
# for begin,end (allows immediate processing
# of relation tuples & hierarchy info.)
local x
static nesting,html_nest
static BeginEnd
static ku_begin,ku_for,ku_every,ku_brace,ku_paren
static html_beginend,xml_beginend
static info
initial {
info := "INFO: NEWcomplete: "
BeginEnd := 0
ku_begin := 0 # begin gtype gname; ... end gtype gname;
ku_for := 0 # for quantifier generator; { ... };
ku_every := 0 # every generator; { ... };
ku_brace := 0 # { ... }
ku_paren := 0 # ( ... ) # for WordNet
html_beginend := 0 # < ... >
xml_beginend := 0 # content
nesting := [] # stack
html_nest := [] # stack
}
# check nesting
#--------------#
# called by map_token() in token.icn word
# called by parse_file() in symbol.icn "_count_"
# called by map_token() in token.icn "_group_"
# called by map_token() in token.icn "_begin_"
# called by map_token() in token.icn "_end_"
case word of {
"_count_": {
if ku_begin > 0 then
return 1
else if ku_for > 0 then
return 1
else if ku_every > 0 then
return 1
else if ku_brace > 0 then
return 1
else if ku_paren > 0 then
return 1
#else if html_beginend > 0 then
# return 1
#else if xml_beginend > 0 then
# return 1
else
return *nesting
}
"_html_": { return html_beginend }
# NOTE(review): unlike its neighbours this clause has no return, so control
# continues into the nesting update below -- confirm intended.
"_html_reset_": { html_nest := []; html_beginend := 1 }
"_group_": { return BeginEnd }
# > 0 means in begin end group
"_begin_": { BeginEnd +:= 1; return BeginEnd }
"_end_": { BeginEnd -:= 1; return BeginEnd }
"_for_reset_": { ku_for := 0; return *nesting }
"_every_reset_": { ku_every := 0; return *nesting }
"_begin_reset_": { ku_begin := 0; return *nesting }
"_brace_reset_": { ku_brace := 0; return *nesting }
"_paren_reset_": { ku_paren := 0; return *nesting }
("_start_"|"_restart_"): {
###BeginEnd := 0 # finish begin-end group
nesting := []
###html_nest := []
ku_begin := 0
ku_for := 0
ku_every := 0
ku_brace := 0
ku_paren := 0
###html_beginend := 1
xml_beginend := 0
return *nesting
}
default: { } # continue below
} # end case word
# while inside a begin-end group only "end" is processed further
if ku_begin > 0 then {
# inside begin-end group
if word == "end" then { } else
return *nesting
}
#####if html_beginend > 0 then {
##### # inside < >
##### if word == ">" then { } else
##### return *nesting
#####}
if DEBUG=="COMPLETE" then {
writes_type_all(nesting,info||"before update: nesting")
writes_type_all(word,info||"word")
}
# update nesting
#---------------#
# openers push a marker on the nesting stack; closers call popcheck() with
# the opener they expect to find on top of it
case KFORMAT of {
"list": { }
("ku"|"cu"): {
case word of {
#"_start_": { push(nesting,word) }
#"_restart_": { push(nesting,word) }
#";": { popcheck("_start_",word,nesting) }
"=>": { push(nesting,word) } # WordNet entry
"--": { popcheck("=>",word,nesting) }# WordNet entry
"if": { push(nesting,word) }
"fi": { popcheck("if",word,nesting) }
"for": {
ku_for +:= 1
push(nesting,"for")
}
(
"every"|
"while"|"until"|"when"
): {
# all four iteration words share the "every" marker and counter
ku_every +:= 1
push(nesting,"every")
}
"do": { push(nesting,word) }
"DO": { push(nesting,"do") }
#"can": { push(nesting,"do") }
"vdo": { push(nesting,"do") }
"hdo": { push(nesting,"do") } # hwalk
"ido": { push(nesting,"do") } # hwalk
"!": { push(nesting,"do") } # shell command (! ... done;)
"happens": { push(nesting,"do") } # interaction
#(";"| "done"): { popcheck("do",word,nesting) }
"done": { popcheck("do",word,nesting) }
"[": { push(nesting,word) }
"]": { popcheck("[",word,nesting) }
"{": {
# an opening brace closes a pending "for" (checked first) or "every"
# header before it is pushed itself
if ku_for > 0 then {
popcheck("for",word,nesting)
ku_for -:= 1
} else if ku_every > 0 then {
popcheck("every",word,nesting)
ku_every -:= 1
}
ku_brace +:= 1
push(nesting,word)
}
"}": {
ku_brace -:= 1
popcheck("{",word,nesting)
}
"(": {
ku_paren +:= 1
push(nesting,word)
}
")": {
ku_paren -:= 1
popcheck("(",word,nesting)
}
"<": { push(nesting,word) }
#####">": { popcheck("",word,nesting) }
">": { popcheck("<",word,nesting) }
# begin/end only adjust the counter; the stack is not touched
"begin": { ku_begin +:= 1 } #; push(nesting,word) }
"end": { ku_begin -:= 1 } #; popcheck("begin",word,nesting) }
} # end case word
}
("ho"|"hounit"|"nrel"|"nt"|"mcf"): {
case word of {
"begin": { ku_begin +:= 1 } #; push(nesting,word) }
"end": { ku_begin -:= 1 } #; popcheck("begin",word,nesting) }
"(": {
ku_paren +:= 1
push(nesting,word)
}
")": {
ku_paren -:= 1
popcheck("(",word,nesting)
}
} # end case word
}
# NOTE(review): "xlm" looks like a typo for "xml" (get_token() uses "xml");
# this clause has no active body, so it makes no difference here.
("html"|"htm"|"xlm"|"rdf"|"daml"|"owl"): {
##case word of {
##"<": { html_beginend +:= 1; push(html_nest,word) }
##">": { html_beginend -:= 1; popcheck("",word,html_nest) }
##"/": { html_beginend -:= 1; popcheck("/",word,html_nest) }
##} # end case word
}
} # end case KFORMAT
if DEBUG=="COMPLETE" then {
writes_type_all(nesting,info||"after update: nesting")
}
return *nesting
end
# WORD list
procedure word_put(wordlist,word)
#================================
# Appends word to wordlist and returns the resulting list.
#     WORD record  appended in place with put()
#     list         concatenated onto wordlist (a new list is produced)
#     other type   logged as an internal error; the procedure fails
# An empty list is accepted; it is only reported when DEBUG is "NULL".
   static ierror
   initial ierror := "Internal ERROR: word_put: "
   if type(word) == "WORD" then {
      put(wordlist,word)
      return wordlist
   }
   if type(word) == "list" then {
      wordlist |||:= word
      if (*word = 0) & (DEBUG == "NULL") then
         writes_type_all(word,ierror||"empty list word")
      return wordlist
   }
   writes_type_all(word,ierror||"unexpected type word")
   fail
end
procedure popcheck(nestbegin,nestend,nest)
#=========================================
# Pops the top of the nesting stack nest and checks that it is the
# opener nestbegin expected by the closer nestend.
#   - empty stack: a warning is logged unless the expected pair is one of
#     "", "<>", "()", "[]", "{}"; NEWcomplete("_restart_") is called and
#     the procedure returns.
#   - opener matches: nothing more to do.
#   - mismatch: a warning is written; for a known opener its counter is
#     reset where one exists and NEWcomplete(nestend) is called again so
#     the closer is checked against the next stack entry; an unknown
#     opener is only reported.
# Except for the empty-stack case the procedure produces no result.
   local opener, expected, actual
   static info,popfail,mismatch
   initial {
      info := "INFO: popcheck: "
      popfail := "WARNING: popcheck: empty nesting stack: "
      mismatch := "WARNING: popcheck: mismatch: "
   }
   expected := nestbegin||nestend
   if DEBUG=="COMPLETE" then
      writes_type_all(expected,info||"lookfor")

   # nothing to pop
   if not (opener := pop(nest)) then {
      if not (expected == ("" | "<>" | "()" | "[]" | "{}")) then
         writes_type_all(expected,popfail||"looking for")
      NEWcomplete("_restart_")
      return
   }

   actual := opener||nestend
   if DEBUG=="COMPLETE" then
      writes_type_all(actual,info||"found")
   if actual == expected then
      fail                              # properly nested

   # mismatch: report, reset the opener's counter, retry the closer
   writes_all(["# "||mismatch,"looking for ",expected," found "||actual])
   case opener of {
      "for":   NEWcomplete("_for_reset_")
      "every": NEWcomplete("_every_reset_")
      "begin": NEWcomplete("_begin_reset_")
      "{":     NEWcomplete("_brace_reset_")
      "(":     NEWcomplete("_paren_reset_")
      ("do" | "if" | "=>"): { }         # no counter to reset
      default: {
         writes_all(["# "||mismatch,"unexpected xbegin ",opener])
         fail
      }
   }
   NEWcomplete(nestend)
   ## if lookfor == ">" then {
   ## # wait for next HMTL group
   ## } else {
   ## NEWcomplete("_restart_")
   ## }
   fail
end
#==========================================================#
# integer
procedure OLDcomplete(word)
#=======================
# set syntactic group count
#     begin - end
#     if - fi
#     do - done
#     for
#     [ ] { } < > ( )
# Keeps one static counter per kind of group, updates the counters for
# word according to KFORMAT, and returns the resulting group count:
#     "ku"|"cu"               sum of all statement/bracket counters
#     "ho"|"hounit"|"nrel"    begin-end count
#     "html"|"htm"|"xml"|"rdf" angle-bracket count
#     "list"                  no count is computed (null is returned)
#     any other KFORMAT       0
# The reset words listed below zero every counter before the update.
#
# FIX: the markup formats were selected with "xlm", a typo for "xml", so
# angle brackets were never counted for XML input.  "xml" is now accepted;
# "xlm" is kept so existing settings behave as before.
# FIX: the initial "ku" count (used only for debug output) added angle
# twice; it now uses the same sum as the final count.
# Unused statics (ierror, semicolon) were removed.
   local gcount
   local iinfo
   static info
   static beginend, iffi, dodone, fordo, whendo, whiledo, untildo
   static bracket, brace, angle, paren
   static htmlgroup
   initial {
      info := "INFO: OLDcomplete: "
      beginend := iffi := dodone := fordo := 0
      whendo := whiledo := untildo := 0
      bracket := brace := angle := paren := 0
      htmlgroup := 0
   } # end initial
   iinfo := info||" word <"||word||"> "

   # do initial count
   #-----------------#
   case KFORMAT of {
      default: { gcount := 0 }
      "list": { }
      ("ku"|"cu"): {
         gcount := iffi + fordo + whendo + whiledo + untildo + dodone +
                   bracket + brace + paren +
                   angle
      }
      ("ho"|"hounit"|"nrel"): {
         gcount := beginend
      }
      ("html"|"htm"|"xml"|"xlm"|"rdf"): {
         gcount := angle
      }
   } # end case KFORMAT
   if DEBUG == "COMPLETE" then {
      writes_type(mybug,gcount,iinfo||"initial gcount")
      writes_type(mylog,gcount,iinfo||"initial gcount")
   }

   # do reset
   #---------#
   case word of {
      default: { } # continue
      (
      "_TabulaRasaComplete_"|      # initialize_tabrasa() in init.icn
      "_InitializationComplete_"|  # main() in ke.icn,ksc.icn,tap2mkr.icn
      "_BeginRead_"|               # command() in command.icn
      "_EndRead_"|                 # command() in command.icn
      "_BeginGroup_"|              # init.icn
      "_EndGroup_"|                # init.icn
      "_GroupError_"|              # init.icn
      "_SyntaxError_"              # get_symbol() in symbol.icn
      #"_LineComplete_"|           # continue_token(), main()
      #"_UnknownToken_"            # get_token() in token.icn
      ): {
         writes_type(mylog,word,info||"reset word")
         beginend := iffi := dodone := fordo := 0
         whendo := whiledo := untildo := 0
         bracket := brace := angle := paren := 0
         htmlgroup := 0
      } # end reset
   } # end case

   # update count
   #-------------#
   case KFORMAT of {
      "list": { }
      ("ku"|"cu"): {
         case word of {
            "if": { iffi +:= 1 }
            "fi": { iffi -:= 1 }
            "for": { fordo +:= 1 }
            "every": { fordo +:= 1 }
            "when": { whendo +:= 1 }
            "while": { whiledo +:= 1 }
            "until": { untildo +:= 1 }
            ("do"|
            "DO"|
            "!"|
            #"can"|
            "vdo"|
            "hdo"|
            "ido"): { dodone +:= 1 }
            "happens": { dodone +:= 1 }
            #(";"| "done"): { dodone -:= 1 }
            "done": { dodone -:= 1 }
            "[": { bracket +:= 1 }
            "]": { bracket -:= 1 }
            "{": {
               # an opening brace also closes one pending header of each
               # iteration kind
               brace +:= 1
               if fordo > 0 then fordo -:= 1
               if whendo > 0 then whendo -:= 1
               if whiledo > 0 then whiledo -:= 1
               if untildo > 0 then untildo -:= 1
            }
            "}": { brace -:= 1 }
            "(": { paren +:= 1 }
            ")": { paren -:= 1 }
            "<": { angle +:= 1 }
            ">": { angle -:= 1 }
         } # end case word
         gcount := iffi + fordo + whendo + whiledo + untildo + dodone +
                   bracket + brace + paren +
                   angle
      }
      ("ho"|"hounit"|"nrel"): {
         case word of {
            "begin": { beginend +:= 1 }
            "end": { beginend -:= 1 }
         } # end case word
         gcount := beginend
      }
      ("html"|"htm"|"xml"|"xlm"|"rdf"): {
         case word of {
            "<": { angle +:= 1 }
            ">": { angle -:= 1 }
         } # end case word
         gcount := angle
      }
   } # end case KFORMAT
   #####case word of {
   #####"htmlbegin": { htmlgroup +:= 1 }
   #####"htmlend": { htmlgroup -:= 1 }
   #####} # end case word
   #####gcount := htmlgroup
   if DEBUG == "COMPLETE" then {
      writes_type(mybug,gcount,iinfo||"final gcount")
      writes_type(mylog,gcount,iinfo||"final gcount")
   }
   return gcount
end
#==========================================================#
#==========================================================#
# TOKEN
procedure OLDcontinue_token(linetoken,infd,continue)
#================================================
# Legacy line-continuation helper.  Looks at the type of the last token in
# linetoken and at the group count from OLDcomplete(); when the line looks
# unfinished it prompts for another line, tokenizes it and returns a value
# combining both lines.  Otherwise linetoken is returned unchanged.
# Parameters:
#     linetoken  value with .line and .token fields (see review note below)
#     infd       input to prompt from, defaults to myin
#     continue   "yes" (the default) enables continuation
# Fails if the token list is empty.
# NOTE(review): this procedure reads linetoken.line / linetoken.token and
# builds TOKEN(newline,newtok), but the TOKEN record declared in this file
# has fields ttype,tvalue; it also calls get_word(xline) and
# get_token(xlineword,infd,continue), which do not match the parameter
# lists documented in this file, and copy_list(), which is not defined in
# the visible part of the file.  Presumably it predates those definitions --
# confirm before reviving it.
# called from get_token() in token.icn
# automatic continuation for beginning & middle keywords & special characters
# , [ { ( < \
# isa,isc, is,has,do, if,then,else, for, ..., with
# at, of, with, from, to
# begin, end
# know, believe, etc. <== NOT currently reserved
local line,tok,newline,newtok,newlinetoken
local xlineword
local tlast,xline,xlinetoken
local nocont,cont,icomplete
local head,KHRV,cilast
local iinfo,iierror
static info,ierror
initial {
info := "INFO: OLDcontinue_token: "
ierror := "Internal ERROR: OLDcontinue_token: "
}
/infd := myin
/continue := "yes"
KHRV := "("||KFORMAT||HOSEPARATOR||RELSEPARATOR||NVSEPARATOR||")"
# per format: nocont = last-token types that end a line, cont = types that
# force continuation, icomplete = current group count
case KFORMAT of {
# NOTE(review): "list" sets none of nocont/cont/icomplete, which are used
# below -- confirm this format never reaches here.
"list": { }
("ku"|"cu"): {
nocont := ';' ++ '>'
cont := '\\' ++ '!,&|~' ++ '[{(<' ++ 'ACDEGHIiJPpQRTWYZ' ++ '='
icomplete := OLDcomplete("") # token.icn
}
("ho"|"hounit"): {
nocont := '>'
cont := '<'
icomplete := OLDcomplete("html") # token.icn
}
("dir"|"nrel"|"nv"): {
return linetoken
}
("rdf"|"xml"|"html"|"htm"): {
nocont := '>'
cont := '<' ++ '='
icomplete := OLDcomplete("html") # token.icn
}
"nt": {
nocont := ''
cont := ''
icomplete := OLDcomplete("") # token.icn
}
"mcf": {
nocont := ''
cont := ''
icomplete := OLDcomplete("") # token.icn
}
default: {
writes_type(mylog,KFORMAT,ierror||"unexpected KFORMAT")
return linetoken
}
} # end case KFORMAT
line := linetoken.line
tok := linetoken.token
newline := line
newtok := copy_list(tok)
# type of the last token; fail when there is none
tlast := tok[-1].ttype | fail
cilast := "("||continue||icomplete||tlast||")"
head := KHRV||cilast
iinfo := info||head
iierror := ierror||head
if DEBUG == "CONTINUE" then {
writes_type(mybug,line,iinfo||"input line")
writes_type(mylog,line,iinfo||"input line")
}
newlinetoken := linetoken
if continue=="yes" then {
if upto(tlast,nocont) & (icomplete <= 0) then {
# no continuation
# NOTE(review): OLDcomplete("_LineComplete_") runs only when DEBUG is
# "CONTINUE" -- confirm it was meant to sit inside the debug block.
if DEBUG=="CONTINUE" then {
writes_type(mybug,line,iinfo||"no continutaion")
writes_type(mylog,line,iinfo||"no continutaion")
OLDcomplete("_LineComplete_")
}
} else if upto(tlast,cont) | (icomplete > 0) then {
# continue to next line
if tlast == "\\" then { # delete \
newtok := newtok[1:-1]
newline := newline[1:-1]
}
if xline := prompt(infd) then {
xlineword := get_word(xline)
xlinetoken := get_token(xlineword,infd,continue)
newline ||:= " "|| xlinetoken.line
newtok |||:= copy_list(xlinetoken.token)
newlinetoken := TOKEN(newline,newtok)
if DEBUG=="CONTINUE" then {
writes_type(mybug,newline,iinfo||"next line")
writes_type(mylog,newline,iinfo||"next line")
}
} # end if xline
} else {
if DEBUG=="CONTINUE" then {
writes_type(mybug,line,iinfo||"no continutaion")
writes_type(mylog,line,iinfo||"no continutaion")
OLDcomplete("_LineComplete_")
}
} # end if upto
} # end if continue
if DEBUG=="CONTINUE" then {
writes_type(mybug,newlinetoken.line,iinfo||"output line")
writes_type(mylog,newlinetoken.line,iinfo||"output line")
}
return newlinetoken
end
# TOKEN list
procedure map_token(tok,tokentype)
#=================================
# phase 2 parse
# map ttype for get_symbol()
# delete whitespace [ttype=="B"]
# delete comment [ttype=="c"]
#
# Summary
#   Walks the TOKEN list tok once and returns a new list x.
#   For each token y, in this order:
#     1. y.ttype is remapped according to its current ttype (case below);
#        comment ("c") and whitespace ("B") tokens are dropped via next,
#        which also skips steps 2 and 3 for them.
#     2. NEWcomplete() is called when y.tvalue is one of the grouping /
#        control words listed in the second case.
#     3. y is appended to x; for KFORMAT html|htm|xml|rdf|owl a "w" token
#        with a leading or trailing "/" is split into the word plus a
#        separate TOKEN("/","/").
# Parameters
#   tok       - list of TOKEN records
#   tokentype - word => ttype table; defaults to the global mkr_word
# Returns
#   the list x described above
# Note
#   The records taken from tok are modified in place (y.ttype, and y.tvalue
#   in the "/" split); x holds those same records, not copies.
local x,y,word
# tvalue is only referenced by the commented-out "a" branch below
local tvalue
local kinfo
static info
initial {
info := "INFO: map_token: "
}
/tokentype := mkr_word
kinfo := info||"kformat <"||KFORMAT||"> "
#DEBUG := "MAP_TOKEN"
if DEBUG==("MAP_TOKEN"|"MAP"|"PARSE"|"END") then {
writes_type(mybug,tok,kinfo||"input tok")
writes_type(mylog,tok,kinfo||"input tok")
}
x := []
every y := ! tok do {
# step 1: remap y.ttype; any ttype not listed here is left unchanged
case y.ttype of {
default:{ }
"q": { y.ttype := "w" } # squote q => w for get_symbol()
"Q": { y.ttype := "w" } # dquote Q => w for get_symbol()
"a": {
# angle token: currently passed through unchanged
# (the KFORMAT-specific html tag mapping below is disabled)
##### case KFORMAT of {
##### default: { y.ttype := "a" # angle <...>
##### }
##### ("html"|"htm"|"xml"|"rdf"|"owl"): {
##### tvalue := y.tvalue
##### if tvalue[2] == "/" then {
##### y.ttype := "Z" # html tag end
##### } else if tvalue[-2] == "/" then {
##### y.ttype := "a" # html tag complete
##### } else {
##### y.ttype := "Y" # html tag begin
##### }
##### } # end "html"|...
##### } # end case KFORMAT
}
"b": { y.ttype := "w" } # paren b => w for get_symbol()
"c": { # delete comment
if DEBUG=="COMMENT" then
writes_type_all(y,info||"deleted comment token")
# next: token is not put on x and NEWcomplete() is not consulted
next
}
##### "h": {
##### # HTML command
##### if REMOVEHTML==("yes"|"YES") then {
##### do_html(y)
##### next # delete HTML
##### } # end if REMOVEHTML
##### } # end "h"
"B": {
# only the default clause is active, so whitespace is deleted
# for every KFORMAT
case KFORMAT of {
#("ho"|"hounit"|"nrel"): { } # keep whitespace for parsing phrase
default: { # delete whitespace
if DEBUG=="WHITESPACE" then
writes_type_all(y,info||"deleting whitespace token")
next
}
} # end case
}
"j": { # and,or
case KFORMAT of {
default: { } # reserved
("ho"|"hounit"|"nrel"): { y.ttype := "w" } # not reserved
}
}
#"h": { y.tvalue := trimws(y.tvalue) } # howord <== obsolete
#"r": { y.tvalue := trimws(y.tvalue) } # relword <== obsolete
#"d": { y.tvalue := trimws(y.tvalue) } # dirword <== obsolete
#"A": { y.ttype := "=" } # assignment,production <== obsolete
#"D": { y.ttype := "$" } # $variable <== obsolete
# for the following separator types the token's own text becomes its ttype
",": { y.ttype := y.tvalue } # list
";": { y.ttype := y.tvalue } # relation,proplist
#"=": { y.ttype := y.tvalue } # assignment,production
"!": { y.ttype := y.tvalue } # sh command
#"~": { y.ttype := y.tvalue } # not <== nonseparator
#"?": { y.ttype := y.tvalue } # question <== nonseparator
#"/": { y.ttype := y.tvalue } # hierarchy <== nonseparator
#"$": { y.ttype := y.tvalue } # $variable <== nonseparator
"W": { y.ttype := "w" } # weak separators
#":": { y.ttype := y.tvalue } #
#"+": { y.ttype := y.tvalue } #
#"-": { y.ttype := y.tvalue } #
#"*": { y.ttype := y.tvalue } #
"S": { # strong separators
# inner case has only a default clause: every strong separator
# gets its own text as ttype
case y.tvalue of {
default: { y.ttype := y.tvalue }
}
}
"w": {
# word includes no separators
# lookup order: tokentype, then rdf_word, then owl_word;
# a word found in none of them keeps ttype "w"
if member(tokentype,y.tvalue) then {
y := group_map_token(y,tokentype)
#case y.tvalue of {
#"next": { y.tvalue := ";" }
#"non": { y.tvalue := "~" }
#"not": { y.tvalue := "~" }
#}
} else if member(rdf_word,y.tvalue) then {
y.ttype := rdf_word[y.tvalue]
} else if member(owl_word,y.tvalue) then {
y.ttype := owl_word[y.tvalue]
} # end if
} # end "w"
"x": {
# xword includes weak separators W
# the scan below is a prefix test: ="..." matches at the start of y.tvalue
if y.tvalue ? { ="isa**" | ="isc**" |
="iss**" | ="isg**" |
="isu**" | ="isp**" |
="genlmt**" | ="specmt**"
} then {
# "lw" | "l?"
y.ttype := "V"
#} else if upto("*",y.tvalue) then {
# wildcard pattern
} else if member(tokentype,y.tvalue) then {
y := group_map_token(y,tokentype)
#case y.tvalue of {
#"next": { y.tvalue := ";" }
#"non": { y.tvalue := "~" }
#"not": { y.tvalue := "~" }
#}
} else if member(rdf_word,y.tvalue) then {
y.ttype := rdf_word[y.tvalue]
} else if member(owl_word,y.tvalue) then {
y.ttype := owl_word[y.tvalue]
} else {
y.ttype := "w" # x => w for get_symbol()
} # end if
} # end "x"
} # end case y.ttype
# step 2: set syntactic nesting group count
# keyed on y.tvalue (not the remapped ttype); "begin"/"end" are reported
# as "_begin_"/"_end_", every other listed word is passed as is
word := y.tvalue
if DEBUG=="COMPLETE" then {
writes_type_all(word,info||"NEWcomplete")
}
case word of {
"": { }
"begin": { NEWcomplete("_begin_") }
"end": { NEWcomplete("_end_") }
"if": { NEWcomplete(word) }
"fi": { NEWcomplete(word) }
"=>": { NEWcomplete(word) }
"--": { NEWcomplete(word) }
"for": { NEWcomplete(word) }
"every": { NEWcomplete(word) }
"when": { NEWcomplete(word) }
"while": { NEWcomplete(word) }
"until": { NEWcomplete(word) }
"do": { NEWcomplete(word) }
"DO": { NEWcomplete(word) }
"!": { NEWcomplete(word) }
#"can": { NEWcomplete(word) }
"vdo": { NEWcomplete(word) }
"hdo": { NEWcomplete(word) }
"ido": { NEWcomplete(word) }
"happens":{ NEWcomplete(word) }
#(";"|"done"): { NEWcomplete(word) }
"done": { NEWcomplete(word) }
("<"|">"):{ NEWcomplete(word) }
("{"|"}"):{ NEWcomplete(word) }
("["|"]"):{ NEWcomplete(word) }
("("|")"):{ NEWcomplete(word) }
";": { NEWcomplete(word) }
} # end case y.tvalue
# step 3: append to the output list
# separate leading and trailing "/" in HTML parsing
case KFORMAT of {
default : { put(x,y) }
("html"|"htm"|"xml"|"rdf"|"owl"): {
if y.ttype == "w" then {
if y.tvalue[1] == "/" then {
# leading "/": emit the "/" token first, then the remaining word
y.tvalue := y.tvalue[2:0]
put(x,TOKEN("/","/"),y)
} else if y.tvalue[-1] == "/" then {
# trailing "/": emit the word first, then the "/" token
y.tvalue := y.tvalue[1:-1]
put(x,y,TOKEN("/","/"))
} else {
put(x,y)
} # end if == "/"
} else {
put(x,y)
} # end if == "w"
} # end "html"|...
} # end case KFORMAT
} # end every y
if DEBUG==("MAP_TOKEN"|"MAP"|"PARSE"|"END") then {
writes_type(mybug,x,kinfo||"output x")
writes_type(mylog,x,kinfo||"output x")
}
return x
end
# TOKEN
procedure group_map_token(y,tokentype)
#====================================
# Set y.ttype for a token whose tvalue is a key of tokentype
# (map_token() tests member(tokentype,y.tvalue) before calling).
#
#   y         - TOKEN record; its ttype field is overwritten in place
#   tokentype - word => ttype table; defaults to the global mkr_word
#   returns   - the same record y
#
# For KFORMAT ho|hounit|nrel only "end", "exit", "break" and "let" keep
# their table type; every other word is demoted to a plain word "w".
# For any other KFORMAT the table type is always used.
# Earlier note kept from the original: allowing reserved words in
# hierarchies & relations was a bad idea - everything gets parsed as
# group_statement.
   local tag
   static label
   initial {
      label := "INFO: group_map_token"
   }

   /tokentype := mkr_word
   tag := label||"("||KFORMAT||"): "

   #DEBUG := "END"
   if DEBUG == ("MAP_TOKEN"|"BEGIN"|"END") then {
      writes_type(mybug,y,tag||"input token")
      writes_type(mylog,y,tag||"input token")
   }

   # === mirrors the value comparison performed by a case clause
   if KFORMAT === ("ho"|"hounit"|"nrel") then {
      # hierarchy / relation formats: only a few words stay reserved
      if y.tvalue === ("end"|"exit"|"break"|"let") then
         y.ttype := tokentype[y.tvalue]
      else
         y.ttype := "w"
   } else {
      # all other formats: take the type straight from the table
      y.ttype := tokentype[y.tvalue]
   }

   if DEBUG == ("MAP_TOKEN"|"BEGIN"|"END") then {
      writes_type(mybug,y,tag||"output token")
      writes_type(mylog,y,tag||"output token")
   }
   return y
end
# TOKEN list
procedure horel_token(tok)
#=========================
# no special meaning for words in HO and REL
#
# Returns a new list holding a copy of every token in tok (the input
# records are not modified). A copy whose tvalue is NOT one of the
# exception words below has its ttype reset to "w", unless that ttype is
# "/" or "\\", which is kept. Copies whose tvalue is an exception word
# keep their ttype unchanged.
#
#   tok     - list of TOKEN records
#   returns - list of copied (and possibly retyped) TOKEN records
#
# The input and output traces use the same DEBUG gate, so DEBUG=="REL"
# now shows the result as well as the input.
   local x,y,t,kinfo
   static info
   initial {
      info := "INFO: horel_token"
   }

   kinfo := info||"("||KFORMAT||"): "
   if DEBUG == ("HO"|"REL"|"NREL") then
      writes_type_all(tok,kinfo||"input token list")

   x := []
   every t := !tok do {
      y := copy(t)   # work on a copy so the caller's tokens stay intact
      case y.tvalue of {
         # exceptions: group
         "begin": { }
         "end": { }
         "exit": { }
         ";": { }
         # exceptions: hierarchy
         "ho": { }
         "hierarchy": { }
         "unithierarchy": { }
         #####"/": { }
         #####"\\": { }
         #####"isu": { }
         #####"iss": { }
         # exceptions: relation
         "nrel": { }
         "relation": { }
         ",": { }
         "[": { }
         "]": { }
         # exceptions: do read from file done; x has y=z;
         "do": { }
         "read": { }
         "from": { }
         "done": { }
         ##"has": { }
         ##"=": { }
         # everything else is an ordinary word
         default: {
            case y.ttype of {
               "/": { }
               "\\": { } # should be "/"
               default: { y.ttype := "w" }
            } # end case y.ttype
         }
      } # end case y.tvalue
      put(x,y)
   } # end every t

   if DEBUG == ("HO"|"REL"|"NREL") then
      writes_type_all(x,kinfo||"output token list")
   return x
end
#