| | 1 | /* |
| | 2 | * Simple "noun phrase" parser test. This isn't meant to be a real noun |
| | 3 | * phrase parser; it's just a demonstration of how some of the |
| | 4 | * dictionary object's features work. |
| | 5 | */ |
| | 6 | |
| | 7 | #include "tads.h" |
| | 8 | #include "t3.h" |
| | 9 | #include "dict.h" |
| | 10 | |
/*
 *   Define our dictionary.  The noun phrase parser in this file looks
 *   words up through this object (G_dict.findWord, G_dict.findWordTrunc,
 *   G_dict.isWordDefined, G_dict.isWordDefinedTrunc).
 */
dictionary G_dict;

/*
 *   Define our dictionary properties.  The test objects assign their
 *   vocabulary strings to these properties.  In the code visible here,
 *   the parser looks words up under 'noun' and 'adjective' only;
 *   'plural' is declared and assigned but never consulted.
 */
dictionary property noun, adjective, plural;
| | 16 | |
| | 17 | /* ------------------------------------------------------------------------ */ |
| | 18 | /* |
| | 19 | * define a few objects |
| | 20 | */ |
| | 21 | |
/*
 *   Root class for the test objects.  main() enumerates the instances of
 *   this class to print the list of defined objects.
 */
class Item: object
{
}
| | 24 | |
/* any book: supplies the noun and plural vocabulary shared by all books */
class Book: Item
{
    sdesc = "book"
    noun = 'book'
    plural = 'books'
}
| | 30 | |
/* any ball: supplies the noun and plural vocabulary shared by all balls */
class Ball: Item
{
    sdesc = "ball"
    noun = 'ball'
    plural = 'balls'
}
| | 36 | |
/* a book distinguished by the adjective 'red' */
redBook: Book
{
    sdesc = "red book"
    adjective = 'red'
}
| | 41 | |
/* a book distinguished by the adjective 'blue' */
blueBook: Book
{
    sdesc = "blue book"
    adjective = 'blue'
}
| | 46 | |
/* a ball distinguished by the adjective 'red' */
redBall: Ball
{
    sdesc = "red ball"
    adjective = 'red'
}
| | 51 | |
/* a ball distinguished by the adjective 'green' */
greenBall: Ball
{
    sdesc = "green ball"
    adjective = 'green'
}
| | 56 | |
| | 57 | |
| | 58 | |
| | 59 | /* ------------------------------------------------------------------------ */ |
/*
 *   Main routine - read and parse strings.
 *
 *   Lists every Item instance, then loops: prompt for a line, convert it
 *   to lower case, tokenize it, and resolve it as a noun phrase, showing
 *   the objects that match.  The loop ends when the user enters QUIT or Q
 *   as the only word on the line, or when no more input can be read.
 *
 *   args - the program argument list; not used here.
 */
main(args)
{
    "Welcome to the noun phrase test! Type QUIT or Q to stop. The
    defined objects are:\n";
    for (local obj = firstObj(Item) ; obj != nil ; obj = nextObj(obj, Item))
        "\t<<obj.sdesc>>\n";

    for (;;)
    {
        local str;
        local toklist;
        local objlist;

        /* read a string */
        "\b>";
        str = inputLine();

        /*
         *   If no line could be read (inputLine() gives us nil, e.g. at
         *   end of input), stop here rather than failing on the
         *   toLower() call below.
         */
        if (str == nil)
            break;

        /* convert it to minuscules */
        str = str.toLower();

        /* tokenize it */
        toklist = tokenize(str);

        /* if tokenizing failed, ignore this input and continue */
        if (toklist == nil)
            continue;

        /* a blank line has no tokens to examine - just prompt again */
        if (toklist.length() == 0)
            continue;

        /*
         *   Check for a QUIT command.  The parentheses matter: without
         *   them '&&' binds tighter than '||', so toklist[1] would be
         *   evaluated even when the list is empty, and any input whose
         *   first word is 'q' would quit.
         */
        if (toklist.length() == 1
            && (toklist[1] == 'quit' || toklist[1] == 'q'))
            break;

        /* parse the noun phrase to get an object list */
        objlist = parseNounPhrase(toklist);

        /*
         *   Display the result.  A nil result means the parser has
         *   nothing to report or has already displayed its own message.
         */
        if (objlist != nil)
        {
            if (objlist.length() > 0)
            {
                "Matching objects:\n";
                for (local i = 1, local len = objlist.length() ;
                     i <= len ; ++i)
                    "\t<<i>>: <<objlist[i].sdesc>>\n";
            }
            else
                "No matching objects.\n";
        }
    }
}
| | 110 | |
/*
 *   A simple tokenizer.
 *
 *   Splits 'str' into a list of token strings.  Runs of spaces separate
 *   tokens and are discarded.  A token is either a word (a lower-case
 *   letter followed by letters, digits, hyphens, or apostrophes) or a
 *   single punctuation mark from the set . , ? ! ; :
 *
 *   Returns the list of tokens (empty if the string contains nothing but
 *   spaces).  If a character cannot start any token, displays a message
 *   naming that character and returns nil.
 */
tokenize(str)
{
    /* the token patterns, tried in this order at each position */
    local patterns = ['[a-z][-\'a-z0-9]*', '[.,?!;:]'];
    local tokens = [];

    /* work through the text until nothing is left */
    while (str != '')
    {
        local tokLen = nil;

        /* drop any run of spaces at the front of the remaining text */
        local spaces = rexMatch(' +', str);
        if (spaces != nil)
        {
            str = str.substr(spaces + 1);

            /* nothing but trailing spaces remained - finished */
            if (str == '')
                break;
        }

        /* find the first pattern that matches the front of the text */
        for (local p = 1 ; p <= patterns.length() && tokLen == nil ; ++p)
            tokLen = rexMatch(patterns[p], str);

        /* no pattern matched - report the offending character and fail */
        if (tokLen == nil)
        {
            "'<<str.substr(1, 1)>>' is not a valid character. ";
            return nil;
        }

        /* save the token, then remove it from the remaining text */
        tokens += str.substr(1, tokLen);
        str = str.substr(tokLen + 1);
    }

    /* hand back everything we collected */
    return tokens;
}
| | 173 | |
/*
 *   A simple noun phrase parser and resolver.
 *
 *   'toklist' is a list of token strings.  A leading article (the, a, an)
 *   is skipped.  Of the remaining words, the last is looked up as a noun
 *   and all the others as adjectives; the result is the set of objects
 *   that match every word.
 *
 *   Returns the list of matching objects, which can be empty.  Returns
 *   nil if there are no words to resolve, or if a word has no match for
 *   its part of speech - in the latter case a message is displayed first.
 */
parseNounPhrase(toklist)
{
    local wordCount = toklist.length();
    local first = 1;
    local matches;

    /* a leading article adds nothing to the lookup - step over it */
    if (wordCount >= 1 && rexMatch('^(the|a|an)$', toklist[first]) != nil)
        ++first;

    /* narrow down the candidate objects one word at a time */
    for (local idx = first ; idx <= wordCount ; ++idx)
    {
        local word = toklist[idx];
        local partOfSpeech;
        local found;

        /* the final word is the noun; everything before it is an adjective */
        if (idx == wordCount)
            partOfSpeech = &noun;
        else
            partOfSpeech = &adjective;

        /* gather the exact matches followed by the truncated matches */
        found = G_dict.findWord(word, partOfSpeech)
                + G_dict.findWordTrunc(word, partOfSpeech);

        /* nothing matches this word in this role - explain and give up */
        if (found == [])
        {
            if (G_dict.isWordDefined(word) || G_dict.isWordDefinedTrunc(word))
            {
                /* the word exists, just not as this part of speech */
                "You don't see any ";
                for (local j = first ; j <= wordCount ; ++j)
                    "<<toklist[j]>> ";
                "here. ";
            }
            else
            {
                /* the word isn't in the dictionary under any property */
                "I don't know the word \"<<word>>.\" ";
            }

            return nil;
        }

        /*
         *   The first word's matches start the candidate set; each later
         *   word keeps only the candidates that match it as well.
         */
        matches = (idx == first ? found : matches.intersect(found));
    }

    /* whatever survived every word is the answer */
    return matches;
}
| | 251 | |