| | 1 | #ifdef RCSID |
| | 2 | static char RCSid[] = |
| | 3 | "$Header: d:/cvsroot/tads/tads3/tctok.cpp,v 1.5 1999/07/11 00:46:58 MJRoberts Exp $"; |
| | 4 | #endif |
| | 5 | |
| | 6 | /* |
| | 7 | * Copyright (c) 1999, 2002 Michael J. Roberts. All Rights Reserved. |
| | 8 | * |
| | 9 | * Please see the accompanying license file, LICENSE.TXT, for information |
| | 10 | * on using and copying this software. |
| | 11 | */ |
| | 12 | /* |
| | 13 | Name |
| | 14 | tctok.cpp - TADS3 compiler tokenizer |
| | 15 | Function |
| | 16 | |
| | 17 | Notes |
| | 18 | The tokenizer features an integrated C-style preprocessor. The |
| | 19 | preprocessor is integrated into the tokenizer for efficiency; since |
| | 20 | the preprocessor uses the same lexical structure as the TADS |
| | 21 | language, we need only tokenize the input stream once, and the result |
| | 22 | can be used both for preprocessing and for parsing. |
| | 23 | Modified |
| | 24 | 04/12/99 MJRoberts - Creation |
| | 25 | */ |
| | 26 | |
| | 27 | #include <stdio.h> |
| | 28 | #include <string.h> |
| | 29 | #include <stdarg.h> |
| | 30 | #include <time.h> |
| | 31 | |
| | 32 | #include "os.h" |
| | 33 | #include "t3std.h" |
| | 34 | #include "vmerr.h" |
| | 35 | #include "vmhash.h" |
| | 36 | #include "tcerr.h" |
| | 37 | #include "tcerrnum.h" |
| | 38 | #include "tctok.h" |
| | 39 | #include "tcsrc.h" |
| | 40 | #include "tcmain.h" |
| | 41 | #include "tchost.h" |
| | 42 | #include "tcprs.h" |
| | 43 | #include "tctarg.h" |
| | 44 | #include "charmap.h" |
| | 45 | #include "vmfile.h" |
| | 46 | |
| | 47 | |
| | 48 | /* ------------------------------------------------------------------------ */ |
| | 49 | /* |
| | 50 | * Initialize the tokenizer |
| | 51 | */ |
| | 52 | CTcTokenizer::CTcTokenizer(CResLoader *res_loader, |
| | 53 | const char *default_charset) |
| | 54 | { |
| | 55 | int i; |
| | 56 | time_t timer; |
| | 57 | struct tm *tblk; |
| | 58 | const char *tstr; |
| | 59 | char timebuf[50]; |
| | 60 | struct kwdef |
| | 61 | { |
| | 62 | const char *kw_text; |
| | 63 | tc_toktyp_t kw_tok_id; |
| | 64 | }; |
| | 65 | static const kwdef kwlist[] = |
| | 66 | { |
| | 67 | { "self", TOKT_SELF }, |
| | 68 | { "targetprop", TOKT_TARGETPROP }, |
| | 69 | { "targetobj", TOKT_TARGETOBJ }, |
| | 70 | { "definingobj", TOKT_DEFININGOBJ }, |
| | 71 | { "inherited", TOKT_INHERITED }, |
| | 72 | { "delegated", TOKT_DELEGATED }, |
| | 73 | { "argcount", TOKT_ARGCOUNT }, |
| | 74 | { "if", TOKT_IF }, |
| | 75 | { "else", TOKT_ELSE }, |
| | 76 | { "for", TOKT_FOR }, |
| | 77 | { "while", TOKT_WHILE }, |
| | 78 | { "do", TOKT_DO }, |
| | 79 | { "switch", TOKT_SWITCH }, |
| | 80 | { "case", TOKT_CASE }, |
| | 81 | { "default", TOKT_DEFAULT }, |
| | 82 | { "goto", TOKT_GOTO }, |
| | 83 | { "break", TOKT_BREAK }, |
| | 84 | { "continue", TOKT_CONTINUE }, |
| | 85 | // { "and", TOKT_AND }, |
| | 86 | // { "or", TOKT_OR }, |
| | 87 | // { "not", TOKT_NOT }, |
| | 88 | { "function", TOKT_FUNCTION }, |
| | 89 | { "return", TOKT_RETURN }, |
| | 90 | { "local", TOKT_LOCAL }, |
| | 91 | { "object", TOKT_OBJECT }, |
| | 92 | { "nil", TOKT_NIL }, |
| | 93 | { "true", TOKT_TRUE }, |
| | 94 | { "pass", TOKT_PASS }, |
| | 95 | { "external", TOKT_EXTERNAL }, |
| | 96 | { "extern", TOKT_EXTERN }, |
| | 97 | { "formatstring", TOKT_FORMATSTRING }, |
| | 98 | { "class", TOKT_CLASS }, |
| | 99 | { "replace", TOKT_REPLACE }, |
| | 100 | { "modify", TOKT_MODIFY }, |
| | 101 | { "new", TOKT_NEW }, |
| | 102 | // { "delete", TOKT_DELETE }, |
| | 103 | { "throw", TOKT_THROW }, |
| | 104 | { "try", TOKT_TRY }, |
| | 105 | { "catch", TOKT_CATCH }, |
| | 106 | { "finally", TOKT_FINALLY }, |
| | 107 | { "intrinsic", TOKT_INTRINSIC }, |
| | 108 | { "dictionary", TOKT_DICTIONARY }, |
| | 109 | { "grammar", TOKT_GRAMMAR }, |
| | 110 | { "enum", TOKT_ENUM }, |
| | 111 | { "template", TOKT_TEMPLATE }, |
| | 112 | { "static", TOKT_STATIC }, |
| | 113 | { "foreach", TOKT_FOREACH }, |
| | 114 | { "export", TOKT_EXPORT }, |
| | 115 | { "propertyset", TOKT_PROPERTYSET }, |
| | 116 | { "transient", TOKT_TRANSIENT }, |
| | 117 | { "replaced", TOKT_REPLACED }, |
| | 118 | { "property", TOKT_PROPERTY }, |
| | 119 | |
| | 120 | // { "void", TOKT_VOID }, |
| | 121 | // { "int", TOKT_INT }, |
| | 122 | // { "string", TOKT_STRING }, |
| | 123 | // { "list", TOKT_LIST }, |
| | 124 | // { "boolean", TOKT_BOOLEAN }, |
| | 125 | // { "any", TOKT_ANY }, |
| | 126 | |
| | 127 | /* end-of-table marker */ |
| | 128 | { 0, TOKT_INVALID } |
| | 129 | }; |
| | 130 | const kwdef *kwp; |
| | 131 | |
| | 132 | /* remember my resource loader */ |
| | 133 | res_loader_ = res_loader; |
| | 134 | |
| | 135 | /* there's no stream yet */ |
| | 136 | str_ = 0; |
| | 137 | |
| | 138 | /* no external source yet */ |
| | 139 | ext_src_ = 0; |
| | 140 | |
| | 141 | /* start numbering the file descriptors at zero */ |
| | 142 | next_filedesc_id_ = 0; |
| | 143 | |
| | 144 | /* there are no file descriptors yet */ |
| | 145 | desc_head_ = 0; |
| | 146 | desc_tail_ = 0; |
| | 147 | desc_list_ = 0; |
| | 148 | desc_list_cnt_ = desc_list_alo_ = 0; |
| | 149 | |
| | 150 | /* empty out the input line buffer */ |
| | 151 | clear_linebuf(); |
| | 152 | |
| | 153 | /* start out with a minimal line buffer size */ |
| | 154 | linebuf_.ensure_space(4096); |
| | 155 | expbuf_.ensure_space(4096); |
| | 156 | |
| | 157 | /* set up at the beginning of the input line buffer */ |
| | 158 | start_new_line(&linebuf_, 0); |
| | 159 | |
| | 160 | /* remember the default character set */ |
| | 161 | default_charset_ = lib_copy_str(default_charset); |
| | 162 | |
| | 163 | /* we don't have a default character mapper yet */ |
| | 164 | default_mapper_ = 0; |
| | 165 | |
| | 166 | /* create an input mapper for the default character set, if specified */ |
| | 167 | if (default_charset != 0) |
| | 168 | default_mapper_ = CCharmapToUni::load(res_loader, default_charset); |
| | 169 | |
| | 170 | /* |
| | 171 | * if the default character set wasn't specified, or we failed to |
| | 172 | * load a mapper for the specified character set, use a plain ASCII |
| | 173 | * mapper |
| | 174 | */ |
| | 175 | if (default_mapper_ == 0) |
| | 176 | default_mapper_ = new CCharmapToUniASCII(); |
| | 177 | |
| | 178 | /* presume we're not in preprocessor-only mode */ |
| | 179 | pp_only_mode_ = FALSE; |
| | 180 | |
| | 181 | /* presume we're not in list-includes mode */ |
| | 182 | list_includes_mode_ = FALSE; |
| | 183 | |
| | 184 | /* presume we're not in test report mode */ |
| | 185 | test_report_mode_ = FALSE; |
| | 186 | |
| | 187 | /* allow preprocessing directives */ |
| | 188 | allow_pp_ = TRUE; |
| | 189 | |
| | 190 | /* there are no previously-included files yet */ |
| | 191 | prev_includes_ = 0; |
| | 192 | |
| | 193 | /* presume we'll convert newlines in strings to whitespace */ |
| | 194 | string_newline_spacing_ = TRUE; |
| | 195 | |
| | 196 | /* start out with ALL_ONCE mode off */ |
| | 197 | all_once_ = FALSE; |
| | 198 | |
| | 199 | /* by default, ignore redundant includes without warning */ |
| | 200 | warn_on_ignore_incl_ = FALSE; |
| | 201 | |
| | 202 | /* there are no include path entries yet */ |
| | 203 | incpath_head_ = incpath_tail_ = 0; |
| | 204 | |
| | 205 | /* not in a quoted string yet */ |
| | 206 | in_quote_ = '\0'; |
| | 207 | |
| | 208 | /* not in an embedded expression yet */ |
| | 209 | comment_in_embedding_ = FALSE; |
| | 210 | macro_in_embedding_ = FALSE; |
| | 211 | main_in_embedding_ = FALSE; |
| | 212 | |
| | 213 | /* not in a #if block yet */ |
| | 214 | if_sp_ = 0; |
| | 215 | if_false_level_ = 0; |
| | 216 | |
| | 217 | /* not processing a preprocessor constant expression */ |
| | 218 | in_pp_expr_ = FALSE; |
| | 219 | |
| | 220 | /* we don't have a current or appended line yet */ |
| | 221 | last_desc_ = 0; |
| | 222 | last_linenum_ = 0; |
| | 223 | appended_desc_ = 0; |
| | 224 | appended_linenum_ = 0; |
| | 225 | |
| | 226 | /* allocate the first token-list block */ |
| | 227 | init_src_block_list(); |
| | 228 | |
| | 229 | /* create the #define and #undef symbol tables */ |
| | 230 | defines_ = new CVmHashTable(512, new CVmHashFuncCS(), TRUE); |
| | 231 | undefs_ = new CVmHashTable(64, new CVmHashFuncCS(), TRUE); |
| | 232 | |
| | 233 | /* create the special __LINE__ and __FILE__ macros */ |
| | 234 | defines_->add(new CTcHashEntryPpLINE(this)); |
| | 235 | defines_->add(new CTcHashEntryPpFILE(this)); |
| | 236 | |
| | 237 | /* get the current time and date */ |
| | 238 | timer = time(0); |
| | 239 | tblk = localtime(&timer); |
| | 240 | tstr = asctime(tblk); |
| | 241 | |
| | 242 | /* |
| | 243 | * add the __DATE__ macro - the format is "Mmm dd yyyy", where "Mmm" |
| | 244 | * is the three-letter month name generated by asctime(), "dd" is |
| | 245 | * the day of the month, with a leading space for numbers less than |
| | 246 | * ten, and "yyyy" is the year. |
| | 247 | */ |
| | 248 | sprintf(timebuf, "'%.3s %2d %4d'", |
| | 249 | tstr + 4, tblk->tm_mday, tblk->tm_year + 1900); |
| | 250 | add_define("__DATE__", timebuf); |
| | 251 | |
| | 252 | /* add the __TIME__ macro - 24-hour "hh:mm:ss" format */ |
| | 253 | sprintf(timebuf, "'%.8s'", tstr + 11); |
| | 254 | add_define("__TIME__", timebuf); |
| | 255 | |
| | 256 | /* |
| | 257 | * Allocate a pool of macro resources. The number we start with is |
| | 258 | * arbitrary, since we'll add more as needed, but we want to try to |
| | 259 | * allocate enough up front that we avoid time-consuming memory |
| | 260 | * allocations later. On the other hand, we don't want to |
| | 261 | * pre-allocate a huge number of objects that we'll never use. |
| | 262 | */ |
| | 263 | for (macro_res_avail_ = 0, macro_res_head_ = 0, i = 0 ; i < 7 ; ++i) |
| | 264 | { |
| | 265 | CTcMacroRsc *rsc; |
| | 266 | |
| | 267 | /* allocate a new object */ |
| | 268 | rsc = new CTcMacroRsc(); |
| | 269 | |
| | 270 | /* add it onto the master list */ |
| | 271 | rsc->next_ = macro_res_head_; |
| | 272 | macro_res_head_ = rsc; |
| | 273 | |
| | 274 | /* add it onto the available list */ |
| | 275 | rsc->next_avail_ = macro_res_avail_; |
| | 276 | macro_res_avail_ = rsc; |
| | 277 | } |
| | 278 | |
| | 279 | /* create the keyword hash table */ |
| | 280 | kw_ = new CVmHashTable(64, new CVmHashFuncCS(), TRUE); |
| | 281 | |
| | 282 | /* populate the keyword table */ |
| | 283 | for (kwp = kwlist ; kwp->kw_text != 0 ; ++kwp) |
| | 284 | kw_->add(new CTcHashEntryKw(kwp->kw_text, kwp->kw_tok_id)); |
| | 285 | |
| | 286 | /* no ungot token yet */ |
| | 287 | nxttok_valid_ = FALSE; |
| | 288 | |
| | 289 | /* no string capture file */ |
| | 290 | string_fp_ = 0; |
| | 291 | string_fp_map_ = 0; |
| | 292 | } |
| | 293 | |
| | 294 | /* |
| | 295 | * Initialize the source save block list |
| | 296 | */ |
| | 297 | void CTcTokenizer::init_src_block_list() |
| | 298 | { |
| | 299 | /* allocate the first source block */ |
| | 300 | src_cur_ = src_head_ = new CTcTokSrcBlock(); |
| | 301 | |
| | 302 | /* set up to write into the first block */ |
| | 303 | src_ptr_ = src_head_->get_buf(); |
| | 304 | src_rem_ = TCTOK_SRC_BLOCK_SIZE; |
| | 305 | } |
| | 306 | |
| | 307 | |
| | 308 | /* ------------------------------------------------------------------------ */ |
| | 309 | /* |
| | 310 | * Delete the tokenizer |
| | 311 | */ |
| | 312 | CTcTokenizer::~CTcTokenizer() |
| | 313 | { |
| | 314 | /* delete all streams */ |
| | 315 | delete_source(); |
| | 316 | |
| | 317 | /* delete all file descriptors */ |
| | 318 | while (desc_head_ != 0) |
| | 319 | { |
| | 320 | CTcTokFileDesc *nxt; |
| | 321 | |
| | 322 | /* remember the next descriptor */ |
| | 323 | nxt = desc_head_->get_next(); |
| | 324 | |
| | 325 | /* delete this one */ |
| | 326 | delete desc_head_; |
| | 327 | |
| | 328 | /* move on to the next one */ |
| | 329 | desc_head_ = nxt; |
| | 330 | } |
| | 331 | |
| | 332 | /* delete the file descriptor index array */ |
| | 333 | if (desc_list_ != 0) |
| | 334 | t3free(desc_list_); |
| | 335 | |
| | 336 | /* delete our default character set string copy */ |
| | 337 | lib_free_str(default_charset_); |
| | 338 | |
| | 339 | /* release our reference on our default character mapper */ |
| | 340 | default_mapper_->release_ref(); |
| | 341 | |
| | 342 | /* forget about all of our previous include files */ |
| | 343 | while (prev_includes_ != 0) |
| | 344 | { |
| | 345 | tctok_incfile_t *nxt; |
| | 346 | |
| | 347 | /* remember the next file */ |
| | 348 | nxt = prev_includes_->nxt; |
| | 349 | |
| | 350 | /* delete this one */ |
| | 351 | t3free(prev_includes_); |
| | 352 | |
| | 353 | /* move on to the next one */ |
| | 354 | prev_includes_ = nxt; |
| | 355 | } |
| | 356 | |
| | 357 | /* delete the include path list */ |
| | 358 | while (incpath_head_ != 0) |
| | 359 | { |
| | 360 | tctok_incpath_t *nxt; |
| | 361 | |
| | 362 | /* remember the next entry in the path */ |
| | 363 | nxt = incpath_head_->nxt; |
| | 364 | |
| | 365 | /* delete this entry */ |
| | 366 | t3free(incpath_head_); |
| | 367 | |
| | 368 | /* move on to the next one */ |
| | 369 | incpath_head_ = nxt; |
| | 370 | } |
| | 371 | |
| | 372 | /* delete the macro resources */ |
| | 373 | while (macro_res_head_ != 0) |
| | 374 | { |
| | 375 | CTcMacroRsc *nxt; |
| | 376 | |
| | 377 | /* remember the next one */ |
| | 378 | nxt = macro_res_head_->next_; |
| | 379 | |
| | 380 | /* delete this one */ |
| | 381 | delete macro_res_head_; |
| | 382 | |
| | 383 | /* move on to the next one */ |
| | 384 | macro_res_head_ = nxt; |
| | 385 | } |
| | 386 | |
| | 387 | /* delete the token list */ |
| | 388 | delete src_head_; |
| | 389 | |
| | 390 | /* delete the #define and #undef symbol tables */ |
| | 391 | delete defines_; |
| | 392 | delete undefs_; |
| | 393 | |
| | 394 | /* delete the keyword hash table */ |
| | 395 | delete kw_; |
| | 396 | |
| | 397 | /* if we created a mapping for the string capture file, release it */ |
| | 398 | if (string_fp_map_ != 0) |
| | 399 | string_fp_map_->release_ref(); |
| | 400 | } |
| | 401 | |
| | 402 | /* ------------------------------------------------------------------------ */ |
| | 403 | /* |
| | 404 | * Clear the line buffer |
| | 405 | */ |
| | 406 | void CTcTokenizer::clear_linebuf() |
| | 407 | { |
| | 408 | /* clear the buffer */ |
| | 409 | linebuf_.clear_text(); |
| | 410 | |
| | 411 | /* reset our read point to the start of the line buffer */ |
| | 412 | p_.set(linebuf_.get_buf()); |
| | 413 | } |
| | 414 | |
| | 415 | /* ------------------------------------------------------------------------ */ |
| | 416 | /* |
| | 417 | * Get a textual representation of an operator token |
| | 418 | */ |
| | 419 | const char *CTcTokenizer::get_op_text(tc_toktyp_t op) |
| | 420 | { |
| | 421 | struct tokname_t |
| | 422 | { |
| | 423 | tc_toktyp_t typ; |
| | 424 | const char *nm; |
| | 425 | }; |
| | 426 | static const tokname_t toknames[] = |
| | 427 | { |
| | 428 | { TOKT_EOF, "<end of file>" }, |
| | 429 | { TOKT_SYM, "<symbol>" }, |
| | 430 | { TOKT_INT, "<integer>" }, |
| | 431 | { TOKT_SSTR, "<single-quoted string>" }, |
| | 432 | { TOKT_DSTR, "<double-quoted string>" }, |
| | 433 | { TOKT_DSTR_START, "<double-quoted string>" }, |
| | 434 | { TOKT_DSTR_MID, "<double-quoted string>" }, |
| | 435 | { TOKT_DSTR_END, "<double-quoted string>" }, |
| | 436 | { TOKT_LPAR, "(" }, |
| | 437 | { TOKT_RPAR, ")" }, |
| | 438 | { TOKT_COMMA, "," }, |
| | 439 | { TOKT_DOT, "." }, |
| | 440 | { TOKT_LBRACE, "{" }, |
| | 441 | { TOKT_RBRACE, "}", }, |
| | 442 | { TOKT_LBRACK, "[", }, |
| | 443 | { TOKT_RBRACK, "]", }, |
| | 444 | { TOKT_EQ, "=", }, |
| | 445 | { TOKT_EQEQ, "==", }, |
| | 446 | { TOKT_ASI, ":=" }, |
| | 447 | { TOKT_PLUS, "+" }, |
| | 448 | { TOKT_MINUS, "-" }, |
| | 449 | { TOKT_TIMES, "*" }, |
| | 450 | { TOKT_DIV, "/", }, |
| | 451 | { TOKT_MOD, "%" }, |
| | 452 | { TOKT_GT, ">" }, |
| | 453 | { TOKT_LT, "<" }, |
| | 454 | { TOKT_GE, ">=" }, |
| | 455 | { TOKT_LE, "<=" }, |
| | 456 | { TOKT_NE, "!=" }, |
| | 457 | { TOKT_ARROW, "->" }, |
| | 458 | { TOKT_COLON, ":" }, |
| | 459 | { TOKT_SEM, ";" }, |
| | 460 | { TOKT_AND, "&" }, |
| | 461 | { TOKT_ANDAND, "&&" }, |
| | 462 | { TOKT_OR, "|" }, |
| | 463 | { TOKT_OROR, "||" }, |
| | 464 | { TOKT_XOR, "^" }, |
| | 465 | { TOKT_SHL, "<<" }, |
| | 466 | { TOKT_SHR, ">>" }, |
| | 467 | { TOKT_INC, "++" }, |
| | 468 | { TOKT_DEC, "--" }, |
| | 469 | { TOKT_PLUSEQ, "+=" }, |
| | 470 | { TOKT_MINEQ, "-=" }, |
| | 471 | { TOKT_TIMESEQ, "*=" }, |
| | 472 | { TOKT_DIVEQ, "/=" }, |
| | 473 | { TOKT_MODEQ, "%=" }, |
| | 474 | { TOKT_ANDEQ, "&=" }, |
| | 475 | { TOKT_OREQ, "|=" }, |
| | 476 | { TOKT_XOREQ, "^=" }, |
| | 477 | { TOKT_SHLEQ, "<<=" }, |
| | 478 | { TOKT_SHREQ, ">>=" }, |
| | 479 | { TOKT_NOT, "! (not)" }, |
| | 480 | { TOKT_BNOT, "~" }, |
| | 481 | { TOKT_POUND, "#" }, |
| | 482 | { TOKT_POUNDPOUND, "##" }, |
| | 483 | { TOKT_POUNDAT, "#@" }, |
| | 484 | { TOKT_ELLIPSIS, "..." }, |
| | 485 | { TOKT_QUESTION, "?" }, |
| | 486 | { TOKT_COLONCOLON, "::" }, |
| | 487 | { TOKT_FLOAT, "<float>" }, |
| | 488 | { TOKT_AT, "@" }, |
| | 489 | { TOKT_SELF, "self" }, |
| | 490 | { TOKT_TARGETPROP, "targetprop" }, |
| | 491 | { TOKT_TARGETOBJ, "targetobj" }, |
| | 492 | { TOKT_DEFININGOBJ, "definingobj" }, |
| | 493 | { TOKT_INHERITED, "inherited" }, |
| | 494 | { TOKT_DELEGATED, "delegated" }, |
| | 495 | { TOKT_IF, "if" }, |
| | 496 | { TOKT_ELSE, "else" }, |
| | 497 | { TOKT_FOR, "for" }, |
| | 498 | { TOKT_WHILE, "while" }, |
| | 499 | { TOKT_DO, "do" }, |
| | 500 | { TOKT_SWITCH, "switch" }, |
| | 501 | { TOKT_CASE, "case" }, |
| | 502 | { TOKT_DEFAULT, "default" }, |
| | 503 | { TOKT_GOTO, "goto" }, |
| | 504 | { TOKT_BREAK, "break" }, |
| | 505 | { TOKT_CONTINUE, "continue" }, |
| | 506 | { TOKT_FUNCTION, "function" }, |
| | 507 | { TOKT_RETURN, "return" }, |
| | 508 | { TOKT_LOCAL, "local" }, |
| | 509 | { TOKT_OBJECT, "object" }, |
| | 510 | { TOKT_NIL, "nil" }, |
| | 511 | { TOKT_TRUE, "true" }, |
| | 512 | { TOKT_PASS, "pass" }, |
| | 513 | { TOKT_EXTERNAL, "external" }, |
| | 514 | { TOKT_EXTERN, "extern" }, |
| | 515 | { TOKT_FORMATSTRING, "formatstring" }, |
| | 516 | { TOKT_CLASS, "class" }, |
| | 517 | { TOKT_REPLACE, "replace" }, |
| | 518 | { TOKT_MODIFY, "modify" }, |
| | 519 | { TOKT_NEW, "new" }, |
| | 520 | // { TOKT_DELETE, "delete" }, |
| | 521 | { TOKT_THROW, "throw" }, |
| | 522 | { TOKT_TRY, "try" }, |
| | 523 | { TOKT_CATCH, "catch" }, |
| | 524 | { TOKT_FINALLY, "finally" }, |
| | 525 | { TOKT_INTRINSIC, "intrinsic" }, |
| | 526 | { TOKT_DICTIONARY, "dictionary" }, |
| | 527 | { TOKT_GRAMMAR, "grammar" }, |
| | 528 | { TOKT_ENUM, "enum" }, |
| | 529 | { TOKT_TEMPLATE, "template" }, |
| | 530 | { TOKT_STATIC, "static" }, |
| | 531 | { TOKT_FOREACH, "foreach" }, |
| | 532 | { TOKT_EXPORT, "export" }, |
| | 533 | { TOKT_PROPERTYSET, "propertyset" }, |
| | 534 | { TOKT_TRANSIENT, "transient" }, |
| | 535 | { TOKT_REPLACED, "replaced" }, |
| | 536 | { TOKT_PROPERTY, "property" }, |
| | 537 | |
| | 538 | // { TOKT_VOID, "void" }, |
| | 539 | // { TOKT_INTKW, "int" }, |
| | 540 | // { TOKT_STRING, "string" }, |
| | 541 | // { TOKT_LIST, "list" }, |
| | 542 | // { TOKT_BOOLEAN, "boolean" }, |
| | 543 | // { TOKT_ANY, "any"}, |
| | 544 | |
| | 545 | { TOKT_INVALID, 0 } |
| | 546 | }; |
| | 547 | const tokname_t *p; |
| | 548 | |
| | 549 | /* search for the token */ |
| | 550 | for (p = toknames ; p->nm != 0 ; ++p) |
| | 551 | { |
| | 552 | /* if this is our token, return the associated name string */ |
| | 553 | if (p->typ == op) |
| | 554 | return p->nm; |
| | 555 | } |
| | 556 | |
| | 557 | /* we didn't find it */ |
| | 558 | return "<unknown>"; |
| | 559 | } |
| | 560 | |
| | 561 | /* ------------------------------------------------------------------------ */ |
| | 562 | /* |
| | 563 | * Reset the tokenizer. Delete the current source object and all of the |
| | 564 | * saved source text. This can be used after compilation of a unit |
| | 565 | * (such as a debugger expression) is completed and the intermediate |
| | 566 | * parser state is no longer needed. |
| | 567 | */ |
| | 568 | void CTcTokenizer::reset() |
| | 569 | { |
| | 570 | /* delete the source object */ |
| | 571 | delete_source(); |
| | 572 | |
| | 573 | /* delete saved token text */ |
| | 574 | if (src_head_ != 0) |
| | 575 | { |
| | 576 | /* delete the list */ |
| | 577 | delete src_head_; |
| | 578 | |
| | 579 | /* re-initialize the source block list */ |
| | 580 | init_src_block_list(); |
| | 581 | } |
| | 582 | } |
| | 583 | |
| | 584 | /* ------------------------------------------------------------------------ */ |
| | 585 | /* |
| | 586 | * Delete the source file, if any, including any parent include files. |
| | 587 | */ |
| | 588 | void CTcTokenizer::delete_source() |
| | 589 | { |
| | 590 | /* delete the current stream and all enclosing parents */ |
| | 591 | while (str_ != 0) |
| | 592 | { |
| | 593 | CTcTokStream *nxt; |
| | 594 | |
| | 595 | /* remember the next stream in the list */ |
| | 596 | nxt = str_->get_parent(); |
| | 597 | |
| | 598 | /* delete this stream */ |
| | 599 | delete str_; |
| | 600 | |
| | 601 | /* move up to the next one */ |
| | 602 | str_ = nxt; |
| | 603 | } |
| | 604 | |
| | 605 | /* there are no more streams */ |
| | 606 | str_ = 0; |
| | 607 | } |
| | 608 | |
| | 609 | |
| | 610 | /* ------------------------------------------------------------------------ */ |
| | 611 | /* |
| | 612 | * Set up to read a source file. Returns zero on success, or a non-zero |
| | 613 | * error code on failure. |
| | 614 | */ |
| | 615 | int CTcTokenizer::set_source(const char *src_filename, const char *orig_name) |
| | 616 | { |
| | 617 | CTcTokFileDesc *desc; |
| | 618 | CTcSrcFile *src; |
| | 619 | int charset_error; |
| | 620 | int default_charset_error; |
| | 621 | |
| | 622 | /* empty out the input line buffer */ |
| | 623 | clear_linebuf(); |
| | 624 | |
| | 625 | /* set up at the beginning of the input line buffer */ |
| | 626 | start_new_line(&linebuf_, 0); |
| | 627 | |
| | 628 | /* create a reader for the source file */ |
| | 629 | src = CTcSrcFile::open_source(src_filename, res_loader_, |
| | 630 | default_charset_, &charset_error, |
| | 631 | &default_charset_error); |
| | 632 | if (src == 0) |
| | 633 | { |
| | 634 | /* if we had a problem loading the default character set, log it */ |
| | 635 | if (default_charset_error) |
| | 636 | log_error(TCERR_CANT_LOAD_DEFAULT_CHARSET, default_charset_); |
| | 637 | |
| | 638 | /* return failure */ |
| | 639 | return TCERR_CANT_OPEN_SRC; |
| | 640 | } |
| | 641 | |
| | 642 | /* find or create a file descriptor for this filename */ |
| | 643 | desc = get_file_desc(src_filename, strlen(src_filename), FALSE, |
| | 644 | orig_name, strlen(orig_name)); |
| | 645 | |
| | 646 | /* |
| | 647 | * Create a stream to read the source file. The new stream has no |
| | 648 | * parent, because this is the top-level source file, and was not |
| | 649 | * included from any other file. |
| | 650 | */ |
| | 651 | str_ = new CTcTokStream(desc, src, 0, charset_error, if_sp_); |
| | 652 | |
| | 653 | /* success */ |
| | 654 | return 0; |
| | 655 | } |
| | 656 | |
| | 657 | /* |
| | 658 | * Set up to read source code from a memory buffer |
| | 659 | */ |
| | 660 | void CTcTokenizer::set_source_buf(const char *buf) |
| | 661 | { |
| | 662 | CTcSrcMemory *src; |
| | 663 | |
| | 664 | /* empty out the input line buffer */ |
| | 665 | clear_linebuf(); |
| | 666 | |
| | 667 | /* reset the scanning state to the start of a brand new stream */ |
| | 668 | in_pp_expr_ = FALSE; |
| | 669 | last_linenum_ = 0; |
| | 670 | unsplicebuf_.clear_text(); |
| | 671 | in_quote_ = 0; |
| | 672 | comment_in_embedding_ = FALSE; |
| | 673 | macro_in_embedding_ = FALSE; |
| | 674 | main_in_embedding_ = FALSE; |
| | 675 | if_sp_ = 0; |
| | 676 | if_false_level_ = 0; |
| | 677 | nxttok_valid_ = FALSE; |
| | 678 | |
| | 679 | /* set up at the beginning of the input line buffer */ |
| | 680 | start_new_line(&linebuf_, 0); |
| | 681 | |
| | 682 | /* create a reader for the memory buffer */ |
| | 683 | src = new CTcSrcMemory(buf, default_mapper_); |
| | 684 | |
| | 685 | /* |
| | 686 | * Create a stream to read the source file. The new stream has no |
| | 687 | * parent, because this is the top-level source file, and was not |
| | 688 | * included from any other file. |
| | 689 | */ |
| | 690 | str_ = new CTcTokStream(0, src, 0, 0, if_sp_); |
| | 691 | } |
| | 692 | |
| | 693 | /* ------------------------------------------------------------------------ */ |
| | 694 | /* |
| | 695 | * Stuff text into the source stream. |
| | 696 | */ |
| | 697 | void CTcTokenizer::stuff_text(const char *txt, size_t len, int expand) |
| | 698 | { |
| | 699 | CTcTokString expbuf; |
| | 700 | int p_ofs; |
| | 701 | |
| | 702 | /* if desired, expand macros */ |
| | 703 | if (expand) |
| | 704 | { |
| | 705 | /* expand macros in the text, storing the result in 'expbuf' */ |
| | 706 | expand_macros(&expbuf, txt, len); |
| | 707 | |
| | 708 | /* use the expanded version as the stuffed text now */ |
| | 709 | txt = expbuf.get_text(); |
| | 710 | len = expbuf.get_text_len(); |
| | 711 | } |
| | 712 | |
| | 713 | /* get the current p_ offset */ |
| | 714 | p_ofs = p_.getptr() - curbuf_->get_text(); |
| | 715 | |
| | 716 | /* insert the text into the buffer */ |
| | 717 | curbuf_->insert(p_ofs, txt, len); |
| | 718 | |
| | 719 | /* reset p_ in case the curbuf_ buffer was reallocated for expansion */ |
| | 720 | start_new_line(curbuf_, p_ofs); |
| | 721 | } |
| | 722 | |
| | 723 | /* ------------------------------------------------------------------------ */ |
| | 724 | /* |
| | 725 | * Find or create a file descriptor for a given filename |
| | 726 | */ |
| | 727 | CTcTokFileDesc *CTcTokenizer::get_file_desc(const char *fname, |
| | 728 | size_t fname_len, |
| | 729 | int always_create, |
| | 730 | const char *orig_fname, |
| | 731 | size_t orig_fname_len) |
| | 732 | { |
| | 733 | CTcTokFileDesc *orig_desc; |
| | 734 | CTcTokFileDesc *desc; |
| | 735 | |
| | 736 | /* presume we won't find an original descriptor in the list */ |
| | 737 | orig_desc = 0; |
| | 738 | |
| | 739 | /* |
| | 740 | * Search the list of existing descriptors to find one that matches. |
| | 741 | * Do this regardless of whether we're allowed to re-use an existing |
| | 742 | * one or not - even if we're creating a new one unconditionaly, we |
| | 743 | * need to know if there's an earlier copy that already exists so we |
| | 744 | * can associate the new one with the original. |
| | 745 | */ |
| | 746 | for (desc = desc_head_ ; desc != 0 ; desc = desc->get_next()) |
| | 747 | { |
| | 748 | /* check for a name match */ |
| | 749 | if (strlen(desc->get_fname()) == fname_len |
| | 750 | && memcmp(desc->get_fname(), fname, fname_len) == 0) |
| | 751 | { |
| | 752 | /* |
| | 753 | * if we're allowed to return an existing descriptor, return |
| | 754 | * this one, since it's for the same filename |
| | 755 | */ |
| | 756 | if (!always_create) |
| | 757 | return desc; |
| | 758 | |
| | 759 | /* |
| | 760 | * we have to create a new descriptor even though we have an |
| | 761 | * existing one - remember the original so we can point the |
| | 762 | * new one back to the original |
| | 763 | */ |
| | 764 | orig_desc = desc; |
| | 765 | |
| | 766 | /* |
| | 767 | * no need to look any further - we've found the first |
| | 768 | * instance of this filename in our list |
| | 769 | */ |
| | 770 | break; |
| | 771 | } |
| | 772 | } |
| | 773 | |
| | 774 | /* we didn't find a match - create a new descriptor */ |
| | 775 | desc = new CTcTokFileDesc(fname, fname_len, next_filedesc_id_++, |
| | 776 | orig_desc, orig_fname, orig_fname_len); |
| | 777 | |
| | 778 | /* link it in at the end of the master list */ |
| | 779 | desc->set_next(0); |
| | 780 | if (desc_tail_ == 0) |
| | 781 | desc_head_ = desc; |
| | 782 | else |
| | 783 | desc_tail_->set_next(desc); |
| | 784 | desc_tail_ = desc; |
| | 785 | |
| | 786 | /* expand our array index if necessary */ |
| | 787 | if (desc_list_cnt_ >= desc_list_alo_) |
| | 788 | { |
| | 789 | size_t siz; |
| | 790 | |
| | 791 | /* allocate or expand the array */ |
| | 792 | desc_list_alo_ += 10; |
| | 793 | siz = desc_list_alo_ * sizeof(desc_list_[0]); |
| | 794 | if (desc_list_ == 0) |
| | 795 | desc_list_ = (CTcTokFileDesc **)t3malloc(siz); |
| | 796 | else |
| | 797 | desc_list_ = (CTcTokFileDesc **)t3realloc(desc_list_, siz); |
| | 798 | } |
| | 799 | |
| | 800 | /* add the new array entry */ |
| | 801 | desc_list_[desc_list_cnt_++] = desc; |
| | 802 | |
| | 803 | /* return it */ |
| | 804 | return desc; |
| | 805 | } |
| | 806 | |
| | 807 | |
| | 808 | /* ------------------------------------------------------------------------ */ |
| | 809 | /* |
| | 810 | * Add an include path entry. Each new entry goes at the end of the |
| | 811 | * list, after all previous entries. |
| | 812 | */ |
| | 813 | void CTcTokenizer::add_inc_path(const char *path) |
| | 814 | { |
| | 815 | tctok_incpath_t *entry; |
| | 816 | |
| | 817 | /* create a new path list entry */ |
| | 818 | entry = (tctok_incpath_t *)t3malloc(sizeof(tctok_incpath_t) |
| | 819 | + strlen(path)); |
| | 820 | |
| | 821 | /* store the path in the entry */ |
| | 822 | strcpy(entry->path, path); |
| | 823 | |
| | 824 | /* link this entry at the end of our list */ |
| | 825 | if (incpath_tail_ != 0) |
| | 826 | incpath_tail_->nxt = entry; |
| | 827 | else |
| | 828 | incpath_head_ = entry; |
| | 829 | incpath_tail_ = entry; |
| | 830 | entry->nxt = 0; |
| | 831 | } |
| | 832 | |
| | 833 | |
| | 834 | /* ------------------------------------------------------------------------ */ |
| | 835 | /* |
| | 836 | * Set the string capture file. |
| | 837 | */ |
| | 838 | void CTcTokenizer::set_string_capture(osfildef *fp) |
| | 839 | { |
| | 840 | /* remember the capture file */ |
| | 841 | string_fp_ = fp; |
| | 842 | |
| | 843 | /* |
| | 844 | * if we don't already have a character mapping to translate from |
| | 845 | * our internal unicode characters back into the source file |
| | 846 | * character set, create one now |
| | 847 | */ |
| | 848 | if (string_fp_map_ == 0) |
| | 849 | { |
| | 850 | /* try creating a mapping for the default character set */ |
| | 851 | if (default_charset_ != 0) |
| | 852 | string_fp_map_ = |
| | 853 | CCharmapToLocal::load(res_loader_, default_charset_); |
| | 854 | |
| | 855 | /* if we couldn't create the mapping, use a default ASCII mapping */ |
| | 856 | if (string_fp_map_ == 0) |
| | 857 | string_fp_map_ = CCharmapToLocal::load(res_loader_, "us-ascii"); |
| | 858 | } |
| | 859 | } |
| | 860 | |
| | 861 | |
| | 862 | /* ------------------------------------------------------------------------ */ |
| | 863 | /* |
| | 864 | * Get the next token in the input stream, reading additional lines from |
| | 865 | * the source file as needed. |
| | 866 | */ |
| | 867 | tc_toktyp_t CTcTokenizer::next() |
| | 868 | { |
| | 869 | /* the current token is about to become the previous token */ |
| | 870 | prvtok_ = curtok_; |
| | 871 | |
| | 872 | /* if there's an un-got token, return it */ |
| | 873 | if (nxttok_valid_) |
| | 874 | { |
| | 875 | /* get the previously-saved token */ |
| | 876 | curtok_ = nxttok_; |
| | 877 | |
| | 878 | /* we've now consumed nxttok_ */ |
| | 879 | nxttok_valid_ = FALSE; |
| | 880 | |
| | 881 | /* return the new token's type */ |
| | 882 | return curtok_.gettyp(); |
| | 883 | } |
| | 884 | |
| | 885 | /* if there's an external source, get its next token */ |
| | 886 | if (ext_src_ != 0) |
| | 887 | { |
| | 888 | const CTcToken *ext_tok; |
| | 889 | |
| | 890 | /* get the next token from the external source */ |
| | 891 | ext_tok = ext_src_->get_next_token(); |
| | 892 | |
| | 893 | /* check to see if we got a token */ |
| | 894 | if (ext_tok == 0) |
| | 895 | { |
| | 896 | /* |
| | 897 | * restore the current token in effect before this source was |
| | 898 | * active |
| | 899 | */ |
| | 900 | curtok_ = *ext_src_->get_enclosing_curtok(); |
| | 901 | |
| | 902 | /* |
| | 903 | * this source has no more tokens - restore the enclosing |
| | 904 | * source, and keep going so we try getting a token from it |
| | 905 | */ |
| | 906 | ext_src_ = ext_src_->get_enclosing_source(); |
| | 907 | |
| | 908 | /* return the token type */ |
| | 909 | return curtok_.gettyp(); |
| | 910 | } |
| | 911 | else |
| | 912 | { |
| | 913 | /* we got a token - copy it to our internal token buffer */ |
| | 914 | curtok_ = *ext_tok; |
| | 915 | |
| | 916 | /* return its type */ |
| | 917 | return curtok_.gettyp(); |
| | 918 | } |
| | 919 | } |
| | 920 | |
| | 921 | /* keep going until we get a valid token */ |
| | 922 | for (;;) |
| | 923 | { |
| | 924 | tc_toktyp_t typ; |
| | 925 | |
| | 926 | /* |
| | 927 | * read the next token from the current line, applying |
| | 928 | * appropriate string translations and storing strings and |
| | 929 | * symbols in the source block list |
| | 930 | */ |
| | 931 | typ = next_on_line_xlat_keep(); |
| | 932 | |
| | 933 | /* if it's the "null" token, skip it and read another token */ |
| | 934 | if (typ == TOKT_NULLTOK) |
| | 935 | continue; |
| | 936 | |
| | 937 | /* if we found a valid token, we're done - return the token */ |
| | 938 | if (typ != TOKT_EOF) |
| | 939 | return typ; |
| | 940 | |
| | 941 | /* |
| | 942 | * if we're at the end of a preprocess line, don't read another |
| | 943 | * line - just return end of file |
| | 944 | */ |
| | 945 | if (p_.getch() == TOK_END_PP_LINE) |
| | 946 | return TOKT_EOF; |
| | 947 | |
| | 948 | /* |
| | 949 | * we've reached the end of the line - read another line, |
| | 950 | * applying preprocessing directives and expanding macros as |
| | 951 | * needed |
| | 952 | */ |
| | 953 | if (read_line_pp()) |
| | 954 | { |
| | 955 | /* no more lines are available - return end of file */ |
| | 956 | return TOKT_EOF; |
| | 957 | } |
| | 958 | } |
| | 959 | } |
| | 960 | |
| | 961 | /* ------------------------------------------------------------------------ */ |
| | 962 | /* |
| | 963 | * clear external token sources, returning to the true input stream |
| | 964 | */ |
| | 965 | void CTcTokenizer::clear_external_sources() |
| | 966 | { |
| | 967 | /* |
| | 968 | * restore the current token as it was before the outermost external |
| | 969 | * source was first established |
| | 970 | */ |
| | 971 | if (ext_src_ != 0) |
| | 972 | { |
| | 973 | CTcTokenSource *outer; |
| | 974 | |
| | 975 | /* find the outermost source */ |
| | 976 | for (outer = ext_src_ ; outer->get_enclosing_source() != 0 ; |
| | 977 | outer = ext_src_->get_enclosing_source()) ; |
| | 978 | |
| | 979 | /* restore its original next token */ |
| | 980 | curtok_ = *ext_src_->get_enclosing_curtok(); |
| | 981 | } |
| | 982 | |
| | 983 | /* there's no external source now */ |
| | 984 | ext_src_ = 0; |
| | 985 | } |
| | 986 | |
| | 987 | /* ------------------------------------------------------------------------ */ |
| | 988 | /* |
| | 989 | * Make a safely storable copy of the current token. |
| | 990 | */ |
| | 991 | const CTcToken *CTcTokenizer::copycur() |
| | 992 | { |
| | 993 | /* if the current token is a symbol, it already has a safe copy */ |
| | 994 | if (curtok_.gettyp() == TOKT_SYM) |
| | 995 | return getcur(); |
| | 996 | |
| | 997 | /* save the current token's text in permanent tokenizer memory */ |
| | 998 | curtok_.set_text(store_source(curtok_.get_text(), curtok_.get_text_len()), |
| | 999 | curtok_.get_text_len()); |
| | 1000 | |
| | 1001 | /* return the current token, now that we've made it safe */ |
| | 1002 | return &curtok_; |
| | 1003 | } |
| | 1004 | |
| | 1005 | /* |
| | 1006 | * Make a safely storable copy of a given token. |
| | 1007 | */ |
| | 1008 | void CTcTokenizer::copytok(CTcToken *dst, const CTcToken *src) |
| | 1009 | { |
| | 1010 | /* start with an exact copy of the token */ |
| | 1011 | *dst = *src; |
| | 1012 | |
| | 1013 | /* if the token is a symbol, it already has a safe copy */ |
| | 1014 | if (src->gettyp() == TOKT_SYM) |
| | 1015 | return; |
| | 1016 | |
| | 1017 | /* save the token's text in permanent tokenizer memory */ |
| | 1018 | dst->set_text(store_source(dst->get_text(), dst->get_text_len()), |
| | 1019 | dst->get_text_len()); |
| | 1020 | } |
| | 1021 | |
| | 1022 | |
| | 1023 | /* ------------------------------------------------------------------------ */ |
| | 1024 | /* |
| | 1025 | * Check to see if the current token matches the given text |
| | 1026 | */ |
| | 1027 | int CTcTokenizer::cur_tok_matches(const char *txt, size_t len) |
| | 1028 | { |
| | 1029 | /* if the length matches, and the text matches exactly, it matches */ |
| | 1030 | return (getcur()->get_text_len() == len |
| | 1031 | && memcmp(getcur()->get_text(), txt, len) == 0); |
| | 1032 | } |
| | 1033 | |
| | 1034 | /* ------------------------------------------------------------------------ */ |
| | 1035 | /* |
| | 1036 | * Un-get the current token |
| | 1037 | */ |
| | 1038 | void CTcTokenizer::unget() |
| | 1039 | { |
| | 1040 | /* |
| | 1041 | * remember the current token as the next one to fetch, and flag |
| | 1042 | * that this is valid |
| | 1043 | */ |
| | 1044 | nxttok_ = curtok_; |
| | 1045 | nxttok_valid_ = TRUE; |
| | 1046 | |
| | 1047 | /* go back to the previous token */ |
| | 1048 | curtok_ = prvtok_; |
| | 1049 | } |
| | 1050 | |
| | 1051 | /* ------------------------------------------------------------------------ */ |
| | 1052 | /* |
| | 1053 | * Assume that we should have just found a '>>' terminating an embedded |
| | 1054 | * expression in a double-quoted string. If possible, back out the |
| | 1055 | * previous token and re-scan it as though it had started with '>>'. |
| | 1056 | * |
| | 1057 | * This is to be called by a higher-level parser when it determines |
| | 1058 | * that, syntactically, we should have found the '>>' leaving an |
| | 1059 | * embedded expression. |
| | 1060 | */ |
| | 1061 | void CTcTokenizer::assume_missing_dstr_cont() |
| | 1062 | { |
| | 1063 | /* act as though we had just seen '>>' */ |
| | 1064 | xlat_string_to_src(&main_in_embedding_, TRUE); |
| | 1065 | } |
| | 1066 | |
| | 1067 | |
| | 1068 | /* ------------------------------------------------------------------------ */ |
| | 1069 | /* |
| | 1070 | * Skip whitespace and macro expansion markers |
| | 1071 | */ |
| | 1072 | void CTcTokenizer::skip_ws_and_markers(utf8_ptr *p) |
| | 1073 | { |
| | 1074 | /* keep going until we find something interesting */ |
| | 1075 | for (;;) |
| | 1076 | { |
| | 1077 | wchar_t cur; |
| | 1078 | |
| | 1079 | /* get the current character */ |
| | 1080 | cur = p->getch(); |
| | 1081 | |
| | 1082 | /* |
| | 1083 | * if it's a macro expansion end marker, skip it as though it |
| | 1084 | * were whitespace; otherwise, if it's whitespace, skip it; |
| | 1085 | * otherwise, we're done skipping leading whitespace |
| | 1086 | */ |
| | 1087 | if (cur == TOK_MACRO_EXP_END) |
| | 1088 | { |
| | 1089 | /* skip the embedded pointer value that follows */ |
| | 1090 | p->set(p->getptr() + 1 + sizeof(CTcHashEntryPp *)); |
| | 1091 | } |
| | 1092 | else if (is_space(cur)) |
| | 1093 | { |
| | 1094 | /* skip the space */ |
| | 1095 | p->inc(); |
| | 1096 | } |
| | 1097 | else |
| | 1098 | { |
| | 1099 | /* it's not whitespace or equivalent - we're done */ |
| | 1100 | return; |
| | 1101 | } |
| | 1102 | } |
| | 1103 | } |
| | 1104 | |
| | 1105 | /* ------------------------------------------------------------------------ */ |
| | 1106 | /* |
| | 1107 | * Get the next token from the input stream, operating on the current |
| | 1108 | * line only. |
| | 1109 | */ |
| | 1110 | tc_toktyp_t CTcTokenizer::next_on_line(utf8_ptr *p, CTcToken *tok, |
| | 1111 | int *in_embedding, int expanding) |
| | 1112 | { |
| | 1113 | wchar_t cur; |
| | 1114 | tc_toktyp_t typ; |
| | 1115 | utf8_ptr start; |
| | 1116 | int num_minus; |
| | 1117 | |
| | 1118 | /* skip whitespace */ |
| | 1119 | skip_ws_and_markers(p); |
| | 1120 | |
| | 1121 | /* remember where the token starts */ |
| | 1122 | start = *p; |
| | 1123 | |
| | 1124 | /* if there's nothing left in the current line, return EOF */ |
| | 1125 | if (p->getch() == '\0') |
| | 1126 | { |
| | 1127 | /* indicate end of file */ |
| | 1128 | typ = TOKT_EOF; |
| | 1129 | goto done; |
| | 1130 | } |
| | 1131 | |
| | 1132 | /* get the initial character, and skip it */ |
| | 1133 | cur = p->getch(); |
| | 1134 | p->inc(); |
| | 1135 | |
| | 1136 | /* presume the token will not be marked as fully macro-expanded */ |
| | 1137 | tok->set_fully_expanded(FALSE); |
| | 1138 | |
| | 1139 | /* presume it's not a number with a minus sign */ |
| | 1140 | num_minus = FALSE; |
| | 1141 | |
| | 1142 | /* see what we have */ |
| | 1143 | switch(cur) |
| | 1144 | { |
| | 1145 | case TOK_MACRO_FORMAL_FLAG: |
| | 1146 | /* |
| | 1147 | * this is a two-byte formal parameter sequence in a macro |
| | 1148 | * expansion - skip the second byte of the two-byte sequence, |
| | 1149 | * and return the special token type for this sequence |
| | 1150 | */ |
| | 1151 | typ = TOKT_MACRO_FORMAL; |
| | 1152 | |
| | 1153 | /* |
| | 1154 | * skip the second byte - note that we want to skip exactly one |
| | 1155 | * byte, regardless of what the byte looks like as a utf-8 |
| | 1156 | * partial character, since it's not a utf-8 character at all |
| | 1157 | */ |
| | 1158 | p->set(p->getptr() + 1); |
| | 1159 | break; |
| | 1160 | |
| | 1161 | case TOK_MACRO_FOREACH_FLAG: |
| | 1162 | /* |
| | 1163 | * this is the special macro '#foreach' flag - return it as a |
| | 1164 | * special pseudo-token |
| | 1165 | */ |
| | 1166 | typ = TOKT_MACRO_FOREACH; |
| | 1167 | break; |
| | 1168 | |
| | 1169 | case TOK_MACRO_IFEMPTY_FLAG: |
| | 1170 | /* #ifempty macro flag */ |
| | 1171 | typ = TOKT_MACRO_IFEMPTY; |
| | 1172 | break; |
| | 1173 | |
| | 1174 | case TOK_MACRO_IFNEMPTY_FLAG: |
| | 1175 | /* #ifnempty macro flag */ |
| | 1176 | typ = TOKT_MACRO_IFNEMPTY; |
| | 1177 | break; |
| | 1178 | |
| | 1179 | case TOK_MACRO_ARGCOUNT_FLAG: |
| | 1180 | /* it's the special macro '#argcount' flag */ |
| | 1181 | typ = TOKT_MACRO_ARGCOUNT; |
| | 1182 | break; |
| | 1183 | |
| | 1184 | case TOK_FULLY_EXPANDED_FLAG: |
| | 1185 | /* set the token flag indicating that it has been fully expanded */ |
| | 1186 | tok->set_fully_expanded(TRUE); |
| | 1187 | |
| | 1188 | /* the token symbol starts at the byte after the flag byte */ |
| | 1189 | start = p->getptr(); |
| | 1190 | |
| | 1191 | /* read the first character of the symbol */ |
| | 1192 | cur = p->getch(); |
| | 1193 | p->inc(); |
| | 1194 | |
| | 1195 | /* tokenize the symbol that follows */ |
| | 1196 | goto tokenize_symbol; |
| | 1197 | |
| | 1198 | case TOK_END_PP_LINE: |
| | 1199 | /* |
| | 1200 | * Preprocess line-ending marker - when we reach the end of a |
| | 1201 | * preprocessor line, we can't read another source line, because |
| | 1202 | * a preprocessor directive consists of only a single logical |
| | 1203 | * source line. Once we see this, return end-of-file until the |
| | 1204 | * caller explicitly reads a new source line. |
| | 1205 | * |
| | 1206 | * Keep the read pointer stuck on this flag byte, so that we |
| | 1207 | * return end-of-file on a subsequent attempt to get the next |
| | 1208 | * token. |
| | 1209 | */ |
| | 1210 | *p = start; |
| | 1211 | typ = TOKT_EOF; |
| | 1212 | break; |
| | 1213 | |
| | 1214 | case '0': |
| | 1215 | case '1': |
| | 1216 | case '2': |
| | 1217 | case '3': |
| | 1218 | case '4': |
| | 1219 | case '5': |
| | 1220 | case '6': |
| | 1221 | case '7': |
| | 1222 | case '8': |
| | 1223 | case '9': |
| | 1224 | { |
| | 1225 | long acc; |
| | 1226 | |
| | 1227 | /* |
| | 1228 | * Start out with the leading digit in the accumulator. Note |
| | 1229 | * that the character set internally is always UTF-8. |
| | 1230 | */ |
| | 1231 | acc = value_of_digit(cur); |
| | 1232 | |
| | 1233 | /* |
| | 1234 | * If it's a leading zero, treat as octal or hex. '0x' means |
| | 1235 | * hex; otherwise, '0' means octal. |
| | 1236 | */ |
| | 1237 | if (cur == '0') |
| | 1238 | { |
| | 1239 | /* check for hex - if it's not hex, it's octal */ |
| | 1240 | if (p->getch() == 'x' || p->getch() == 'X') |
| | 1241 | { |
| | 1242 | /* skip the 'x' */ |
| | 1243 | p->inc(); |
| | 1244 | |
| | 1245 | /* |
| | 1246 | * scan the hex number - keep going until we find |
| | 1247 | * something that's not a hex digit |
| | 1248 | */ |
| | 1249 | for (;;) |
| | 1250 | { |
| | 1251 | /* get this character */ |
| | 1252 | cur = p->getch(); |
| | 1253 | |
| | 1254 | /* if it's not a hex digit, stop scanning */ |
| | 1255 | if (!is_xdigit(cur)) |
| | 1256 | break; |
| | 1257 | |
| | 1258 | /* |
| | 1259 | * Shift the accumulator and add this digit's value. |
| | 1260 | * Note that we can save a test - if the character is |
| | 1261 | * >= lower-case 'a', we know it's not an upper-case |
| | 1262 | * letter because the lower-case letters all have |
| | 1263 | * values above the upper-case letters in UTF-8 |
| | 1264 | * encoding (which we always use as the internal |
| | 1265 | * character set). Since we already know it's a |
| | 1266 | * valid hex digit (we wouldn't be here if it |
| | 1267 | * weren't), we can just check to see if it's at |
| | 1268 | * least lower-case 'a', and we automatically know |
| | 1269 | * then whether it's in the 'a'-'f' range or the |
| | 1270 | * 'A'-'F' range. |
| | 1271 | */ |
| | 1272 | acc *= 16; |
| | 1273 | acc += value_of_xdigit(cur); |
| | 1274 | |
| | 1275 | /* move on */ |
| | 1276 | p->inc(); |
| | 1277 | } |
| | 1278 | } |
| | 1279 | else |
| | 1280 | { |
| | 1281 | /* scan octal digits */ |
| | 1282 | for ( ; is_odigit(p->getch()) ; p->inc()) |
| | 1283 | acc = 8*acc + value_of_odigit(p->getch()); |
| | 1284 | |
| | 1285 | /* |
| | 1286 | * If we stopped on a digit outside of the octal range, |
| | 1287 | * consume any remaining digits, and flag it as an |
| | 1288 | * error. Leaving subsequent decimal digits as a |
| | 1289 | * separate token tends to be confusing, since in most |
| | 1290 | * cases the inclusion of decimal digits means that the |
| | 1291 | * user didn't really intend this to be an octal number |
| | 1292 | * after all. For instance, the leading zero might be |
| | 1293 | * there for formatting reasons, and the user simply |
| | 1294 | * forgot to take into account that it triggers octal |
| | 1295 | * interpretation. |
| | 1296 | */ |
| | 1297 | if (is_digit(p->getch())) |
| | 1298 | { |
| | 1299 | /* skip subsequent digits */ |
| | 1300 | for (p->inc() ; is_digit(p->getch()) ; p->inc()) ; |
| | 1301 | |
| | 1302 | /* flag the error */ |
| | 1303 | if (!expanding) |
| | 1304 | log_error(TCERR_DECIMAL_IN_OCTAL, |
| | 1305 | p->getptr() - start.getptr(), |
| | 1306 | start.getptr()); |
| | 1307 | } |
| | 1308 | } |
| | 1309 | } |
| | 1310 | else |
| | 1311 | { |
| | 1312 | /* scan decimal digits */ |
| | 1313 | for ( ; is_digit(p->getch()) ; p->inc()) |
| | 1314 | acc = 10*acc + value_of_digit(p->getch()); |
| | 1315 | } |
| | 1316 | |
| | 1317 | /* negate the value if we had a minus sign */ |
| | 1318 | if (num_minus) |
| | 1319 | acc = -acc; |
| | 1320 | |
| | 1321 | /* |
| | 1322 | * if we stopped at a decimal point or an exponent, it's a |
| | 1323 | * floating point number |
| | 1324 | */ |
| | 1325 | if (p->getch() == '.' || p->getch() == 'e' || p->getch() == 'E') |
| | 1326 | goto do_float; |
| | 1327 | |
| | 1328 | /* it's an integer value */ |
| | 1329 | typ = TOKT_INT; |
| | 1330 | |
| | 1331 | /* set the integer value */ |
| | 1332 | tok->set_int_val(acc); |
| | 1333 | } |
| | 1334 | break; |
| | 1335 | |
| | 1336 | do_float: |
| | 1337 | { |
| | 1338 | int found_decpt; |
| | 1339 | |
| | 1340 | /* start over and parse the float */ |
| | 1341 | for (*p = start, found_decpt = FALSE ; ; p->inc()) |
| | 1342 | { |
| | 1343 | /* get this character and move on */ |
| | 1344 | cur = p->getch(); |
| | 1345 | |
| | 1346 | /* see what we have */ |
| | 1347 | if (is_digit(cur)) |
| | 1348 | { |
| | 1349 | /* we have another digit; just keep going */ |
| | 1350 | } |
| | 1351 | else if (!found_decpt && cur == '.') |
| | 1352 | { |
| | 1353 | /* it's the decimal point - note it and keep going */ |
| | 1354 | found_decpt = TRUE; |
| | 1355 | } |
| | 1356 | else if (cur == 'e' || cur == 'E') |
| | 1357 | { |
| | 1358 | utf8_ptr p2; |
| | 1359 | |
| | 1360 | /* it might not be an exponent - look ahead to find out */ |
| | 1361 | p2 = *p; |
| | 1362 | p2.inc(); |
| | 1363 | |
| | 1364 | /* if we have a sign, skip it */ |
| | 1365 | if ((cur = p2.getch()) == '-' || cur == '+') |
| | 1366 | p2.inc(); |
| | 1367 | |
| | 1368 | /* we need at least one digit to make an exponent */ |
| | 1369 | if (!is_digit(p2.getch())) |
| | 1370 | break; |
| | 1371 | |
| | 1372 | /* skip digits */ |
| | 1373 | while (is_digit(p2.getch())) |
| | 1374 | p2.inc(); |
| | 1375 | |
| | 1376 | /* advance to the end of the exponent */ |
| | 1377 | *p = p2; |
| | 1378 | |
| | 1379 | /* the end of the exponent is the end of the number */ |
| | 1380 | break; |
| | 1381 | } |
| | 1382 | else |
| | 1383 | { |
| | 1384 | /* everything else ends the number */ |
| | 1385 | break; |
| | 1386 | } |
| | 1387 | } |
| | 1388 | } |
| | 1389 | |
| | 1390 | /* it's a float */ |
| | 1391 | typ = TOKT_FLOAT; |
| | 1392 | break; |
| | 1393 | |
| | 1394 | case '"': |
| | 1395 | case '\'': |
| | 1396 | *p = start; |
| | 1397 | return tokenize_string(p, tok, in_embedding); |
| | 1398 | |
| | 1399 | case '(': |
| | 1400 | typ = TOKT_LPAR; |
| | 1401 | break; |
| | 1402 | |
| | 1403 | case ')': |
| | 1404 | typ = TOKT_RPAR; |
| | 1405 | break; |
| | 1406 | |
| | 1407 | case ',': |
| | 1408 | typ = TOKT_COMMA; |
| | 1409 | break; |
| | 1410 | |
| | 1411 | case '.': |
| | 1412 | /* check for '...' and floating-point numbers */ |
| | 1413 | if (p->getch() == '.' && p->getch_at(1) == '.') |
| | 1414 | { |
| | 1415 | p->inc(); |
| | 1416 | p->inc(); |
| | 1417 | typ = TOKT_ELLIPSIS; |
| | 1418 | } |
| | 1419 | else if (is_digit(p->getch())) |
| | 1420 | goto do_float; |
| | 1421 | else |
| | 1422 | typ = TOKT_DOT; |
| | 1423 | break; |
| | 1424 | |
| | 1425 | case '{': |
| | 1426 | typ = TOKT_LBRACE; |
| | 1427 | break; |
| | 1428 | |
| | 1429 | case '}': |
| | 1430 | typ = TOKT_RBRACE; |
| | 1431 | break; |
| | 1432 | |
| | 1433 | case '[': |
| | 1434 | typ = TOKT_LBRACK; |
| | 1435 | break; |
| | 1436 | |
| | 1437 | case ']': |
| | 1438 | typ = TOKT_RBRACK; |
| | 1439 | break; |
| | 1440 | |
| | 1441 | case '=': |
| | 1442 | /* check for '==' */ |
| | 1443 | if (p->getch() == '=') |
| | 1444 | { |
| | 1445 | p->inc(); |
| | 1446 | typ = TOKT_EQEQ; |
| | 1447 | } |
| | 1448 | else |
| | 1449 | typ = TOKT_EQ; |
| | 1450 | break; |
| | 1451 | |
| | 1452 | case ':': |
| | 1453 | /* check for '::' */ |
| | 1454 | if (p->getch() == ':') |
| | 1455 | { |
| | 1456 | p->inc(); |
| | 1457 | typ = TOKT_COLONCOLON; |
| | 1458 | } |
| | 1459 | else |
| | 1460 | typ = TOKT_COLON; |
| | 1461 | break; |
| | 1462 | |
| | 1463 | case '?': |
| | 1464 | typ = TOKT_QUESTION; |
| | 1465 | break; |
| | 1466 | |
| | 1467 | case '+': |
| | 1468 | /* check for '++' and '+=' */ |
| | 1469 | if (p->getch() == '+') |
| | 1470 | { |
| | 1471 | p->inc(); |
| | 1472 | typ = TOKT_INC; |
| | 1473 | } |
| | 1474 | else if (p->getch() == '=') |
| | 1475 | { |
| | 1476 | p->inc(); |
| | 1477 | typ = TOKT_PLUSEQ; |
| | 1478 | } |
| | 1479 | else |
| | 1480 | typ = TOKT_PLUS; |
| | 1481 | break; |
| | 1482 | |
| | 1483 | case '-': |
| | 1484 | /* check for '--', '->' and '-=' */ |
| | 1485 | if (p->getch() == '-') |
| | 1486 | { |
| | 1487 | p->inc(); |
| | 1488 | typ = TOKT_DEC; |
| | 1489 | } |
| | 1490 | else if (p->getch() == '=') |
| | 1491 | { |
| | 1492 | p->inc(); |
| | 1493 | typ = TOKT_MINEQ; |
| | 1494 | } |
| | 1495 | else if (p->getch() == '>') |
| | 1496 | { |
| | 1497 | p->inc(); |
| | 1498 | typ = TOKT_ARROW; |
| | 1499 | } |
| | 1500 | else |
| | 1501 | typ = TOKT_MINUS; |
| | 1502 | break; |
| | 1503 | |
| | 1504 | case '*': |
| | 1505 | /* check for '*=' */ |
| | 1506 | if (p->getch() == '=') |
| | 1507 | { |
| | 1508 | p->inc(); |
| | 1509 | typ = TOKT_TIMESEQ; |
| | 1510 | } |
| | 1511 | else |
| | 1512 | typ = TOKT_TIMES; |
| | 1513 | break; |
| | 1514 | |
| | 1515 | case '/': |
| | 1516 | /* check for '/=' */ |
| | 1517 | if (p->getch() == '=') |
| | 1518 | { |
| | 1519 | p->inc(); |
| | 1520 | typ = TOKT_DIVEQ; |
| | 1521 | } |
| | 1522 | else |
| | 1523 | typ = TOKT_DIV; |
| | 1524 | break; |
| | 1525 | |
| | 1526 | case '%': |
| | 1527 | /* check for '%=' */ |
| | 1528 | if (p->getch() == '=') |
| | 1529 | { |
| | 1530 | p->inc(); |
| | 1531 | typ = TOKT_MODEQ; |
| | 1532 | } |
| | 1533 | else |
| | 1534 | typ = TOKT_MOD; |
| | 1535 | break; |
| | 1536 | |
| | 1537 | case '>': |
| | 1538 | /* check for '>>=', '>>' and '>=' */ |
| | 1539 | if (p->getch() == '=') |
| | 1540 | { |
| | 1541 | p->inc(); |
| | 1542 | typ = TOKT_GE; |
| | 1543 | } |
| | 1544 | else if (p->getch() == '>') |
| | 1545 | { |
| | 1546 | /* check for the end of an embedded expression */ |
| | 1547 | if (in_embedding != 0 && *in_embedding) |
| | 1548 | { |
| | 1549 | *p = start; |
| | 1550 | return tokenize_string(p, tok, in_embedding); |
| | 1551 | } |
| | 1552 | |
| | 1553 | /* check for '>>=' */ |
| | 1554 | p->inc(); |
| | 1555 | if (p->getch() == '=') |
| | 1556 | { |
| | 1557 | p->inc(); |
| | 1558 | typ = TOKT_SHREQ; |
| | 1559 | } |
| | 1560 | else |
| | 1561 | typ = TOKT_SHR; |
| | 1562 | } |
| | 1563 | else |
| | 1564 | typ = TOKT_GT; |
| | 1565 | break; |
| | 1566 | |
| | 1567 | case '<': |
| | 1568 | /* check for '<<=', '<<', '<>', and '<=' */ |
| | 1569 | if (p->getch() == '=') |
| | 1570 | { |
| | 1571 | p->inc(); |
| | 1572 | typ = TOKT_LE; |
| | 1573 | } |
| | 1574 | else if (p->getch() == '<') |
| | 1575 | { |
| | 1576 | /* check for '<<=' */ |
| | 1577 | p->inc(); |
| | 1578 | if (p->getch() == '=') |
| | 1579 | { |
| | 1580 | p->inc(); |
| | 1581 | typ = TOKT_SHLEQ; |
| | 1582 | } |
| | 1583 | else |
| | 1584 | typ = TOKT_SHL; |
| | 1585 | } |
| | 1586 | #if 0 |
| | 1587 | else if (p->getch() == '>') |
| | 1588 | { |
| | 1589 | /* '<>' is obsolete */ |
| | 1590 | if (!expanding) |
| | 1591 | log_error(TCERR_LTGT_OBSOLETE); |
| | 1592 | |
| | 1593 | /* ... but for now proceed as though it's != */ |
| | 1594 | p->inc(); |
| | 1595 | typ = TOKT_NE; |
| | 1596 | } |
| | 1597 | #endif |
| | 1598 | else |
| | 1599 | typ = TOKT_LT; |
| | 1600 | break; |
| | 1601 | |
| | 1602 | case ';': |
| | 1603 | typ = TOKT_SEM; |
| | 1604 | break; |
| | 1605 | |
| | 1606 | case '&': |
| | 1607 | /* check for '&&' and '&=' */ |
| | 1608 | if (p->getch() == '&') |
| | 1609 | { |
| | 1610 | p->inc(); |
| | 1611 | typ = TOKT_ANDAND; |
| | 1612 | } |
| | 1613 | else if (p->getch() == '=') |
| | 1614 | { |
| | 1615 | p->inc(); |
| | 1616 | typ = TOKT_ANDEQ; |
| | 1617 | } |
| | 1618 | else |
| | 1619 | typ = TOKT_AND; |
| | 1620 | break; |
| | 1621 | |
| | 1622 | case '|': |
| | 1623 | /* check for '||' and '|=' */ |
| | 1624 | if (p->getch() == '|') |
| | 1625 | { |
| | 1626 | p->inc(); |
| | 1627 | typ = TOKT_OROR; |
| | 1628 | } |
| | 1629 | else if (p->getch() == '=') |
| | 1630 | { |
| | 1631 | p->inc(); |
| | 1632 | typ = TOKT_OREQ; |
| | 1633 | } |
| | 1634 | else |
| | 1635 | typ = TOKT_OR; |
| | 1636 | break; |
| | 1637 | |
| | 1638 | case '^': |
| | 1639 | /* check for '^=' */ |
| | 1640 | if (p->getch() == '=') |
| | 1641 | { |
| | 1642 | p->inc(); |
| | 1643 | typ = TOKT_XOREQ; |
| | 1644 | } |
| | 1645 | else |
| | 1646 | typ = TOKT_XOR; |
| | 1647 | break; |
| | 1648 | |
| | 1649 | case '!': |
| | 1650 | /* check for '!=' */ |
| | 1651 | if (p->getch() == '=') |
| | 1652 | { |
| | 1653 | p->inc(); |
| | 1654 | typ = TOKT_NE; |
| | 1655 | } |
| | 1656 | else |
| | 1657 | typ = TOKT_NOT; |
| | 1658 | break; |
| | 1659 | |
| | 1660 | case '~': |
| | 1661 | typ = TOKT_BNOT; |
| | 1662 | break; |
| | 1663 | |
| | 1664 | case '@': |
| | 1665 | typ = TOKT_AT; |
| | 1666 | break; |
| | 1667 | |
| | 1668 | case '#': |
| | 1669 | /* check for '##' and '#@' */ |
| | 1670 | if (p->getch() == '#') |
| | 1671 | { |
| | 1672 | p->inc(); |
| | 1673 | typ = TOKT_POUNDPOUND; |
| | 1674 | } |
| | 1675 | else if (p->getch() == '@') |
| | 1676 | { |
| | 1677 | p->inc(); |
| | 1678 | typ = TOKT_POUNDAT; |
| | 1679 | } |
| | 1680 | else |
| | 1681 | typ = TOKT_POUND; |
| | 1682 | break; |
| | 1683 | |
| | 1684 | default: |
| | 1685 | /* check to see if it's a symbol */ |
| | 1686 | if (is_syminit(cur)) |
| | 1687 | { |
| | 1688 | size_t len, full_len; |
| | 1689 | |
| | 1690 | /* |
| | 1691 | * scan the identifier (note that we've already skipped the |
| | 1692 | * first character, so we start out at length = 1) |
| | 1693 | */ |
| | 1694 | tokenize_symbol: |
| | 1695 | for (len = full_len = 1 ; is_sym(p->getch()) ; p->inc()) |
| | 1696 | { |
| | 1697 | /* count the full length */ |
| | 1698 | ++full_len; |
| | 1699 | |
| | 1700 | /* |
| | 1701 | * count this character if we're not over the maximum |
| | 1702 | * length |
| | 1703 | */ |
| | 1704 | if (len < TOK_SYM_MAX_LEN) |
| | 1705 | ++len; |
| | 1706 | } |
| | 1707 | |
| | 1708 | /* if we truncated the symbol, issue a warning */ |
| | 1709 | if (full_len != len && !expanding) |
| | 1710 | log_warning(TCERR_SYMBOL_TRUNCATED, |
| | 1711 | (int)full_len, start.getptr(), |
| | 1712 | (int)len, start.getptr()); |
| | 1713 | |
| | 1714 | /* it's a symbol */ |
| | 1715 | typ = TOKT_SYM; |
| | 1716 | } |
| | 1717 | else |
| | 1718 | { |
| | 1719 | /* invalid token */ |
| | 1720 | typ = TOKT_INVALID; |
| | 1721 | } |
| | 1722 | break; |
| | 1723 | } |
| | 1724 | |
| | 1725 | done: |
| | 1726 | /* set the type */ |
| | 1727 | tok->settyp(typ); |
| | 1728 | |
| | 1729 | /* set the text */ |
| | 1730 | tok->set_text(start.getptr(), p->getptr() - start.getptr()); |
| | 1731 | |
| | 1732 | /* return the type */ |
| | 1733 | return typ; |
| | 1734 | } |
| | 1735 | |
| | 1736 | /* |
| | 1737 | * get the next token, limiting to the length of the source buffer |
| | 1738 | */ |
| | 1739 | tc_toktyp_t CTcTokenizer::next_on_line(const CTcTokString *srcbuf, |
| | 1740 | utf8_ptr *p, CTcToken *tok, |
| | 1741 | int *in_embedding, int expanding) |
| | 1742 | { |
| | 1743 | /* get the next token */ |
| | 1744 | next_on_line(p, tok, in_embedding, expanding); |
| | 1745 | |
| | 1746 | /* if the token is past the end of the line, return EOF */ |
| | 1747 | if (tok->get_text() >= srcbuf->get_text_end()) |
| | 1748 | { |
| | 1749 | /* set the token to indicate end of line */ |
| | 1750 | tok->settyp(TOKT_EOF); |
| | 1751 | |
| | 1752 | /* set the token to point to the end of the buffer */ |
| | 1753 | tok->set_text(srcbuf->get_text_end(), 0); |
| | 1754 | } |
| | 1755 | |
| | 1756 | /* return the token type */ |
| | 1757 | return tok->gettyp(); |
| | 1758 | } |
| | 1759 | |
| | 1760 | /* |
| | 1761 | * Get the next token on the line, translating escapes in strings. This |
| | 1762 | * updates the line buffer in-place to incorporate the translated string |
| | 1763 | * text. |
| | 1764 | */ |
| | 1765 | tc_toktyp_t CTcTokenizer::next_on_line_xlat(utf8_ptr *p, CTcToken *tok, |
| | 1766 | int *in_embedding) |
| | 1767 | { |
| | 1768 | /* skip whitespace */ |
| | 1769 | skip_ws_and_markers(p); |
| | 1770 | |
| | 1771 | /* if this is a string, translate escapes */ |
| | 1772 | switch(p->getch()) |
| | 1773 | { |
| | 1774 | case '"': |
| | 1775 | case '\'': |
| | 1776 | /* translate the string */ |
| | 1777 | return xlat_string(p, tok, in_embedding); |
| | 1778 | |
| | 1779 | case '>': |
| | 1780 | /* if we're in an embedding, check for '>>' */ |
| | 1781 | if (in_embedding != 0 && *in_embedding && p->getch_at(1) == '>') |
| | 1782 | return tokenize_string(p, tok, in_embedding); |
| | 1783 | |
| | 1784 | /* use the default case */ |
| | 1785 | goto do_normal; |
| | 1786 | |
| | 1787 | default: |
| | 1788 | do_normal: |
| | 1789 | /* for anything else, use the default tokenizer */ |
| | 1790 | return next_on_line(p, tok, in_embedding, FALSE); |
| | 1791 | } |
| | 1792 | } |
| | 1793 | |
| | 1794 | /* |
| | 1795 | * Look up a keyword |
| | 1796 | */ |
| | 1797 | int CTcTokenizer::look_up_keyword(const CTcToken *tok, tc_toktyp_t *kwtok) |
| | 1798 | { |
| | 1799 | CTcHashEntryKw *kw; |
| | 1800 | |
| | 1801 | /* look it up in the keyword table */ |
| | 1802 | kw = (CTcHashEntryKw *)kw_->find(tok->get_text(), tok->get_text_len()); |
| | 1803 | if (kw != 0) |
| | 1804 | { |
| | 1805 | /* we found the keyword - set 'kw' to the keyword token id */ |
| | 1806 | *kwtok = kw->get_tok_id(); |
| | 1807 | |
| | 1808 | /* tell the caller we found it */ |
| | 1809 | return TRUE; |
| | 1810 | } |
| | 1811 | else |
| | 1812 | { |
| | 1813 | /* tell the caller it's not a keyword */ |
| | 1814 | return FALSE; |
| | 1815 | } |
| | 1816 | } |
| | 1817 | |
| | 1818 | /* |
| | 1819 | * Get the next token on the line, translating escape sequences in |
| | 1820 | * strings, and storing strings and symbols in the source block list. |
| | 1821 | * This routine also translates keywords for token types. |
| | 1822 | */ |
| | 1823 | tc_toktyp_t CTcTokenizer::next_on_line_xlat_keep() |
| | 1824 | { |
| | 1825 | tc_toktyp_t typ; |
| | 1826 | |
| | 1827 | /* keep going until we find a valid symbol */ |
| | 1828 | for (;;) |
| | 1829 | { |
| | 1830 | /* skip whitespace and macro expansion flags */ |
| | 1831 | skip_ws_and_markers(&p_); |
| | 1832 | |
| | 1833 | /* see what we have */ |
| | 1834 | switch(p_.getch()) |
| | 1835 | { |
| | 1836 | case '"': |
| | 1837 | case '\'': |
| | 1838 | /* it's a string - translate and save it */ |
| | 1839 | return xlat_string_to_src(&main_in_embedding_, FALSE); |
| | 1840 | |
| | 1841 | case '>': |
| | 1842 | /* if we're in an embedding, this is the end of it */ |
| | 1843 | if (main_in_embedding_ && p_.getch_at(1) == '>') |
| | 1844 | return xlat_string_to_src(&main_in_embedding_, FALSE); |
| | 1845 | |
| | 1846 | /* use the normal parsing */ |
| | 1847 | goto do_normal; |
| | 1848 | |
| | 1849 | default: |
| | 1850 | do_normal: |
| | 1851 | /* for anything else, use the default tokenizer */ |
| | 1852 | typ = next_on_line(&p_, &curtok_, &main_in_embedding_, FALSE); |
| | 1853 | |
| | 1854 | /* check the token type */ |
| | 1855 | switch(typ) |
| | 1856 | { |
| | 1857 | case TOKT_SYM: |
| | 1858 | /* symbol */ |
| | 1859 | { |
| | 1860 | const char *p; |
| | 1861 | CTcHashEntryKw *kw; |
| | 1862 | |
| | 1863 | /* look it up in the keyword table */ |
| | 1864 | kw = (CTcHashEntryKw *)kw_->find(curtok_.get_text(), |
| | 1865 | curtok_.get_text_len()); |
| | 1866 | if (kw != 0) |
| | 1867 | { |
| | 1868 | /* replace the token with the keyword token type */ |
| | 1869 | typ = kw->get_tok_id(); |
| | 1870 | curtok_.settyp(typ); |
| | 1871 | } |
| | 1872 | else |
| | 1873 | { |
| | 1874 | /* ordinary symbol - save the text */ |
| | 1875 | p = store_source(curtok_.get_text(), |
| | 1876 | curtok_.get_text_len()); |
| | 1877 | |
| | 1878 | /* |
| | 1879 | * change the token's text to point to the |
| | 1880 | * source block, so that this token's text |
| | 1881 | * pointer will remain permanently valid (the |
| | 1882 | * original copy, in the source line buffer, |
| | 1883 | * will be overwritten as soon as we read |
| | 1884 | * another source line; we don't want the caller |
| | 1885 | * to have to worry about this, so we return the |
| | 1886 | * permanent copy) |
| | 1887 | */ |
| | 1888 | curtok_.set_text(p, curtok_.get_text_len()); |
| | 1889 | } |
| | 1890 | } |
| | 1891 | break; |
| | 1892 | |
| | 1893 | case TOKT_FLOAT: |
| | 1894 | /* floating-point number */ |
| | 1895 | { |
| | 1896 | const char *p; |
| | 1897 | |
| | 1898 | /* |
| | 1899 | * save the text so that it remains permanently |
| | 1900 | * valid - we keep track of floats by the original |
| | 1901 | * text, and let the code generator produce the |
| | 1902 | * appropriate object file representation |
| | 1903 | */ |
| | 1904 | p = store_source(curtok_.get_text(), |
| | 1905 | curtok_.get_text_len()); |
| | 1906 | curtok_.set_text(p, curtok_.get_text_len()); |
| | 1907 | } |
| | 1908 | break; |
| | 1909 | |
| | 1910 | case TOKT_INVALID: |
| | 1911 | /* |
| | 1912 | * check for unmappable characters - these will show up as |
| | 1913 | * Unicode U+FFFD, the "replacement character"; log it as |
| | 1914 | * 'unmappable' if applicable, otherwise as an invalid |
| | 1915 | * character |
| | 1916 | */ |
| | 1917 | if (utf8_ptr::s_getch(curtok_.get_text()) == 0xfffd) |
| | 1918 | log_error_curtok(TCERR_UNMAPPABLE_CHAR); |
| | 1919 | else |
| | 1920 | log_error_curtok(TCERR_INVALID_CHAR); |
| | 1921 | |
| | 1922 | /* skip this character */ |
| | 1923 | p_.inc(); |
| | 1924 | |
| | 1925 | /* keep going */ |
| | 1926 | continue; |
| | 1927 | |
| | 1928 | default: |
| | 1929 | break; |
| | 1930 | } |
| | 1931 | } |
| | 1932 | |
| | 1933 | /* return the type */ |
| | 1934 | return typ; |
| | 1935 | } |
| | 1936 | } |
| | 1937 | |
| | 1938 | |
| | 1939 | /* |
| | 1940 | * Translate the string at the current token position in the input |
| | 1941 | * stream to the source block list. |
| | 1942 | */ |
| | 1943 | tc_toktyp_t CTcTokenizer::xlat_string_to_src(int *in_embedding, |
| | 1944 | int force_embed_end) |
| | 1945 | { |
| | 1946 | tc_toktyp_t typ; |
| | 1947 | |
| | 1948 | /* |
| | 1949 | * Reserve space for the entire rest of the line. This is |
| | 1950 | * conservative, in that we will definitely need less space than |
| | 1951 | * this. This might cause us to waste a little space here and |
| | 1952 | * there, since we will over-allocate when we have a short string |
| | 1953 | * early in a long line, but this will save us the time of scanning |
| | 1954 | * the string twice just to see how long it is. |
| | 1955 | */ |
| | 1956 | reserve_source(curbuf_->get_text_len() - |
| | 1957 | (p_.getptr() - curbuf_->get_text())); |
| | 1958 | |
| | 1959 | /* translate into the source block */ |
| | 1960 | typ = xlat_string_to(src_ptr_, &p_, &curtok_, |
| | 1961 | in_embedding, force_embed_end); |
| | 1962 | |
| | 1963 | /* commit the space in the source block */ |
| | 1964 | commit_source(curtok_.get_text_len() + 1); |
| | 1965 | |
| | 1966 | /* return the string token */ |
| | 1967 | return typ; |
| | 1968 | } |
| | 1969 | |
| | 1970 | /* |
| | 1971 | * Translate a string, setting up the token structure for the string, |
| | 1972 | * and writing the translated version of the string directly over the |
| | 1973 | * original source buffer of the string. |
| | 1974 | * |
| | 1975 | * Since a translated string can only shrink (because a translated |
| | 1976 | * escape sequence is always shorter than the original source version), |
| | 1977 | * we don't need a separate buffer, but can simply translate into the |
| | 1978 | * source buffer, overwriting the original string as we go. |
| | 1979 | */ |
| | 1980 | tc_toktyp_t CTcTokenizer::xlat_string(utf8_ptr *p, CTcToken *tok, |
| | 1981 | int *in_embedding) |
| | 1982 | { |
| | 1983 | char *dst; |
| | 1984 | |
| | 1985 | /* |
| | 1986 | * write the translated string over the original string's text, |
| | 1987 | * starting at the character after the quote |
| | 1988 | */ |
| | 1989 | dst = p->getptr() + 1; |
| | 1990 | |
| | 1991 | /* translate the string into our destination buffer */ |
| | 1992 | return xlat_string_to(dst, p, tok, in_embedding, FALSE); |
| | 1993 | } |
| | 1994 | |
| | 1995 | /* |
| | 1996 | * Translate a string, setting up the token structure for the string. |
| | 1997 | * We will update the line buffer in-place to incorporate the translated |
| | 1998 | * string text. |
| | 1999 | */ |
| | 2000 | tc_toktyp_t CTcTokenizer::xlat_string_to(char *dstp, utf8_ptr *p, |
| | 2001 | CTcToken *tok, int *in_embedding, |
| | 2002 | int force_embed_end) |
| | 2003 | { |
| | 2004 | utf8_ptr dst; |
| | 2005 | wchar_t qu; |
| | 2006 | utf8_ptr start, end; |
| | 2007 | int i; |
| | 2008 | |
| | 2009 | /* set up our output utf8 pointer */ |
| | 2010 | dst.set(dstp); |
| | 2011 | |
| | 2012 | /* note the open quote character */ |
| | 2013 | qu = p->getch(); |
| | 2014 | |
| | 2015 | /* set the appropriate string token type */ |
| | 2016 | tok->settyp(qu == '"' |
| | 2017 | ? TOKT_DSTR |
| | 2018 | : (qu == '>' ? TOKT_DSTR_END : TOKT_SSTR)); |
| | 2019 | |
| | 2020 | /* skip the open quote */ |
| | 2021 | p->inc(); |
| | 2022 | |
| | 2023 | /* skip the second '>' if it's a '>>' */ |
| | 2024 | if (force_embed_end) |
| | 2025 | { |
| | 2026 | /* |
| | 2027 | * they want us to assume the embedding ends here, regardless of |
| | 2028 | * what we're looking at - act the same as though we had |
| | 2029 | * actually seen '>>', but don't skip any input (in fact, back |
| | 2030 | * up one, since we already skipped one character for what we |
| | 2031 | * had thought was the open quote |
| | 2032 | */ |
| | 2033 | p->dec(); |
| | 2034 | |
| | 2035 | /* clear the caller's in-embedding status */ |
| | 2036 | *in_embedding = FALSE; |
| | 2037 | |
| | 2038 | /* close with a double quote */ |
| | 2039 | qu = '"'; |
| | 2040 | |
| | 2041 | /* it's a double-quoted string continuation */ |
| | 2042 | tok->settyp(TOKT_DSTR_END); |
| | 2043 | } |
| | 2044 | else if (qu == '>') |
| | 2045 | { |
| | 2046 | /* skip the second '>' */ |
| | 2047 | p->inc(); |
| | 2048 | |
| | 2049 | /* clear the caller's in-embedding status */ |
| | 2050 | *in_embedding = FALSE; |
| | 2051 | |
| | 2052 | /* close with a double quote */ |
| | 2053 | qu = '"'; |
| | 2054 | } |
| | 2055 | |
| | 2056 | /* remember where the string's contents start */ |
| | 2057 | start = *p; |
| | 2058 | |
| | 2059 | /* scan the string and translate quotes */ |
| | 2060 | for (;;) |
| | 2061 | { |
| | 2062 | wchar_t cur; |
| | 2063 | |
| | 2064 | /* get this character */ |
| | 2065 | cur = p->getch(); |
| | 2066 | |
| | 2067 | /* if this is the matching quote, we're done */ |
| | 2068 | if (cur == qu) |
| | 2069 | break; |
| | 2070 | |
| | 2071 | /* |
| | 2072 | * if we find an end-of-line within the string, it's an error - |
| | 2073 | * we should always splice strings together onto a single line |
| | 2074 | * before starting to tokenize the line |
| | 2075 | */ |
| | 2076 | if (cur == '\0') |
| | 2077 | { |
| | 2078 | size_t len; |
| | 2079 | utf8_ptr p; |
| | 2080 | |
| | 2081 | /* note where the string ends */ |
| | 2082 | end = dst; |
| | 2083 | |
| | 2084 | /* set the token's text pointer */ |
| | 2085 | tok->set_text(dstp, end.getptr() - dstp); |
| | 2086 | |
| | 2087 | /* null-terminate the result string */ |
| | 2088 | dst.setch('\0'); |
| | 2089 | |
| | 2090 | /* |
| | 2091 | * get the length of the unterminated string so far, but for |
| | 2092 | * error logging, limit the length to twenty characters -- |
| | 2093 | * we just want to give the user enough information to find |
| | 2094 | * the string in error, without making the error message |
| | 2095 | * huge |
| | 2096 | */ |
| | 2097 | p.set(dstp); |
| | 2098 | len = p.len(end.getptr() - dstp); |
| | 2099 | if (len > 20) |
| | 2100 | len = p.bytelen(20); |
| | 2101 | |
| | 2102 | /* |
| | 2103 | * Check for a special heuristic case. If the string was of |
| | 2104 | * zero length, and we have something sitting in our |
| | 2105 | * unsplice buffer, here's what probably happened: the input |
| | 2106 | * was missing a ">>" sequence at the end of an embedded |
| | 2107 | * expression, and the parser told us to put it back in. We |
| | 2108 | * had earlier decided we needed to splice up to a quote to |
| | 2109 | * end what looked to us like an unterminated string. If |
| | 2110 | * this is the case, we and the parser are working at cross |
| | 2111 | * purposes; the parser is smarter than we are, so we should |
| | 2112 | * synchronize with it. |
| | 2113 | */ |
| | 2114 | if (tok->get_text_len() == 0 |
| | 2115 | && qu == '"' |
| | 2116 | && unsplicebuf_.get_text_len() != 0) |
| | 2117 | { |
| | 2118 | char *buf; |
| | 2119 | |
| | 2120 | /* |
| | 2121 | * we must have spliced a line to finish a string - |
| | 2122 | * insert the quote into the splice buffer, and ignore |
| | 2123 | * it here |
| | 2124 | */ |
| | 2125 | |
| | 2126 | /* |
| | 2127 | * make sure there's room for one more character (plus a |
| | 2128 | * null byte) |
| | 2129 | */ |
| | 2130 | unsplicebuf_.ensure_space(unsplicebuf_.get_text_len() + 2); |
| | 2131 | |
| | 2132 | /* get the buffer pointer */ |
| | 2133 | buf = unsplicebuf_.get_buf(); |
| | 2134 | |
| | 2135 | /* make room for the '"' */ |
| | 2136 | memmove(buf + 1, buf, unsplicebuf_.get_text_len()); |
| | 2137 | unsplicebuf_.set_text_len(unsplicebuf_.get_text_len() + 1); |
| | 2138 | |
| | 2139 | /* add the '"' */ |
| | 2140 | *buf = '"'; |
| | 2141 | |
| | 2142 | /* |
| | 2143 | * return the 'null token' to tell the caller to try |
| | 2144 | * again - do not log an error at this point |
| | 2145 | */ |
| | 2146 | return TOKT_NULLTOK; |
| | 2147 | } |
| | 2148 | |
| | 2149 | /* log the error */ |
| | 2150 | log_error(TCERR_UNTERM_STRING, |
| | 2151 | (char)qu, (int)len, dstp, (char)qu); |
| | 2152 | |
| | 2153 | /* return the string type */ |
| | 2154 | return tok->gettyp(); |
| | 2155 | } |
| | 2156 | |
| | 2157 | /* if this is an escape, translate it */ |
| | 2158 | if (cur == '\\') |
| | 2159 | { |
| | 2160 | long acc; |
| | 2161 | |
| | 2162 | /* get the character after the escape */ |
| | 2163 | p->inc(); |
| | 2164 | cur = p->getch(); |
| | 2165 | |
| | 2166 | /* see what we have */ |
| | 2167 | switch(cur) |
| | 2168 | { |
| | 2169 | case '^': |
| | 2170 | /* caps - 0x000F */ |
| | 2171 | cur = 0x000F; |
| | 2172 | break; |
| | 2173 | |
| | 2174 | case 'v': |
| | 2175 | /* miniscules - 0x000E */ |
| | 2176 | cur = 0x000E; |
| | 2177 | break; |
| | 2178 | |
| | 2179 | case 'b': |
| | 2180 | /* blank line - 0x000B */ |
| | 2181 | cur = 0x000B; |
| | 2182 | break; |
| | 2183 | |
| | 2184 | case ' ': |
| | 2185 | /* quoted space - 0x0015 */ |
| | 2186 | cur = 0x0015; |
| | 2187 | break; |
| | 2188 | |
| | 2189 | case 'n': |
| | 2190 | /* newline - explicitly use Unicode 10 character */ |
| | 2191 | cur = 10; |
| | 2192 | break; |
| | 2193 | |
| | 2194 | case 't': |
| | 2195 | /* tab - explicitly use Unicode 9 character */ |
| | 2196 | cur = 9; |
| | 2197 | break; |
| | 2198 | |
| | 2199 | case 'u': |
| | 2200 | /* |
| | 2201 | * Hex unicode character number. Read up to 4 hex |
| | 2202 | * digits that follow the 'u', and use that as a Unicode |
| | 2203 | * character ID. |
| | 2204 | */ |
| | 2205 | for (i = 0, acc = 0, p->inc() ; i < 4 ; ++i, p->inc()) |
| | 2206 | { |
| | 2207 | /* get the next character */ |
| | 2208 | cur = p->getch(); |
| | 2209 | |
| | 2210 | /* |
| | 2211 | * if it's another hex digit, add it into the |
| | 2212 | * accumulator; otherwise, we're done |
| | 2213 | */ |
| | 2214 | if (is_xdigit(cur)) |
| | 2215 | acc = 16*acc + value_of_xdigit(cur); |
| | 2216 | else |
| | 2217 | break; |
| | 2218 | } |
| | 2219 | |
| | 2220 | /* use the accumulated value as the character number */ |
| | 2221 | dst.setch((wchar_t)acc); |
| | 2222 | |
| | 2223 | /* |
| | 2224 | * continue with the current character, since we've |
| | 2225 | * already skipped ahead to the next one |
| | 2226 | */ |
| | 2227 | continue; |
| | 2228 | |
| | 2229 | case '0': |
| | 2230 | case '1': |
| | 2231 | case '2': |
| | 2232 | case '3': |
| | 2233 | case '4': |
| | 2234 | case '5': |
| | 2235 | case '6': |
| | 2236 | case '7': |
| | 2237 | /* |
| | 2238 | * Octal ASCII character number. Accumulate up to three |
| | 2239 | * octal numbers, and use the result as a character ID. |
| | 2240 | */ |
| | 2241 | for (i = 0, acc = 0 ; i < 3 ; ++i, p->inc()) |
| | 2242 | { |
| | 2243 | /* get the next character */ |
| | 2244 | cur = p->getch(); |
| | 2245 | |
| | 2246 | /* |
| | 2247 | * if it's another digit, and it would leave our |
| | 2248 | * result in the 0-255 range, count it; if not, |
| | 2249 | * we're done |
| | 2250 | */ |
| | 2251 | if (is_odigit(cur)) |
| | 2252 | { |
| | 2253 | long new_acc; |
| | 2254 | |
| | 2255 | /* compute the new value */ |
| | 2256 | new_acc = 8*acc + value_of_odigit(cur); |
| | 2257 | |
| | 2258 | /* if this would be too high, don't count it */ |
| | 2259 | if (new_acc > 255) |
| | 2260 | break; |
| | 2261 | else |
| | 2262 | acc = new_acc; |
| | 2263 | } |
| | 2264 | else |
| | 2265 | break; |
| | 2266 | } |
| | 2267 | |
| | 2268 | /* use the accumulated value as the character number */ |
| | 2269 | dst.setch((wchar_t)acc); |
| | 2270 | |
| | 2271 | /* |
| | 2272 | * continue with the current character, since we've |
| | 2273 | * already skipped ahead to the next one |
| | 2274 | */ |
| | 2275 | continue; |
| | 2276 | |
| | 2277 | case 'x': |
| | 2278 | /* |
| | 2279 | * Hex ASCII character number. Read up to two hex |
| | 2280 | * digits as a character number. |
| | 2281 | */ |
| | 2282 | for (i = 0, acc = 0, p->inc() ; i < 2 ; ++i, p->inc()) |
| | 2283 | { |
| | 2284 | /* get the next character */ |
| | 2285 | cur = p->getch(); |
| | 2286 | |
| | 2287 | /* |
| | 2288 | * if it's another hex digit, add it into the |
| | 2289 | * accumulator; otherwise, we're done |
| | 2290 | */ |
| | 2291 | if (is_xdigit(cur)) |
| | 2292 | acc = 16*acc + value_of_xdigit(cur); |
| | 2293 | else |
| | 2294 | break; |
| | 2295 | } |
| | 2296 | |
| | 2297 | /* use the accumulated value as the character number */ |
| | 2298 | dst.setch((wchar_t)acc); |
| | 2299 | |
| | 2300 | /* |
| | 2301 | * continue with the current character, since we've |
| | 2302 | * already skipped ahead to the next one |
| | 2303 | */ |
| | 2304 | continue; |
| | 2305 | |
| | 2306 | default: |
| | 2307 | /* copy anything else as-is */ |
| | 2308 | break; |
| | 2309 | } |
| | 2310 | } |
| | 2311 | else if (in_embedding != 0 && !*in_embedding |
| | 2312 | && cur == '<' && p->getch_at(1) == '<') |
| | 2313 | { |
| | 2314 | /* |
| | 2315 | * it's the start of an embedded expression - change the |
| | 2316 | * type to so indicate |
| | 2317 | */ |
| | 2318 | tok->settyp(tok->gettyp() == TOKT_DSTR |
| | 2319 | ? TOKT_DSTR_START : TOKT_DSTR_MID); |
| | 2320 | |
| | 2321 | /* tell the caller we're in an embedding */ |
| | 2322 | *in_embedding = TRUE; |
| | 2323 | |
| | 2324 | /* stop scanning */ |
| | 2325 | break; |
| | 2326 | } |
| | 2327 | |
| | 2328 | /* copy this character to the output position */ |
| | 2329 | dst.setch(cur); |
| | 2330 | |
| | 2331 | /* get the next character */ |
| | 2332 | p->inc(); |
| | 2333 | } |
| | 2334 | |
| | 2335 | /* note where the string ends */ |
| | 2336 | end = dst; |
| | 2337 | |
| | 2338 | /* set the token's text pointer */ |
| | 2339 | tok->set_text(dstp, end.getptr() - dstp); |
| | 2340 | |
| | 2341 | /* null-terminate the result string */ |
| | 2342 | dst.setch('\0'); |
| | 2343 | |
| | 2344 | /* skip an extra character if this is the start of an embedding */ |
| | 2345 | if (p->getch() == '<') |
| | 2346 | p->inc(); |
| | 2347 | |
| | 2348 | /* skip the closing quote */ |
| | 2349 | p->inc(); |
| | 2350 | |
| | 2351 | /* return the string type */ |
| | 2352 | return tok->gettyp(); |
| | 2353 | } |
| | 2354 | |
| | 2355 | |
| | 2356 | /* |
| | 2357 | * Skip a string, setting up the token structure for the string. This |
| | 2358 | * routine only parses to the end of the line; if the line ends with the |
| | 2359 | * string unterminated, we'll flag an error |
| | 2360 | */ |
| | 2361 | tc_toktyp_t CTcTokenizer::tokenize_string(utf8_ptr *p, CTcToken *tok, |
| | 2362 | int *in_embedding) |
| | 2363 | { |
| | 2364 | const char *start; |
| | 2365 | const char *contents_start; |
| | 2366 | const char *contents_end; |
| | 2367 | tc_toktyp_t typ; |
| | 2368 | wchar_t qu; |
| | 2369 | int allow_embedding; |
| | 2370 | |
| | 2371 | /* remember where the text starts */ |
| | 2372 | start = p->getptr(); |
| | 2373 | |
| | 2374 | /* note the quote type */ |
| | 2375 | qu = p->getch(); |
| | 2376 | |
| | 2377 | /* skip the quote in the input */ |
| | 2378 | p->inc(); |
| | 2379 | |
| | 2380 | /* determine the token type based on the quote type */ |
| | 2381 | switch(qu) |
| | 2382 | { |
| | 2383 | case '\'': |
| | 2384 | /* single-quoted string */ |
| | 2385 | typ = TOKT_SSTR; |
| | 2386 | allow_embedding = FALSE; |
| | 2387 | break; |
| | 2388 | |
| | 2389 | case '>': |
| | 2390 | /* |
| | 2391 | * this must be the next part of a string with embeddings; for now, |
| | 2392 | * assume it's the end of the string, although it may just turn out |
| | 2393 | * to be the middle |
| | 2394 | */ |
| | 2395 | typ = TOKT_DSTR_END; |
| | 2396 | allow_embedding = (in_embedding != 0); |
| | 2397 | |
| | 2398 | /* skip the extra '>' character */ |
| | 2399 | p->inc(); |
| | 2400 | |
| | 2401 | /* clear the embedding flag */ |
| | 2402 | if (in_embedding != 0) |
| | 2403 | *in_embedding = FALSE; |
| | 2404 | |
| | 2405 | /* look for a closing double quote */ |
| | 2406 | qu = '"'; |
| | 2407 | break; |
| | 2408 | |
| | 2409 | case '"': |
| | 2410 | /* regular double-quoted string */ |
| | 2411 | typ = TOKT_DSTR; |
| | 2412 | allow_embedding = (in_embedding != 0); |
| | 2413 | break; |
| | 2414 | |
| | 2415 | default: |
| | 2416 | /* anything else is invalid */ |
| | 2417 | typ = TOKT_INVALID; |
| | 2418 | allow_embedding = FALSE; |
| | 2419 | break; |
| | 2420 | } |
| | 2421 | |
| | 2422 | /* this is where the string's contents start */ |
| | 2423 | contents_start = p->getptr(); |
| | 2424 | |
| | 2425 | /* scan the string */ |
| | 2426 | for (;;) |
| | 2427 | { |
| | 2428 | wchar_t cur; |
| | 2429 | |
| | 2430 | /* get the current character */ |
| | 2431 | cur = p->getch(); |
| | 2432 | |
| | 2433 | /* see what we have */ |
| | 2434 | if (cur == '\\') |
| | 2435 | { |
| | 2436 | /* escape sequence - skip an extra character */ |
| | 2437 | p->inc(); |
| | 2438 | } |
| | 2439 | else if (cur == '<' && allow_embedding && p->getch_at(1) == '<') |
| | 2440 | { |
| | 2441 | /* |
| | 2442 | * it's the start of an embedded expression - return the |
| | 2443 | * appropriate embedded string part type |
| | 2444 | */ |
| | 2445 | if (typ == TOKT_DSTR) |
| | 2446 | typ = TOKT_DSTR_START; |
| | 2447 | else |
| | 2448 | typ = TOKT_DSTR_MID; |
| | 2449 | |
| | 2450 | /* remember that we're in an embedding in the token stream */ |
| | 2451 | *in_embedding = TRUE; |
| | 2452 | |
| | 2453 | /* this is where the contents end */ |
| | 2454 | contents_end = p->getptr(); |
| | 2455 | |
| | 2456 | /* skip the two embedding characters */ |
| | 2457 | p->inc(); |
| | 2458 | p->inc(); |
| | 2459 | |
| | 2460 | /* we're done - set the text in the token */ |
| | 2461 | tok->set_text(start, p->getptr() - start); |
| | 2462 | |
| | 2463 | /* done */ |
| | 2464 | break; |
| | 2465 | } |
| | 2466 | else if (cur == qu) |
| | 2467 | { |
| | 2468 | /* this is where the contents end */ |
| | 2469 | contents_end = p->getptr(); |
| | 2470 | |
| | 2471 | /* skip the closing quote */ |
| | 2472 | p->inc(); |
| | 2473 | |
| | 2474 | /* we're done - set the text in the token */ |
| | 2475 | tok->set_text(start, p->getptr() - start); |
| | 2476 | |
| | 2477 | /* done */ |
| | 2478 | break; |
| | 2479 | } |
| | 2480 | else if (cur == '\0') |
| | 2481 | { |
| | 2482 | /* this is where the contents end */ |
| | 2483 | contents_end = p->getptr(); |
| | 2484 | |
| | 2485 | /* |
| | 2486 | * We have an unterminated string. If we're evaluating a |
| | 2487 | * preprocessor constant expression, log an error; otherwise |
| | 2488 | * let it go for now, since we'll catch the error during the |
| | 2489 | * normal tokenizing pass for parsing. |
| | 2490 | */ |
| | 2491 | if (G_tok->in_pp_expr_) |
| | 2492 | log_error(TCERR_PP_UNTERM_STRING); |
| | 2493 | |
| | 2494 | /* set the partial text */ |
| | 2495 | tok->set_text(start, p->getptr() - start); |
| | 2496 | |
| | 2497 | /* end of line - return with the string unfinished */ |
| | 2498 | break; |
| | 2499 | } |
| | 2500 | |
| | 2501 | /* skip this charater of input */ |
| | 2502 | p->inc(); |
| | 2503 | } |
| | 2504 | |
| | 2505 | /* |
| | 2506 | * if we're not in preprocessor mode, and we're saving string text, |
| | 2507 | * write the string to the string text output file |
| | 2508 | */ |
| | 2509 | if (!G_tok->in_pp_expr_ && G_tok->string_fp_ != 0 |
| | 2510 | && contents_start != contents_end) |
| | 2511 | { |
| | 2512 | /* write the line, translating back to the source character set */ |
| | 2513 | G_tok->string_fp_map_ |
| | 2514 | ->write_file(G_tok->string_fp_, contents_start, |
| | 2515 | (size_t)(contents_end - contents_start)); |
| | 2516 | |
| | 2517 | /* add a newline */ |
| | 2518 | osfwb(G_tok->string_fp_, "\n", 1); |
| | 2519 | } |
| | 2520 | |
| | 2521 | /* set the type in the token */ |
| | 2522 | tok->settyp(typ); |
| | 2523 | |
| | 2524 | /* return the token type */ |
| | 2525 | return tok->gettyp(); |
| | 2526 | } |
| | 2527 | |
| | 2528 | |
| | 2529 | /* ------------------------------------------------------------------------ */ |
| | 2530 | /* |
| | 2531 | * Read a source line and handle preprocessor directives. This routine |
| | 2532 | * will transparently handle #include, #define, and other directives; |
| | 2533 | * when this routine returns, the input buffer will have a line of text |
| | 2534 | * that contains no # directive. |
| | 2535 | * |
| | 2536 | * Returns zero on success, non-zero upon reaching the end of the input. |
| | 2537 | */ |
| | 2538 | int CTcTokenizer::read_line_pp() |
| | 2539 | { |
| | 2540 | int started_in_string; |
| | 2541 | int ofs; |
| | 2542 | |
| | 2543 | /* |
| | 2544 | * Read the next line from the input. If that fails, return an end |
| | 2545 | * of file indication. |
| | 2546 | */ |
| | 2547 | ofs = read_line(FALSE); |
| | 2548 | if (ofs == -1) |
| | 2549 | return 1; |
| | 2550 | |
| | 2551 | /* |
| | 2552 | * before we process comments, note whether or not the line started |
| | 2553 | * out within a character string |
| | 2554 | */ |
| | 2555 | started_in_string = (in_quote_ != '\0'); |
| | 2556 | |
| | 2557 | /* set up our source pointer to the start of the new line */ |
| | 2558 | start_new_line(&linebuf_, ofs); |
| | 2559 | |
| | 2560 | /* skip leading whitespace */ |
| | 2561 | while (is_space(p_.getch())) |
| | 2562 | p_.inc(); |
| | 2563 | |
| | 2564 | /* |
| | 2565 | * If this line begins with a '#', process the directive. Ignore |
| | 2566 | * any initial '#' if the line started off in a string. |
| | 2567 | */ |
| | 2568 | if (!started_in_string && p_.getch() == '#' && allow_pp_) |
| | 2569 | { |
| | 2570 | struct pp_kw_def |
| | 2571 | { |
| | 2572 | const char *kw; |
| | 2573 | int process_in_false_if; |
| | 2574 | void (CTcTokenizer::*func)(); |
| | 2575 | }; |
| | 2576 | static pp_kw_def kwlist[] = |
| | 2577 | { |
| | 2578 | { "charset", FALSE, &CTcTokenizer::pp_charset }, |
| | 2579 | { "pragma", FALSE, &CTcTokenizer::pp_pragma }, |
| | 2580 | { "include", FALSE, &CTcTokenizer::pp_include }, |
| | 2581 | { "define", FALSE, &CTcTokenizer::pp_define }, |
| | 2582 | { "if", TRUE, &CTcTokenizer::pp_if }, |
| | 2583 | { "ifdef", TRUE, &CTcTokenizer::pp_ifdef }, |
| | 2584 | { "ifndef", TRUE, &CTcTokenizer::pp_ifndef }, |
| | 2585 | { "else", TRUE, &CTcTokenizer::pp_else }, |
| | 2586 | { "elif", TRUE, &CTcTokenizer::pp_elif }, |
| | 2587 | { "endif", TRUE, &CTcTokenizer::pp_endif }, |
| | 2588 | { "error", FALSE, &CTcTokenizer::pp_error }, |
| | 2589 | { "undef", FALSE, &CTcTokenizer::pp_undef }, |
| | 2590 | { "line", FALSE, &CTcTokenizer::pp_line }, |
| | 2591 | { 0, 0, 0 } |
| | 2592 | }; |
| | 2593 | pp_kw_def *kwp; |
| | 2594 | const char *kwtxt; |
| | 2595 | size_t kwlen; |
| | 2596 | |
| | 2597 | /* skip the '#' */ |
| | 2598 | p_.inc(); |
| | 2599 | |
| | 2600 | /* |
| | 2601 | * If the line ended inside a comment, read the next line until |
| | 2602 | * we're no longer in a comment. The ANSI C preprocessor rules |
| | 2603 | * say that a newline in a comment should not be treated as a |
| | 2604 | * lexical newline, so pretend that the next line is part of the |
| | 2605 | * preprocessor line in such a case. |
| | 2606 | */ |
| | 2607 | while (str_->is_in_comment()) |
| | 2608 | { |
| | 2609 | size_t p_ofs; |
| | 2610 | |
| | 2611 | /* remember the current offset in the line buffer */ |
| | 2612 | p_ofs = p_.getptr() - linebuf_.get_buf(); |
| | 2613 | |
| | 2614 | /* append another line - stop at the end of the stream */ |
| | 2615 | if (read_line(TRUE) == -1) |
| | 2616 | break; |
| | 2617 | |
| | 2618 | /* restore the line pointer, in case the buffer moved */ |
| | 2619 | start_new_line(&linebuf_, p_ofs); |
| | 2620 | } |
| | 2621 | |
| | 2622 | /* read the directive */ |
| | 2623 | next_on_line(); |
| | 2624 | |
| | 2625 | /* |
| | 2626 | * if we've reached the end of the line, it's a null directive; |
| | 2627 | * simply return an empty line |
| | 2628 | */ |
| | 2629 | if (curtok_.gettyp() == TOKT_EOF) |
| | 2630 | { |
| | 2631 | clear_linebuf(); |
| | 2632 | return 0; |
| | 2633 | } |
| | 2634 | |
| | 2635 | /* get the text and length of the keyword */ |
| | 2636 | kwtxt = curtok_.get_text(); |
| | 2637 | kwlen = curtok_.get_text_len(); |
| | 2638 | |
| | 2639 | /* if it's not a symbol, it's not a valid directive */ |
| | 2640 | if (curtok_.gettyp() != TOKT_SYM) |
| | 2641 | { |
| | 2642 | /* log the error and return an empty line */ |
| | 2643 | log_error(TCERR_INV_PP_DIR, (int)kwlen, kwtxt); |
| | 2644 | clear_linebuf(); |
| | 2645 | return 0; |
| | 2646 | } |
| | 2647 | |
| | 2648 | /* determine which keyword we have, and process it */ |
| | 2649 | for (kwp = kwlist ; kwp->kw != 0 ; ++kwp) |
| | 2650 | { |
| | 2651 | /* is this our keyword? */ |
| | 2652 | if (strlen(kwp->kw) == kwlen |
| | 2653 | && memcmp(kwtxt, kwp->kw, kwlen) == 0) |
| | 2654 | { |
| | 2655 | /* |
| | 2656 | * This is our directive. |
| | 2657 | * |
| | 2658 | * If we're in the false branch of a #if block, only |
| | 2659 | * process the directive if it's a kind of directive |
| | 2660 | * that we should process in false #if branches. The |
| | 2661 | * only directives that we process in #if branches are |
| | 2662 | * those that would affect the #if branching, such as a |
| | 2663 | * #endif or a nested #if. |
| | 2664 | */ |
| | 2665 | if (!in_false_if() || kwp->process_in_false_if) |
| | 2666 | { |
| | 2667 | /* invoke the handler to process the directive */ |
| | 2668 | (this->*(kwp->func))(); |
| | 2669 | } |
| | 2670 | else |
| | 2671 | { |
| | 2672 | /* |
| | 2673 | * we're in a #if branch not taken - simply clear |
| | 2674 | * the buffer |
| | 2675 | */ |
| | 2676 | clear_linebuf(); |
| | 2677 | } |
| | 2678 | |
| | 2679 | /* we don't need to look any further */ |
| | 2680 | break; |
| | 2681 | } |
| | 2682 | } |
| | 2683 | |
| | 2684 | /* |
| | 2685 | * if we didn't find the keyword, log an error and otherwise |
| | 2686 | * ignore the entire line |
| | 2687 | */ |
| | 2688 | if (kwp->kw == 0) |
| | 2689 | log_error(TCERR_INV_PP_DIR, (int)kwlen, kwtxt); |
| | 2690 | |
| | 2691 | /* |
| | 2692 | * Preprocessor lines must always be entirely self-contained. |
| | 2693 | * Therefore, it's not valid for a string to start on a |
| | 2694 | * preprocessor line and continue onto subsequent lines. If |
| | 2695 | * we're marked as being inside a string, there must have been |
| | 2696 | * an error on the preprocessor line. Simply clear the |
| | 2697 | * in-string flag; we don't need to issue an error at this |
| | 2698 | * point, since the preprocessor line handler should have |
| | 2699 | * already caught the problem and reported an error. |
| | 2700 | */ |
| | 2701 | in_quote_ = '\0'; |
| | 2702 | } |
| | 2703 | else |
| | 2704 | { |
| | 2705 | /* |
| | 2706 | * There's no preprocessor directive. |
| | 2707 | * |
| | 2708 | * If we're in a false #if branch, return an empty line. We |
| | 2709 | * return an empty line rather than skipping to the next line so |
| | 2710 | * that the caller sees the same number of lines as are in the |
| | 2711 | * original source. |
| | 2712 | */ |
| | 2713 | if (in_false_if()) |
| | 2714 | { |
| | 2715 | /* |
| | 2716 | * it's a #if not taken - we don't want to compile the line |
| | 2717 | * at all, so just clear it out |
| | 2718 | */ |
| | 2719 | clear_linebuf(); |
| | 2720 | expbuf_.clear_text(); |
| | 2721 | } |
| | 2722 | else |
| | 2723 | { |
| | 2724 | /* |
| | 2725 | * If we ended the line in a string, splice additional lines |
| | 2726 | * onto the end of this line until we find the end of the |
| | 2727 | * string, then unsplice the part after the end of the |
| | 2728 | * string. |
| | 2729 | */ |
| | 2730 | if (in_quote_ != '\0') |
| | 2731 | { |
| | 2732 | /* splice additional lines to finish the quote */ |
| | 2733 | splice_string(); |
| | 2734 | } |
| | 2735 | |
| | 2736 | /* |
| | 2737 | * Expand macros in the line, splicing additional source |
| | 2738 | * lines if necessary to fill out any incomplete actual |
| | 2739 | * parameter lists. |
| | 2740 | */ |
| | 2741 | start_new_line(&linebuf_, 0); |
| | 2742 | expand_macros_curline(TRUE, FALSE, FALSE); |
| | 2743 | } |
| | 2744 | |
| | 2745 | /* store the line in the appropriate place */ |
| | 2746 | if (pp_only_mode_) |
| | 2747 | { |
| | 2748 | /* |
| | 2749 | * we're only preprocessing - store the macro-expanded line |
| | 2750 | * back in the line buffer so that the caller can read out |
| | 2751 | * the final preprocessed text |
| | 2752 | */ |
| | 2753 | linebuf_.copy(expbuf_.get_text(), expbuf_.get_text_len()); |
| | 2754 | } |
| | 2755 | else |
| | 2756 | { |
| | 2757 | /* |
| | 2758 | * We're compiling - simply read subsequent tokens out of |
| | 2759 | * the expansion buffer. |
| | 2760 | */ |
| | 2761 | start_new_line(&expbuf_, 0); |
| | 2762 | } |
| | 2763 | } |
| | 2764 | |
| | 2765 | /* return success */ |
| | 2766 | return 0; |
| | 2767 | } |
| | 2768 | |
| | 2769 | /* ------------------------------------------------------------------------ */ |
| | 2770 | /* |
| | 2771 | * Read the next line from the input file. Returns the offset within |
| | 2772 | * the line buffer of the start of the newly-read data on success, or |
| | 2773 | * -1 if we reach the end of the input. |
| | 2774 | * |
| | 2775 | * If 'append' is true, we'll add the line on to the end of the existing |
| | 2776 | * buffer; otherwise, we'll overwrite what's in the buffer. |
| | 2777 | * |
| | 2778 | * The only preprocessing performed in this routine is line-splicing. |
| | 2779 | * Any line that ends with a backslash character will be spliced with |
| | 2780 | * the following line, with the backslash and newline removed. |
| | 2781 | * |
| | 2782 | * The new line will be stored in our internal buffer, and will be |
| | 2783 | * null-terminated with the trailing newline removed. |
| | 2784 | * |
| | 2785 | * If we reach the end of the current file, and there's an enclosing |
| | 2786 | * file, we'll resume reading from the enclosing file. Hence, when this |
| | 2787 | * routine returns non-zero, it indicates that we've reached the end of |
| | 2788 | * the entire source, not just of the current file. |
| | 2789 | */ |
| | 2790 | int CTcTokenizer::read_line(int append) |
| | 2791 | { |
| | 2792 | size_t len; |
| | 2793 | size_t start_len; |
| | 2794 | |
| | 2795 | /* if there's no input stream, indicate end-of-file */ |
| | 2796 | if (str_ == 0) |
| | 2797 | return -1; |
| | 2798 | |
| | 2799 | /* if we're not appending, clear out the line buffer */ |
| | 2800 | if (!append) |
| | 2801 | { |
| | 2802 | /* start with an empty line */ |
| | 2803 | clear_linebuf(); |
| | 2804 | |
| | 2805 | /* note the current input position */ |
| | 2806 | last_desc_ = str_->get_desc(); |
| | 2807 | last_linenum_ = str_->get_next_linenum(); |
| | 2808 | } |
| | 2809 | |
| | 2810 | /* note where the new data starts */ |
| | 2811 | len = linebuf_.get_text_len(); |
| | 2812 | start_len = len; |
| | 2813 | |
| | 2814 | /* |
| | 2815 | * if there's anything in the unsplice buffer, use it as the new |
| | 2816 | * line |
| | 2817 | */ |
| | 2818 | if (unsplicebuf_.get_text_len() != 0) |
| | 2819 | { |
| | 2820 | /* |
| | 2821 | * Copy the unsplice buffer as the current line. Note that we |
| | 2822 | * don't have to worry about any of the complicated cases, such |
| | 2823 | * as whether or not it ends with a newline or a backslash, |
| | 2824 | * because the unspliced line was already processed as an input |
| | 2825 | * line when we read it in the first place. |
| | 2826 | */ |
| | 2827 | linebuf_.append(unsplicebuf_.get_text(), unsplicebuf_.get_text_len()); |
| | 2828 | |
| | 2829 | /* clear the unsplice buffer, since it's been consumed now */ |
| | 2830 | unsplicebuf_.clear_text(); |
| | 2831 | |
| | 2832 | /* |
| | 2833 | * make the current line the appended line - if we're |
| | 2834 | * unsplicing, it means that we appended, so the current line is |
| | 2835 | * now the line from which the last appended text came |
| | 2836 | */ |
| | 2837 | last_desc_ = appended_desc_; |
| | 2838 | last_linenum_ = appended_linenum_; |
| | 2839 | |
| | 2840 | /* return the offset of the new text */ |
| | 2841 | return start_len; |
| | 2842 | } |
| | 2843 | |
| | 2844 | /* if we're appending, note where the appendage is coming from */ |
| | 2845 | if (append) |
| | 2846 | { |
| | 2847 | /* remember the last source line appended */ |
| | 2848 | appended_desc_ = str_->get_desc(); |
| | 2849 | appended_linenum_ = str_->get_next_linenum(); |
| | 2850 | } |
| | 2851 | |
| | 2852 | /* keep going until we finish reading the input line */ |
| | 2853 | for ( ;; ) |
| | 2854 | { |
| | 2855 | size_t curlen; |
| | 2856 | |
| | 2857 | /* read a line of text from the input file */ |
| | 2858 | curlen = str_->get_src()-> |
| | 2859 | read_line(linebuf_.get_buf() + len, |
| | 2860 | linebuf_.get_buf_size() - len); |
| | 2861 | |
| | 2862 | /* check for end of file */ |
| | 2863 | if (curlen == 0) |
| | 2864 | { |
| | 2865 | CTcTokStream *old_str; |
| | 2866 | |
| | 2867 | /* |
| | 2868 | * We've reached the end of the current input stream. If |
| | 2869 | * we've already read anything into the current line, it |
| | 2870 | * means that the file ended in mid-line, without a final |
| | 2871 | * newline character; ignore this and proceed with the line |
| | 2872 | * as it now stands in this case. |
| | 2873 | */ |
| | 2874 | if (len > start_len) |
| | 2875 | break; |
| | 2876 | |
| | 2877 | /* |
| | 2878 | * We've finished with this stream. If there's a parent |
| | 2879 | * stream, return to it; otherwise, we're at the end of the |
| | 2880 | * source. |
| | 2881 | */ |
| | 2882 | |
| | 2883 | /* |
| | 2884 | * if we didn't close all of the #if/#ifdef levels opened |
| | 2885 | * within this file, flag one or more errors |
| | 2886 | */ |
| | 2887 | while (if_sp_ > str_->get_init_if_level()) |
| | 2888 | { |
| | 2889 | const char *fname; |
| | 2890 | |
| | 2891 | /* get the filename from the #if stack */ |
| | 2892 | fname = if_stack_[if_sp_ - 1].desc->get_fname(); |
| | 2893 | |
| | 2894 | /* if we're in test reporting mode, use the root name only */ |
| | 2895 | if (test_report_mode_) |
| | 2896 | fname = os_get_root_name((char *)fname); |
| | 2897 | |
| | 2898 | /* log the error */ |
| | 2899 | log_error(TCERR_IF_WITHOUT_ENDIF, |
| | 2900 | if_stack_[if_sp_ - 1].linenum, |
| | 2901 | (int)strlen(fname), fname); |
| | 2902 | |
| | 2903 | /* discard the #if level */ |
| | 2904 | pop_if(); |
| | 2905 | } |
| | 2906 | |
| | 2907 | /* remember the old stream */ |
| | 2908 | old_str = str_; |
| | 2909 | |
| | 2910 | /* return to the parent stream, if there is one */ |
| | 2911 | str_ = str_->get_parent(); |
| | 2912 | |
| | 2913 | /* delete the old stream now that we're done with it */ |
| | 2914 | delete old_str; |
| | 2915 | |
| | 2916 | /* note the new file the line will be coming from */ |
| | 2917 | if (!append && str_ != 0) |
| | 2918 | { |
| | 2919 | last_desc_ = str_->get_desc(); |
| | 2920 | last_linenum_ = str_->get_next_linenum(); |
| | 2921 | } |
| | 2922 | |
| | 2923 | /* if there's no stream, return end of file */ |
| | 2924 | if (str_ == 0) |
| | 2925 | return -1; |
| | 2926 | |
| | 2927 | /* |
| | 2928 | * restore the #pragma newline_spacing mode that was in effect |
| | 2929 | * when we interrupted the parent stream |
| | 2930 | */ |
| | 2931 | string_newline_spacing_ = str_->get_newline_spacing(); |
| | 2932 | |
| | 2933 | /* if there's a parser, notify it of the new pragma C mode */ |
| | 2934 | #if 0 // #pragma C is not currently used |
| | 2935 | if (G_prs != 0) |
| | 2936 | G_prs->set_pragma_c(str_->is_pragma_c()); |
| | 2937 | #endif |
| | 2938 | |
| | 2939 | /* go back to read the next line from the parent */ |
| | 2940 | continue; |
| | 2941 | } |
| | 2942 | |
| | 2943 | /* set the new length of the buffer contents */ |
| | 2944 | len += curlen - 1; |
| | 2945 | linebuf_.set_text_len(len); |
| | 2946 | |
| | 2947 | /* |
| | 2948 | * Check the result to see if it ends in a newline. If not, it |
| | 2949 | * means either that we don't have room in the buffer for the |
| | 2950 | * full source line, or we've reached the last line in the file, |
| | 2951 | * and it doesn't end with a newline. |
| | 2952 | * |
| | 2953 | * Note that the file reader will always supply us with '\n' |
| | 2954 | * newlines, regardless of the local operating system |
| | 2955 | * conventions. |
| | 2956 | * |
| | 2957 | * Also, check to see if the line ends with '\\'. If so, remove |
| | 2958 | * the '\\' character and read the next line, since this |
| | 2959 | * indicates that the logical line continues onto the next |
| | 2960 | * newline-deliminted line. |
| | 2961 | */ |
| | 2962 | if (len != 0 && linebuf_.get_text()[len - 1] != '\n') |
| | 2963 | { |
| | 2964 | /* |
| | 2965 | * There's no newline, hence the file reader wasn't able to |
| | 2966 | * fit the entire line into our buffer, or else we've read |
| | 2967 | * the last line in the file and there's no newline at the |
| | 2968 | * end. If we haven't reached the end of the file, expand |
| | 2969 | * our line buffer to make room to read more from this same |
| | 2970 | * line. |
| | 2971 | */ |
| | 2972 | if (!str_->get_src()->at_eof()) |
| | 2973 | linebuf_.expand(); |
| | 2974 | } |
| | 2975 | else if (len > 1 && linebuf_.get_text()[len - 2] == '\\') |
| | 2976 | { |
| | 2977 | /* |
| | 2978 | * There's a backslash at the end of the line, so they want |
| | 2979 | * to continue this logical line. Remove the backslash, and |
| | 2980 | * read the next line onto the end of the current line. |
| | 2981 | * |
| | 2982 | * Note that we must remove two characters from the end of |
| | 2983 | * the line (and tested for buf_[len-2] above) because we |
| | 2984 | * have both a backslash and a newline at the end of the |
| | 2985 | * line. |
| | 2986 | */ |
| | 2987 | len -= 2; |
| | 2988 | linebuf_.set_text_len(len); |
| | 2989 | |
| | 2990 | /* count reading the physical line */ |
| | 2991 | str_->count_line(); |
| | 2992 | } |
| | 2993 | else |
| | 2994 | { |
| | 2995 | /* remove the newline from the buffer */ |
| | 2996 | if (len != 0) |
| | 2997 | { |
| | 2998 | --len; |
| | 2999 | linebuf_.set_text_len(len); |
| | 3000 | } |
| | 3001 | |
| | 3002 | /* count reading the line */ |
| | 3003 | str_->count_line(); |
| | 3004 | |
| | 3005 | /* done */ |
| | 3006 | break; |
| | 3007 | } |
| | 3008 | } |
| | 3009 | |
| | 3010 | /* |
| | 3011 | * remove comments from the newly-read material - this replaces each |
| | 3012 | * comment by a single whitespace character |
| | 3013 | */ |
| | 3014 | process_comments(start_len); |
| | 3015 | |
| | 3016 | /* |
| | 3017 | * we've successfully read a line -- return the offset of the start of |
| | 3018 | * the newly-read text |
| | 3019 | */ |
| | 3020 | return start_len; |
| | 3021 | } |
| | 3022 | |
| | 3023 | /* |
| | 3024 | * Un-splice a line at the given point. This breaks the current source |
| | 3025 | * line in two, keeping the part before the given point as the current |
| | 3026 | * line, but making the part from the given point to the end of the line |
| | 3027 | * a new source line. We'll put the new source line into a special |
| | 3028 | * holding buffer, and then fetch this part as a new line the next time |
| | 3029 | * we read a line in read_line(). |
| | 3030 | */ |
| | 3031 | void CTcTokenizer::unsplice_line(const char *new_line_start) |
| | 3032 | { |
| | 3033 | size_t keep_len; |
| | 3034 | |
| | 3035 | /* make sure the starting point is within the current line */ |
| | 3036 | if (!(new_line_start >= linebuf_.get_text() |
| | 3037 | && new_line_start <= linebuf_.get_text() + linebuf_.get_text_len())) |
| | 3038 | { |
| | 3039 | /* note the error - this is an internal problem */ |
| | 3040 | throw_internal_error(TCERR_UNSPLICE_NOT_CUR); |
| | 3041 | return; |
| | 3042 | } |
| | 3043 | |
| | 3044 | /* calculate the length of the part we're keeping */ |
| | 3045 | keep_len = new_line_start - linebuf_.get_text(); |
| | 3046 | |
| | 3047 | /* |
| | 3048 | * prepend the remainder of the current line into the unsplice buffer |
| | 3049 | * (we prepend it because the unsplice line is text that comes after |
| | 3050 | * the current line - so anything in the current line comes before |
| | 3051 | * anything already in the unsplice buffer) |
| | 3052 | */ |
| | 3053 | unsplicebuf_.prepend(new_line_start, linebuf_.get_text_len() - keep_len); |
| | 3054 | |
| | 3055 | /* cut off the current line at the given point */ |
| | 3056 | linebuf_.set_text_len(keep_len); |
| | 3057 | } |
| | 3058 | |
| | 3059 | |
| | 3060 | /* ------------------------------------------------------------------------ */ |
| | 3061 | /* |
| | 3062 | * Store text in the source array |
| | 3063 | */ |
| | 3064 | const char *CTcTokenizer::store_source(const char *txt, size_t len) |
| | 3065 | { |
| | 3066 | /* reserve space for the text */ |
| | 3067 | reserve_source(len); |
| | 3068 | |
| | 3069 | /* store it */ |
| | 3070 | const char *p = store_source_partial(txt, len); |
| | 3071 | |
| | 3072 | /* add a null terminator */ |
| | 3073 | static const char nt[1] = { '\0' }; |
| | 3074 | store_source_partial(nt, 1); |
| | 3075 | |
| | 3076 | /* return the pointer to the stored space */ |
| | 3077 | return p; |
| | 3078 | } |
| | 3079 | |
| | 3080 | /* |
| | 3081 | * Store partial source; use this AFTER reserving the necessary space. If |
| | 3082 | * you want null-termination, be sure to reserve the extra byte for that |
| | 3083 | * and include it in the string. This can be used to build a string piece |
| | 3084 | * by piece; we simply add the text without null-terminating it. |
| | 3085 | */ |
| | 3086 | const char *CTcTokenizer::store_source_partial(const char *txt, size_t len) |
| | 3087 | { |
| | 3088 | /* remember where the string starts */ |
| | 3089 | const char *p = src_ptr_; |
| | 3090 | |
| | 3091 | /* store the text */ |
| | 3092 | memcpy(src_ptr_, txt, len); |
| | 3093 | |
| | 3094 | /* advance the source block write position and length */ |
| | 3095 | src_ptr_ += len; |
| | 3096 | src_rem_ -= len; |
| | 3097 | |
| | 3098 | /* return the storage pointer */ |
| | 3099 | return p; |
| | 3100 | } |
| | 3101 | |
| | 3102 | /* |
| | 3103 | * Reserve space for text in the source array. This always reserves the |
| | 3104 | * requested amount of space, plus an extra byte for null termination. |
| | 3105 | */ |
| | 3106 | void CTcTokenizer::reserve_source(size_t len) |
| | 3107 | { |
| | 3108 | /* |
| | 3109 | * if we don't have enough space for this line in the current source |
| | 3110 | * block, start a new block |
| | 3111 | */ |
| | 3112 | if (len + 1 > src_rem_) |
| | 3113 | { |
| | 3114 | CTcTokSrcBlock *blk; |
| | 3115 | |
| | 3116 | /* |
| | 3117 | * if the line is too long for a source block, throw a fatal |
| | 3118 | * error |
| | 3119 | */ |
| | 3120 | if (len + 1 > TCTOK_SRC_BLOCK_SIZE) |
| | 3121 | throw_fatal_error(TCERR_SRCLINE_TOO_LONG, |
| | 3122 | (long)TCTOK_SRC_BLOCK_SIZE); |
| | 3123 | |
| | 3124 | /* allocate a new block */ |
| | 3125 | blk = new CTcTokSrcBlock(); |
| | 3126 | |
| | 3127 | /* link it into our list */ |
| | 3128 | src_cur_->set_next(blk); |
| | 3129 | |
| | 3130 | /* it's now the current block */ |
| | 3131 | src_cur_ = blk; |
| | 3132 | |
| | 3133 | /* start writing at the start of this block */ |
| | 3134 | src_rem_ = TCTOK_SRC_BLOCK_SIZE; |
| | 3135 | src_ptr_ = blk->get_buf(); |
| | 3136 | } |
| | 3137 | } |
| | 3138 | |
| | 3139 | /* |
| | 3140 | * Commit space previously reserved and now used in the source block |
| | 3141 | * list |
| | 3142 | */ |
| | 3143 | void CTcTokenizer::commit_source(size_t len) |
| | 3144 | { |
| | 3145 | /* advance the write position past the committed text */ |
| | 3146 | src_ptr_ += len; |
| | 3147 | src_rem_ -= len; |
| | 3148 | } |
| | 3149 | |
| | 3150 | |
| | 3151 | /* ------------------------------------------------------------------------ */ |
| | 3152 | /* |
| | 3153 | * Expand macros in the current line from the current source pointer, |
| | 3154 | * filling in expbuf_ with the expanded result. |
| | 3155 | */ |
| | 3156 | int CTcTokenizer::expand_macros_curline(int read_more, int allow_defined, |
| | 3157 | int append_to_expbuf) |
| | 3158 | { |
| | 3159 | int err; |
| | 3160 | |
| | 3161 | /* expand macros in the current line */ |
| | 3162 | err = expand_macros(&linebuf_, &p_, &expbuf_, read_more, allow_defined, |
| | 3163 | append_to_expbuf); |
| | 3164 | |
| | 3165 | /* if that failed, return an error */ |
| | 3166 | if (err != 0) |
| | 3167 | return err; |
| | 3168 | |
| | 3169 | /* |
| | 3170 | * if we're in preprocessor mode, clean up the text for human |
| | 3171 | * consumption by removing our various expansion flags |
| | 3172 | */ |
| | 3173 | if (pp_only_mode_) |
| | 3174 | remove_expansion_flags(&expbuf_); |
| | 3175 | |
| | 3176 | /* return the result */ |
| | 3177 | return err; |
| | 3178 | } |
| | 3179 | |
| | 3180 | /* ------------------------------------------------------------------------ */ |
| | 3181 | /* |
| | 3182 | * Remove the special internal macro expansion flags from an expanded macro |
| | 3183 | * buffer. |
| | 3184 | */ |
| | 3185 | void CTcTokenizer::remove_expansion_flags(CTcTokString *buf) |
| | 3186 | { |
| | 3187 | utf8_ptr p; |
| | 3188 | char *src; |
| | 3189 | char *dst; |
| | 3190 | |
| | 3191 | /* |
| | 3192 | * Scan the expansion buffer and remove all of the no-more-expansion |
| | 3193 | * flag bytes - we're done expanding the macro now, so we don't need |
| | 3194 | * this information any longer. When we're writing out the |
| | 3195 | * preprocessed source for human viewing, we don't want to leave these |
| | 3196 | * internal markers in the expanded source. |
| | 3197 | */ |
| | 3198 | for (src = dst = buf->get_buf(), p.set(src) ; p.getch() != '\0' ; ) |
| | 3199 | { |
| | 3200 | /* if this isn't a macro flag, copy it */ |
| | 3201 | if (p.getch() == TOK_MACRO_EXP_END) |
| | 3202 | { |
| | 3203 | /* skip the flag byte and the following embedded pointer */ |
| | 3204 | src += 1 + sizeof(CTcHashEntryPp *); |
| | 3205 | p.set(src); |
| | 3206 | } |
| | 3207 | else if (p.getch() == TOK_FULLY_EXPANDED_FLAG) |
| | 3208 | { |
| | 3209 | /* skip the flag byte */ |
| | 3210 | ++src; |
| | 3211 | p.set(src); |
| | 3212 | } |
| | 3213 | else |
| | 3214 | { |
| | 3215 | /* skip this character */ |
| | 3216 | p.inc(); |
| | 3217 | |
| | 3218 | /* copy the bytes of this character as-is */ |
| | 3219 | while (src < p.getptr()) |
| | 3220 | *dst++ = *src++; |
| | 3221 | } |
| | 3222 | } |
| | 3223 | |
| | 3224 | /* set the new buffer length */ |
| | 3225 | buf->set_text_len(dst - buf->get_buf()); |
| | 3226 | } |
| | 3227 | |
| | 3228 | /* ------------------------------------------------------------------------ */ |
| | 3229 | /* |
| | 3230 | * Expand macros in the current line, reading additional source lines if |
| | 3231 | * necessary. |
| | 3232 | * |
| | 3233 | * 'src' is a pointer to the start of the text to expand; it must point |
| | 3234 | * into the 'srcbuf' buffer. If 'src' is null, we'll simply start at |
| | 3235 | * the beginning of the source buffer. |
| | 3236 | */ |
| | 3237 | int CTcTokenizer::expand_macros(CTcTokString *srcbuf, utf8_ptr *src, |
| | 3238 | CTcTokString *expbuf, int read_more, |
| | 3239 | int allow_defined, int append) |
| | 3240 | { |
| | 3241 | tc_toktyp_t typ; |
| | 3242 | CTcToken tok; |
| | 3243 | CTcTokString *subexp; |
| | 3244 | size_t startofs; |
| | 3245 | utf8_ptr local_src; |
| | 3246 | CTcTokStringRef local_srcbuf; |
| | 3247 | CTcMacroRsc *res; |
| | 3248 | int err; |
| | 3249 | |
| | 3250 | /* presume success */ |
| | 3251 | err = 0; |
| | 3252 | |
| | 3253 | /* get a macro expansion resource object */ |
| | 3254 | res = alloc_macro_rsc(); |
| | 3255 | if (res == 0) |
| | 3256 | return 1; |
| | 3257 | |
| | 3258 | /* get our subexpression buffer from the resource object */ |
| | 3259 | subexp = &res->line_exp_; |
| | 3260 | |
| | 3261 | /* if there's no source buffer or source pointer, provide one */ |
| | 3262 | if (srcbuf == 0) |
| | 3263 | { |
| | 3264 | /* |
| | 3265 | * there's no source buffer - provide our own non-allocated |
| | 3266 | * buffer tied to the caller's buffer |
| | 3267 | */ |
| | 3268 | local_srcbuf.set_buffer(src->getptr(), strlen(src->getptr())); |
| | 3269 | srcbuf = &local_srcbuf; |
| | 3270 | } |
| | 3271 | else if (src == 0) |
| | 3272 | { |
| | 3273 | /* |
| | 3274 | * there's no source pointer - start at the beginning of the |
| | 3275 | * source buffer |
| | 3276 | */ |
| | 3277 | local_src.set((char *)srcbuf->get_text()); |
| | 3278 | src = &local_src; |
| | 3279 | } |
| | 3280 | |
| | 3281 | /* clear the expansion buffer, unless we're appending to the buffer */ |
| | 3282 | if (!append) |
| | 3283 | expbuf->clear_text(); |
| | 3284 | |
| | 3285 | /* |
| | 3286 | * Make sure we have room for a copy of the source line. This is an |
| | 3287 | * optimization for the simple case where we'll just copy the source |
| | 3288 | * line unchanged, so that we don't have to repeatedly expand the |
| | 3289 | * buffer; we will, however, expand the buffer dynamically later, if |
| | 3290 | * this pre-allocation should prove to be insufficient. |
| | 3291 | */ |
| | 3292 | expbuf->ensure_space(expbuf->get_text_len() + srcbuf->get_text_len()); |
| | 3293 | |
| | 3294 | /* note the starting offset, if we have an underlying string buffer */ |
| | 3295 | startofs = src->getptr() - srcbuf->get_text(); |
| | 3296 | |
| | 3297 | /* read the first token */ |
| | 3298 | typ = next_on_line(srcbuf, src, &tok, ¯o_in_embedding_, TRUE); |
| | 3299 | |
| | 3300 | /* scan through the tokens on the line, looking for macros to expand */ |
| | 3301 | while (typ != TOKT_EOF) |
| | 3302 | { |
| | 3303 | /* |
| | 3304 | * if it's a symbol, and it hasn't already been marked as fully |
| | 3305 | * expanded, look it up in the #define table |
| | 3306 | */ |
| | 3307 | if (typ == TOKT_SYM && !tok.get_fully_expanded()) |
| | 3308 | { |
| | 3309 | CTcHashEntryPp *entry; |
| | 3310 | |
| | 3311 | /* |
| | 3312 | * Look up the symbol in the #define symbol table. If we |
| | 3313 | * find it, expand the macro. Otherwise, if the "defined" |
| | 3314 | * operator is active, check for that. |
| | 3315 | * |
| | 3316 | * Do not expand the macro if we find that it has already |
| | 3317 | * been expanded on a prior scan through the current text. |
| | 3318 | */ |
| | 3319 | entry = find_define(tok.get_text(), tok.get_text_len()); |
| | 3320 | if ((entry != 0 |
| | 3321 | && !scan_for_prior_expansion(*src, srcbuf->get_text_end(), |
| | 3322 | entry)) |
| | 3323 | || (allow_defined |
| | 3324 | && tok.get_text_len() == 7 |
| | 3325 | && memcmp(tok.get_text(), "defined", 7) == 0)) |
| | 3326 | { |
| | 3327 | size_t macro_ofs; |
| | 3328 | size_t rem_len; |
| | 3329 | int expanded; |
| | 3330 | |
| | 3331 | /* get the offset of the macro token in the source buffer */ |
| | 3332 | macro_ofs = tok.get_text() - srcbuf->get_text(); |
| | 3333 | |
| | 3334 | /* expand it into our sub-expansion buffer */ |
| | 3335 | if (entry != 0) |
| | 3336 | { |
| | 3337 | /* expand the macro */ |
| | 3338 | err = expand_macro(res, subexp, srcbuf, src, |
| | 3339 | macro_ofs, entry, |
| | 3340 | read_more, allow_defined, &expanded); |
| | 3341 | } |
| | 3342 | else |
| | 3343 | { |
| | 3344 | /* parse and expand the defined() operator */ |
| | 3345 | err = expand_defined(subexp, srcbuf, src); |
| | 3346 | |
| | 3347 | /* "defined" always expands if there's not an error */ |
| | 3348 | expanded = TRUE; |
| | 3349 | } |
| | 3350 | |
| | 3351 | /* if an error occurred, return failure */ |
| | 3352 | if (err) |
| | 3353 | goto done; |
| | 3354 | |
| | 3355 | /* |
| | 3356 | * if we expanded something, append everything we |
| | 3357 | * skipped preceding the macro, then rescan; otherwise, |
| | 3358 | * just keep going without a rescan |
| | 3359 | */ |
| | 3360 | if (expanded) |
| | 3361 | { |
| | 3362 | /* copy the preceding text to the output */ |
| | 3363 | expbuf->append(srcbuf->get_text() + startofs, |
| | 3364 | macro_ofs - startofs); |
| | 3365 | } |
| | 3366 | else |
| | 3367 | { |
| | 3368 | /* |
| | 3369 | * we didn't expand - get the next token after the |
| | 3370 | * macro |
| | 3371 | */ |
| | 3372 | typ = next_on_line(srcbuf, src, &tok, |
| | 3373 | ¯o_in_embedding_, TRUE); |
| | 3374 | |
| | 3375 | /* continue processing from this token */ |
| | 3376 | continue; |
| | 3377 | } |
| | 3378 | |
| | 3379 | /* |
| | 3380 | * We must now insert the expansion into the source |
| | 3381 | * buffer at the current point, and re-scan the |
| | 3382 | * expansion, *along with* the rest of the original |
| | 3383 | * source line (this is how ANSI C specifies the |
| | 3384 | * process). |
| | 3385 | * |
| | 3386 | * If we can read more, we must be reading out of the |
| | 3387 | * main input line buffer, so insert the expansion text |
| | 3388 | * directly into the original source stream, and |
| | 3389 | * continue reading out of the source stream; this will |
| | 3390 | * simplify the case where we must read more data from |
| | 3391 | * the file in the course of the expansion. If we can't |
| | 3392 | * read more, simply copy the remainder of the current |
| | 3393 | * input line onto the expanded macro and use it as the |
| | 3394 | * new input buffer. |
| | 3395 | */ |
| | 3396 | |
| | 3397 | /* get the current offset in the source line */ |
| | 3398 | startofs = src->getptr() - srcbuf->get_text(); |
| | 3399 | |
| | 3400 | /* figure out how much is left on the current line */ |
| | 3401 | rem_len = srcbuf->get_text_len() - startofs; |
| | 3402 | |
| | 3403 | /* check to see if we can read more */ |
| | 3404 | if (read_more) |
| | 3405 | { |
| | 3406 | /* |
| | 3407 | * we're reading from the original line input buffer |
| | 3408 | * -- insert the expansion into the source buffer at |
| | 3409 | * the current point, replacing the original macro |
| | 3410 | * text |
| | 3411 | */ |
| | 3412 | |
| | 3413 | /* make sure we have room for adding the expansion text */ |
| | 3414 | srcbuf->ensure_space(macro_ofs + rem_len |
| | 3415 | + subexp->get_text_len()); |
| | 3416 | |
| | 3417 | /* make sure src is still pointing to the right place */ |
| | 3418 | src->set(srcbuf->get_buf() + macro_ofs); |
| | 3419 | |
| | 3420 | /* move the remainder of the current line to make room */ |
| | 3421 | memmove(srcbuf->get_buf() + macro_ofs |
| | 3422 | + subexp->get_text_len(), |
| | 3423 | srcbuf->get_buf() + startofs, |
| | 3424 | rem_len); |
| | 3425 | |
| | 3426 | /* insert the expansion text */ |
| | 3427 | memcpy(srcbuf->get_buf() + macro_ofs, subexp->get_buf(), |
| | 3428 | subexp->get_text_len()); |
| | 3429 | |
| | 3430 | /* set the new source length */ |
| | 3431 | srcbuf->set_text_len(macro_ofs + rem_len |
| | 3432 | + subexp->get_text_len()); |
| | 3433 | |
| | 3434 | /* the new starting offset is the current position */ |
| | 3435 | startofs = macro_ofs; |
| | 3436 | |
| | 3437 | /* get the next token */ |
| | 3438 | typ = next_on_line(srcbuf, src, &tok, |
| | 3439 | ¯o_in_embedding_, TRUE); |
| | 3440 | |
| | 3441 | /* continue processing from this token */ |
| | 3442 | continue; |
| | 3443 | } |
| | 3444 | else |
| | 3445 | { |
| | 3446 | /* |
| | 3447 | * we're reading from a read-only buffer -- add the |
| | 3448 | * remainder of the source to the expansion buffer, |
| | 3449 | * and recursively parse the remainder |
| | 3450 | */ |
| | 3451 | subexp->append(srcbuf->get_text() + startofs, rem_len); |
| | 3452 | |
| | 3453 | /* |
| | 3454 | * evaluate the remainder recursively and append it |
| | 3455 | * to the expansion already in progress |
| | 3456 | */ |
| | 3457 | err = expand_macros(subexp, 0, expbuf, FALSE, |
| | 3458 | allow_defined, TRUE); |
| | 3459 | |
| | 3460 | /* we're done */ |
| | 3461 | goto done; |
| | 3462 | } |
| | 3463 | } |
| | 3464 | } |
| | 3465 | |
| | 3466 | /* get the next token */ |
| | 3467 | typ = next_on_line(srcbuf, src, &tok, ¯o_in_embedding_, TRUE); |
| | 3468 | } |
| | 3469 | |
| | 3470 | /* add the remainder of the source to the output */ |
| | 3471 | expbuf->append(srcbuf->get_text() + startofs, |
| | 3472 | tok.get_text() - startofs - srcbuf->get_text()); |
| | 3473 | |
| | 3474 | done: |
| | 3475 | /* release our macro resource object */ |
| | 3476 | release_macro_rsc(res); |
| | 3477 | |
| | 3478 | /* return the result */ |
| | 3479 | return err; |
| | 3480 | } |
| | 3481 | |
| | 3482 | /* |
| | 3483 | * Allocate a macro resource object. If we're out of resource objects |
| | 3484 | * in the pool, we'll add another object to the pool. |
| | 3485 | */ |
| | 3486 | CTcMacroRsc *CTcTokenizer::alloc_macro_rsc() |
| | 3487 | { |
| | 3488 | CTcMacroRsc *rsc; |
| | 3489 | |
| | 3490 | /* |
| | 3491 | * if there's anything in the available list, take the first item |
| | 3492 | * off the list and return it |
| | 3493 | */ |
| | 3494 | if (macro_res_avail_ != 0) |
| | 3495 | { |
| | 3496 | /* remember the item to return */ |
| | 3497 | rsc = macro_res_avail_; |
| | 3498 | |
| | 3499 | /* remove it from the list */ |
| | 3500 | macro_res_avail_ = macro_res_avail_->next_avail_; |
| | 3501 | |
| | 3502 | /* return it */ |
| | 3503 | return rsc; |
| | 3504 | } |
| | 3505 | |
| | 3506 | /* there's nothing on the available list - allocate a new item */ |
| | 3507 | rsc = new CTcMacroRsc(); |
| | 3508 | |
| | 3509 | /* if that failed, return failure */ |
| | 3510 | if (rsc == 0) |
| | 3511 | { |
| | 3512 | log_error(TCERR_OUT_OF_MEM_MAC_EXP); |
| | 3513 | return 0; |
| | 3514 | } |
| | 3515 | |
| | 3516 | /* add it onto the master list */ |
| | 3517 | rsc->next_ = macro_res_head_; |
| | 3518 | macro_res_head_ = rsc; |
| | 3519 | |
| | 3520 | /* return it */ |
| | 3521 | return rsc; |
| | 3522 | } |
| | 3523 | |
| | 3524 | /* |
| | 3525 | * Release a macro resource, returning it to the pool |
| | 3526 | */ |
| | 3527 | void CTcTokenizer::release_macro_rsc(CTcMacroRsc *rsc) |
| | 3528 | { |
| | 3529 | /* put it back at the head of the available list */ |
| | 3530 | rsc->next_avail_ = macro_res_avail_; |
| | 3531 | macro_res_avail_ = rsc; |
| | 3532 | } |
| | 3533 | |
| | 3534 | /* |
| | 3535 | * Scan a buffer for a prior-expansion flag for a given macro. We'll |
| | 3536 | * look through the buffer for a TOK_MACRO_EXP_END byte that mentions |
| | 3537 | * the given symbol table entry; we'll return true if found, false if |
| | 3538 | * not. True means that the symbol has already been expanded on a prior |
| | 3539 | * scan of the text, so it should not be re-expanded now. |
| | 3540 | */ |
| | 3541 | int CTcTokenizer::scan_for_prior_expansion(utf8_ptr src, const char *src_end, |
| | 3542 | const CTcHashEntryPp *entry) |
| | 3543 | { |
| | 3544 | /* scan the buffer for the expansion flag byte */ |
| | 3545 | while (src.getptr() < src_end) |
| | 3546 | { |
| | 3547 | /* if this is the flag, check what follows */ |
| | 3548 | if (src.getch() == TOK_MACRO_EXP_END) |
| | 3549 | { |
| | 3550 | CTcHashEntryPp *flag_entry; |
| | 3551 | |
| | 3552 | /* read the entry from the buffer */ |
| | 3553 | memcpy(&flag_entry, src.getptr() + 1, sizeof(flag_entry)); |
| | 3554 | |
| | 3555 | /* if it matches, indicate that we found it */ |
| | 3556 | if (entry == flag_entry) |
| | 3557 | return TRUE; |
| | 3558 | |
| | 3559 | /* it's not a match - keep scanning after this flag sequence */ |
| | 3560 | src.set(src.getptr() + 1 + sizeof(flag_entry)); |
| | 3561 | } |
| | 3562 | else |
| | 3563 | { |
| | 3564 | /* it's not the flag - skip this character */ |
| | 3565 | src.inc(); |
| | 3566 | } |
| | 3567 | } |
| | 3568 | |
| | 3569 | /* we didn't find it */ |
| | 3570 | return FALSE; |
| | 3571 | } |
| | 3572 | |
| | 3573 | /* |
| | 3574 | * Go through a macro expansion and translate from end-of-expansion |
| | 3575 | * markers to individual token full-expansion markers. This is used |
| | 3576 | * after we leave a recursion level to convert expanded text into text |
| | 3577 | * suitable for use in further expansion at an enclosing recursion |
| | 3578 | * level. |
| | 3579 | */ |
| | 3580 | void CTcTokenizer::mark_full_exp_tokens(CTcTokString *dstbuf, |
| | 3581 | const CTcTokString *srcbuf, |
| | 3582 | int append) const |
| | 3583 | { |
| | 3584 | utf8_ptr p; |
| | 3585 | CTcToken tok; |
| | 3586 | const char *start; |
| | 3587 | int in_embedding; |
| | 3588 | |
| | 3589 | /* clear the output buffer if we're not appending to existing text */ |
| | 3590 | if (!append) |
| | 3591 | dstbuf->clear_text(); |
| | 3592 | |
| | 3593 | /* remember the starting point */ |
| | 3594 | start = srcbuf->get_text(); |
| | 3595 | |
| | 3596 | /* not in an embedded expression within the expansion text yet */ |
| | 3597 | in_embedding = FALSE; |
| | 3598 | |
| | 3599 | /* scan the source buffer */ |
| | 3600 | p.set((char *)start); |
| | 3601 | for (;;) |
| | 3602 | { |
| | 3603 | CTcHashEntryPp *cur_entry; |
| | 3604 | tc_toktyp_t typ; |
| | 3605 | char ch; |
| | 3606 | |
| | 3607 | /* get the next token; stop at the end of the line */ |
| | 3608 | typ = next_on_line(srcbuf, &p, &tok, &in_embedding, TRUE); |
| | 3609 | if (typ == TOKT_EOF) |
| | 3610 | break; |
| | 3611 | |
| | 3612 | /* |
| | 3613 | * if this macro token is being expanded, and it's not already |
| | 3614 | * marked for no more expansion, mark it |
| | 3615 | */ |
| | 3616 | if (typ == TOKT_SYM |
| | 3617 | && !tok.get_fully_expanded() |
| | 3618 | && (cur_entry = find_define(tok.get_text(), |
| | 3619 | tok.get_text_len())) != 0 |
| | 3620 | && scan_for_prior_expansion(p, srcbuf->get_text_end(), cur_entry)) |
| | 3621 | { |
| | 3622 | /* |
| | 3623 | * This token has been fully expanded in the substitution |
| | 3624 | * buffer but hasn't yet been marked as such - we must |
| | 3625 | * insert the fully-expanded marker. First, add up to the |
| | 3626 | * current point to the output buffer. |
| | 3627 | */ |
| | 3628 | if (tok.get_text() > start) |
| | 3629 | dstbuf->append(start, tok.get_text() - start); |
| | 3630 | |
| | 3631 | /* add the fully-expanded marker */ |
| | 3632 | ch = TOK_FULLY_EXPANDED_FLAG; |
| | 3633 | dstbuf->append(&ch, 1); |
| | 3634 | |
| | 3635 | /* the new starting point is the start of the symbol token */ |
| | 3636 | start = tok.get_text(); |
| | 3637 | } |
| | 3638 | } |
| | 3639 | |
| | 3640 | /* copy any remaining text to the output */ |
| | 3641 | if (tok.get_text() > start) |
| | 3642 | dstbuf->append(start, tok.get_text() - start); |
| | 3643 | |
| | 3644 | /* |
| | 3645 | * Remove any macro expansion end markers from the output buffer. |
| | 3646 | * We don't want to leave these around, because they don't apply to |
| | 3647 | * the enclosing buffer into which we'll substitute this result. |
| | 3648 | * Note that we've already ensured that these markers will be |
| | 3649 | * respected for the substitution text by inserting "fully expanded" |
| | 3650 | * markers in front of each token to which any of the markers we're |
| | 3651 | * removing should apply. |
| | 3652 | */ |
| | 3653 | remove_end_markers(dstbuf); |
| | 3654 | } |
| | 3655 | |
| | 3656 | |
| | 3657 | /* |
| | 3658 | * Remove end markers from a buffer |
| | 3659 | */ |
| | 3660 | void CTcTokenizer::remove_end_markers(CTcTokString *buf) |
| | 3661 | { |
| | 3662 | char *src; |
| | 3663 | char *dst; |
| | 3664 | utf8_ptr p; |
| | 3665 | |
| | 3666 | /* scan the buffer */ |
| | 3667 | for (src = dst = buf->get_buf(), p.set(src) ; |
| | 3668 | p.getptr() < buf->get_text_end() ; ) |
| | 3669 | { |
| | 3670 | /* check for our flag */ |
| | 3671 | if (p.getch() == TOK_MACRO_EXP_END) |
| | 3672 | { |
| | 3673 | /* skip the flag byte and the following embedded pointer */ |
| | 3674 | src += 1 + sizeof(CTcHashEntryPp *); |
| | 3675 | p.set(src); |
| | 3676 | } |
| | 3677 | else |
| | 3678 | { |
| | 3679 | /* skip this character */ |
| | 3680 | p.inc(); |
| | 3681 | |
| | 3682 | /* copy the bytes of this character as-is */ |
| | 3683 | while (src < p.getptr()) |
| | 3684 | *dst++ = *src++; |
| | 3685 | } |
| | 3686 | } |
| | 3687 | |
| | 3688 | /* set the new buffer size */ |
| | 3689 | buf->set_text_len(dst - buf->get_buf()); |
| | 3690 | } |
| | 3691 | |
| | 3692 | |
| | 3693 | /* |
| | 3694 | * Expand the macro at the current token in the current line. |
| | 3695 | * |
| | 3696 | * 'src' is a pointer to the current position in 'srcbuf'. We'll update |
| | 3697 | * 'src' to point to the next token after macro or its actual parameters |
| | 3698 | * list, if it has one. |
| | 3699 | */ |
| | 3700 | int CTcTokenizer::expand_macro(CTcMacroRsc *rsc, CTcTokString *expbuf, |
| | 3701 | const CTcTokString *srcbuf, utf8_ptr *src, |
| | 3702 | size_t macro_srcbuf_ofs, |
| | 3703 | CTcHashEntryPp *entry, int read_more, |
| | 3704 | int allow_defined, int *expanded) |
| | 3705 | { |
| | 3706 | CTcTokString *subexp; |
| | 3707 | size_t argofs[TOK_MAX_MACRO_ARGS]; |
| | 3708 | size_t arglen[TOK_MAX_MACRO_ARGS]; |
| | 3709 | size_t startofs; |
| | 3710 | const char *start; |
| | 3711 | const char *end; |
| | 3712 | int err; |
| | 3713 | char flagbuf[1 + sizeof(entry)]; |
| | 3714 | |
| | 3715 | /* presume we won't do any expansion */ |
| | 3716 | *expanded = FALSE; |
| | 3717 | |
| | 3718 | /* get our resources */ |
| | 3719 | subexp = &rsc->macro_exp_; |
| | 3720 | |
| | 3721 | /* remember our parsing starting offset */ |
| | 3722 | startofs = src->getptr() - srcbuf->get_text(); |
| | 3723 | |
| | 3724 | /* clear the expansion output buffer */ |
| | 3725 | expbuf->clear_text(); |
| | 3726 | |
| | 3727 | /* if the macro has arguments, scan the actuals */ |
| | 3728 | if (entry->has_args()) |
| | 3729 | { |
| | 3730 | int found_actuals; |
| | 3731 | |
| | 3732 | /* read the macro arguments */ |
| | 3733 | if (parse_macro_actuals(srcbuf, src, entry, argofs, arglen, |
| | 3734 | read_more, &found_actuals)) |
| | 3735 | { |
| | 3736 | err = 1; |
| | 3737 | goto done; |
| | 3738 | } |
| | 3739 | |
| | 3740 | /* |
| | 3741 | * If we found no actuals, then this wasn't really an invocation |
| | 3742 | * of the macro after all - a function-like macro invoked with |
| | 3743 | * no arguments is simply not replaced. Store the original text |
| | 3744 | * in the output buffer and return success. |
| | 3745 | */ |
| | 3746 | if (!found_actuals) |
| | 3747 | { |
| | 3748 | /* copy the original text */ |
| | 3749 | expbuf->copy(srcbuf->get_text() + macro_srcbuf_ofs, |
| | 3750 | startofs - macro_srcbuf_ofs); |
| | 3751 | |
| | 3752 | /* |
| | 3753 | * restore the source read pointer to where it was when we |
| | 3754 | * started |
| | 3755 | */ |
| | 3756 | src->set((char *)srcbuf->get_text() + startofs); |
| | 3757 | |
| | 3758 | /* return success */ |
| | 3759 | err = 0; |
| | 3760 | goto done; |
| | 3761 | } |
| | 3762 | } |
| | 3763 | |
| | 3764 | /* |
| | 3765 | * if there are arguments, replace the macro and substitute actuals |
| | 3766 | * for the formals; otherwise, just copy the replacement text |
| | 3767 | * directly |
| | 3768 | */ |
| | 3769 | if (entry->get_argc() != 0) |
| | 3770 | { |
| | 3771 | /* substitute the actuals */ |
| | 3772 | if (substitute_macro_actuals(rsc, subexp, entry, srcbuf, |
| | 3773 | argofs, arglen, allow_defined)) |
| | 3774 | { |
| | 3775 | err = 1; |
| | 3776 | goto done; |
| | 3777 | } |
| | 3778 | |
| | 3779 | /* set up to parse from the expansion buffer */ |
| | 3780 | start = subexp->get_text(); |
| | 3781 | end = start + subexp->get_text_len(); |
| | 3782 | } |
| | 3783 | else |
| | 3784 | { |
| | 3785 | /* |
| | 3786 | * use our local source buffer that simply references the |
| | 3787 | * original expansion text, rather than making a copy of the |
| | 3788 | * expansion text |
| | 3789 | */ |
| | 3790 | start = entry->get_expansion(); |
| | 3791 | end = start + entry->get_expan_len(); |
| | 3792 | } |
| | 3793 | |
| | 3794 | /* copy the expansion into the output buffer */ |
| | 3795 | expbuf->copy(start, end - start); |
| | 3796 | |
| | 3797 | /* |
| | 3798 | * After the end of the expansion sequence, insert the |
| | 3799 | * fully-expanded flag plus a pointer to the symbol table entry that |
| | 3800 | * we just expanded. This will allow us to detect during the |
| | 3801 | * re-scan of the expansion text that this symbol has already been |
| | 3802 | * expanded, in which case we must suppress further expansion of the |
| | 3803 | * symbol. This allows us to follow the ANSI C rules for recursive |
| | 3804 | * macro usage. |
| | 3805 | */ |
| | 3806 | flagbuf[0] = TOK_MACRO_EXP_END; |
| | 3807 | memcpy(&flagbuf[1], &entry, sizeof(entry)); |
| | 3808 | expbuf->append(flagbuf, sizeof(flagbuf)); |
| | 3809 | |
| | 3810 | /* indicate that we expanded the macro */ |
| | 3811 | *expanded = TRUE; |
| | 3812 | |
| | 3813 | /* success */ |
| | 3814 | err = 0; |
| | 3815 | |
| | 3816 | done: |
| | 3817 | /* return the result */ |
| | 3818 | return err; |
| | 3819 | } |
| | 3820 | |
| | 3821 | /* |
| | 3822 | * Parse a macro's actual parameter list, filling in the given hash |
| | 3823 | * table with the arguments. Returns zero on success, non-zero on |
| | 3824 | * error. 'entry' is the macro's defining symbol table entry. |
| | 3825 | */ |
| | 3826 | int CTcTokenizer::parse_macro_actuals(const CTcTokString *srcbuf, |
| | 3827 | utf8_ptr *src, |
| | 3828 | const CTcHashEntryPp *entry, |
| | 3829 | size_t argofs[TOK_MAX_MACRO_ARGS], |
| | 3830 | size_t arglen[TOK_MAX_MACRO_ARGS], |
| | 3831 | int read_more, int *found_actuals) |
| | 3832 | { |
| | 3833 | tc_toktyp_t typ; |
| | 3834 | CTcToken tok; |
| | 3835 | int argc; |
| | 3836 | int spliced; |
| | 3837 | int i; |
| | 3838 | |
| | 3839 | /* presume we're not going to do any line splicing */ |
| | 3840 | spliced = FALSE; |
| | 3841 | |
| | 3842 | /* no arguments parsed yet */ |
| | 3843 | argc = 0; |
| | 3844 | |
| | 3845 | /* get the next token after the macro symbol */ |
| | 3846 | typ = next_on_line(srcbuf, src, &tok, ¯o_in_embedding_, TRUE); |
| | 3847 | |
| | 3848 | /* splice another line if necessary */ |
| | 3849 | if (typ == TOKT_EOF && read_more) |
| | 3850 | { |
| | 3851 | /* splice a line */ |
| | 3852 | typ = actual_splice_next_line(srcbuf, src, &tok); |
| | 3853 | |
| | 3854 | /* note the splice */ |
| | 3855 | spliced = TRUE; |
| | 3856 | } |
| | 3857 | |
| | 3858 | /* if we didn't find an open paren, there's no actual list after all */ |
| | 3859 | if (typ != TOKT_LPAR) |
| | 3860 | { |
| | 3861 | /* tell the caller we didn't find any actuals */ |
| | 3862 | *found_actuals = FALSE; |
| | 3863 | |
| | 3864 | /* if we spliced a line, unsplice it at the current token */ |
| | 3865 | if (spliced) |
| | 3866 | unsplice_line(tok.get_text()); |
| | 3867 | |
| | 3868 | /* return success */ |
| | 3869 | return 0; |
| | 3870 | } |
| | 3871 | |
| | 3872 | /* remember the offset of the start of the first argument */ |
| | 3873 | argofs[argc] = tok.get_text() + tok.get_text_len() - srcbuf->get_text(); |
| | 3874 | |
| | 3875 | /* skip the open paren */ |
| | 3876 | typ = next_on_line(srcbuf, src, &tok, ¯o_in_embedding_, TRUE); |
| | 3877 | |
| | 3878 | /* read the arguments */ |
| | 3879 | while (typ != TOKT_RPAR) |
| | 3880 | { |
| | 3881 | utf8_ptr p; |
| | 3882 | int paren_depth, bracket_depth, brace_depth; |
| | 3883 | int sp_cnt; |
| | 3884 | |
| | 3885 | /* if we have too many arguments, it's an error */ |
| | 3886 | if ((argc >= entry->get_argc() && !entry->has_varargs()) |
| | 3887 | || argc >= TOK_MAX_MACRO_ARGS) |
| | 3888 | { |
| | 3889 | /* log the error */ |
| | 3890 | log_error(TCERR_PP_MANY_MACRO_ARGS, |
| | 3891 | (int)entry->getlen(), entry->getstr()); |
| | 3892 | |
| | 3893 | /* scan ahead to to close paren or end of line */ |
| | 3894 | while (typ != TOKT_RPAR && typ != TOKT_EOF) |
| | 3895 | typ = next_on_line(srcbuf, src, &tok, ¯o_in_embedding_, |
| | 3896 | TRUE); |
| | 3897 | |
| | 3898 | /* done scanning arguments */ |
| | 3899 | break; |
| | 3900 | } |
| | 3901 | |
| | 3902 | /* |
| | 3903 | * skip tokens until we find a comma outside of nested parens, |
| | 3904 | * square brackets, or curly braces |
| | 3905 | */ |
| | 3906 | paren_depth = bracket_depth = brace_depth = 0; |
| | 3907 | while (paren_depth != 0 |
| | 3908 | || bracket_depth != 0 |
| | 3909 | || brace_depth != 0 |
| | 3910 | || (typ != TOKT_COMMA && typ != TOKT_RPAR)) |
| | 3911 | { |
| | 3912 | /* |
| | 3913 | * if it's an open or close paren, brace, or bracket, adjust |
| | 3914 | * the depth accordingly |
| | 3915 | */ |
| | 3916 | switch(typ) |
| | 3917 | { |
| | 3918 | case TOKT_LPAR: |
| | 3919 | ++paren_depth; |
| | 3920 | break; |
| | 3921 | |
| | 3922 | case TOKT_RPAR: |
| | 3923 | --paren_depth; |
| | 3924 | break; |
| | 3925 | |
| | 3926 | case TOKT_LBRACE: |
| | 3927 | ++brace_depth; |
| | 3928 | break; |
| | 3929 | |
| | 3930 | case TOKT_RBRACE: |
| | 3931 | --brace_depth; |
| | 3932 | break; |
| | 3933 | |
| | 3934 | case TOKT_LBRACK: |
| | 3935 | ++bracket_depth; |
| | 3936 | break; |
| | 3937 | |
| | 3938 | case TOKT_RBRACK: |
| | 3939 | --bracket_depth; |
| | 3940 | break; |
| | 3941 | |
| | 3942 | default: |
| | 3943 | break; |
| | 3944 | } |
| | 3945 | |
| | 3946 | /* get the next token */ |
| | 3947 | typ = next_on_line(srcbuf, src, &tok, ¯o_in_embedding_, |
| | 3948 | TRUE); |
| | 3949 | |
| | 3950 | /* |
| | 3951 | * if we're at the end of the line, and we're allowed to |
| | 3952 | * read more, splice the next line onto the current line |
| | 3953 | */ |
| | 3954 | if (typ == TOKT_EOF && read_more) |
| | 3955 | { |
| | 3956 | /* splice a line */ |
| | 3957 | typ = actual_splice_next_line(srcbuf, src, &tok); |
| | 3958 | |
| | 3959 | /* note that we've done some line splicing */ |
| | 3960 | spliced = TRUE; |
| | 3961 | } |
| | 3962 | |
| | 3963 | /* if we've reached the end of the file, stop */ |
| | 3964 | if (typ == TOKT_EOF) |
| | 3965 | break; |
| | 3966 | } |
| | 3967 | |
| | 3968 | /* if we've reached the end of the file, stop */ |
| | 3969 | if (typ == TOKT_EOF) |
| | 3970 | break; |
| | 3971 | |
| | 3972 | /* remove any trailing whitespace from the actual's text */ |
| | 3973 | sp_cnt = 0; |
| | 3974 | p.set((char *)tok.get_text()); |
| | 3975 | while (p.getptr() > srcbuf->get_text() + argofs[argc]) |
| | 3976 | { |
| | 3977 | wchar_t ch; |
| | 3978 | |
| | 3979 | /* move to the prior character */ |
| | 3980 | p.dec(); |
| | 3981 | |
| | 3982 | /* if it's not a space, stop looking */ |
| | 3983 | ch = p.getch(); |
| | 3984 | if (!is_space(ch)) |
| | 3985 | { |
| | 3986 | /* |
| | 3987 | * advance past this character so that we keep it in the |
| | 3988 | * expansion |
| | 3989 | */ |
| | 3990 | p.inc(); |
| | 3991 | |
| | 3992 | /* |
| | 3993 | * if this last character was a backslash, and we removed |
| | 3994 | * at least one space following it, keep the one space |
| | 3995 | * that immediately follows the backslash, since that |
| | 3996 | * space is part of the backslash's two-character escape |
| | 3997 | * sequence |
| | 3998 | */ |
| | 3999 | if (ch == '\\' && sp_cnt != 0) |
| | 4000 | p.inc(); |
| | 4001 | |
| | 4002 | /* stop scanning */ |
| | 4003 | break; |
| | 4004 | } |
| | 4005 | |
| | 4006 | /* that's one more trailing space we've removed - count it */ |
| | 4007 | ++sp_cnt; |
| | 4008 | } |
| | 4009 | |
| | 4010 | /* note the argument length */ |
| | 4011 | arglen[argc] = (p.getptr() - srcbuf->get_text()) - argofs[argc]; |
| | 4012 | |
| | 4013 | /* count the argument */ |
| | 4014 | ++argc; |
| | 4015 | |
| | 4016 | /* check for another argument */ |
| | 4017 | if (typ == TOKT_COMMA) |
| | 4018 | { |
| | 4019 | /* remember the offset of the start of this argument */ |
| | 4020 | argofs[argc] = tok.get_text() + tok.get_text_len() |
| | 4021 | - srcbuf->get_text(); |
| | 4022 | |
| | 4023 | /* skip the comma and go back for another argument */ |
| | 4024 | typ = next_on_line(srcbuf, src, &tok, ¯o_in_embedding_, |
| | 4025 | TRUE); |
| | 4026 | } |
| | 4027 | else if (typ == TOKT_RPAR) |
| | 4028 | { |
| | 4029 | /* |
| | 4030 | * No need to look any further. Note that we don't want to |
| | 4031 | * get another token, since we're done parsing the input |
| | 4032 | * now, and we want to leave the token stream positioned for |
| | 4033 | * the caller just after the extent of the macro, which, in |
| | 4034 | * the case of this function-like macro, ends with the |
| | 4035 | * closing paren. |
| | 4036 | */ |
| | 4037 | break; |
| | 4038 | } |
| | 4039 | } |
| | 4040 | |
| | 4041 | /* if we didn't find the right paren, flag the error */ |
| | 4042 | if (typ != TOKT_RPAR) |
| | 4043 | { |
| | 4044 | log_error(read_more |
| | 4045 | ? TCERR_PP_MACRO_ARG_RPAR : TCERR_PP_MACRO_ARG_RPAR_1LINE, |
| | 4046 | (int)entry->getlen(), entry->getstr()); |
| | 4047 | return 1; |
| | 4048 | } |
| | 4049 | |
| | 4050 | /* remove leading and trailing whitespace from each argument */ |
| | 4051 | for (i = 0 ; i < argc ; ++i) |
| | 4052 | { |
| | 4053 | const char *start; |
| | 4054 | const char *end; |
| | 4055 | utf8_ptr p; |
| | 4056 | size_t del_len; |
| | 4057 | int sp_cnt; |
| | 4058 | |
| | 4059 | /* figure the limits of the argument text */ |
| | 4060 | start = srcbuf->get_text() + argofs[i]; |
| | 4061 | end = start + arglen[i]; |
| | 4062 | |
| | 4063 | /* remove leading whitespace */ |
| | 4064 | for (p.set((char *)start) ; p.getptr() < end && is_space(p.getch()) ; |
| | 4065 | p.inc()) ; |
| | 4066 | |
| | 4067 | /* set the new offset and length */ |
| | 4068 | del_len = p.getptr() - start; |
| | 4069 | argofs[i] += del_len; |
| | 4070 | arglen[i] -= del_len; |
| | 4071 | start += del_len; |
| | 4072 | |
| | 4073 | /* remove trailing whitespace */ |
| | 4074 | p.set((char *)end); |
| | 4075 | sp_cnt = 0; |
| | 4076 | while (p.getptr() > start) |
| | 4077 | { |
| | 4078 | wchar_t ch; |
| | 4079 | |
| | 4080 | /* go to the prior character */ |
| | 4081 | p.dec(); |
| | 4082 | |
| | 4083 | /* if it's not whitespace, keep it */ |
| | 4084 | ch = p.getch(); |
| | 4085 | if (!is_space(ch)) |
| | 4086 | { |
| | 4087 | /* put the character back */ |
| | 4088 | p.inc(); |
| | 4089 | |
| | 4090 | /* |
| | 4091 | * if this is a backslash, and a space follows, keep the |
| | 4092 | * immediately following space, since it's part of the |
| | 4093 | * backslash sequence |
| | 4094 | */ |
| | 4095 | if (ch == '\\' && sp_cnt != 0) |
| | 4096 | p.inc(); |
| | 4097 | |
| | 4098 | /* we're done scanning */ |
| | 4099 | break; |
| | 4100 | } |
| | 4101 | |
| | 4102 | /* count another removed trailing space */ |
| | 4103 | ++sp_cnt; |
| | 4104 | } |
| | 4105 | |
| | 4106 | /* adjust the length */ |
| | 4107 | arglen[i] -= (end - p.getptr()); |
| | 4108 | } |
| | 4109 | |
| | 4110 | /* |
| | 4111 | * if we did any line splicing, cut off the rest of the line and |
| | 4112 | * push it back into the logical input stream as a new line - this |
| | 4113 | * will allow better error message positioning if errors occur in |
| | 4114 | * the remainder of the line, since this means we'll only |
| | 4115 | * artificially join onto one line the part of the new line that |
| | 4116 | * contained the macro parameters |
| | 4117 | */ |
| | 4118 | if (spliced) |
| | 4119 | unsplice_line(tok.get_text() + tok.get_text_len()); |
| | 4120 | |
| | 4121 | /* make sure we found enough arguments */ |
| | 4122 | if (argc < entry->get_min_argc()) |
| | 4123 | { |
| | 4124 | /* fill in the remaining arguments with empty strings */ |
| | 4125 | for ( ; argc < entry->get_argc() ; ++argc) |
| | 4126 | { |
| | 4127 | argofs[argc] = 0; |
| | 4128 | arglen[argc] = 0; |
| | 4129 | } |
| | 4130 | |
| | 4131 | /* note the error, but proceed with empty arguments */ |
| | 4132 | log_warning(TCERR_PP_FEW_MACRO_ARGS, |
| | 4133 | (int)entry->getlen(), entry->getstr()); |
| | 4134 | } |
| | 4135 | |
| | 4136 | /* |
| | 4137 | * if we have varargs, always supply an empty marker for the last |
| | 4138 | * argument |
| | 4139 | */ |
| | 4140 | if (entry->has_varargs() && argc < TOK_MAX_MACRO_ARGS) |
| | 4141 | { |
| | 4142 | argofs[argc] = 0; |
| | 4143 | arglen[argc] = 0; |
| | 4144 | } |
| | 4145 | |
| | 4146 | /* success - we found an actual parameter list */ |
| | 4147 | *found_actuals = TRUE; |
| | 4148 | return 0; |
| | 4149 | } |
| | 4150 | |
| | 4151 | /* |
| | 4152 | * Splice a line for macro actual parameters. Sets the source pointer |
| | 4153 | * to the start of the new line. Reads the first token on the spliced |
| | 4154 | * line and returns it. |
| | 4155 | * |
| | 4156 | * We will splice new lines until we find a non-empty line or reach the |
| | 4157 | * end of the input. If this returns EOF, it indicates that we've |
| | 4158 | * reached the end of the entire input. |
| | 4159 | */ |
| | 4160 | tc_toktyp_t CTcTokenizer:: |
| | 4161 | actual_splice_next_line(const CTcTokString *srcbuf, |
| | 4162 | utf8_ptr *src, CTcToken *tok) |
| | 4163 | { |
| | 4164 | /* add a space onto the end of the current line */ |
| | 4165 | linebuf_.append(" ", 1); |
| | 4166 | |
| | 4167 | /* keep going until we find a non-empty line */ |
| | 4168 | for (;;) |
| | 4169 | { |
| | 4170 | int new_line_ofs; |
| | 4171 | tc_toktyp_t typ; |
| | 4172 | |
| | 4173 | /* splice the next line onto the current line */ |
| | 4174 | new_line_ofs = read_line(TRUE); |
| | 4175 | |
| | 4176 | /* |
| | 4177 | * make sure we read additional lines as needed to complete any |
| | 4178 | * strings left open at the end of the line |
| | 4179 | */ |
| | 4180 | if (in_quote_ != '\0') |
| | 4181 | splice_string(); |
| | 4182 | |
| | 4183 | /* if there was no more, return end of file */ |
| | 4184 | if (new_line_ofs == -1) |
| | 4185 | return TOKT_EOF; |
| | 4186 | |
| | 4187 | /* set the source to the start of the additional line */ |
| | 4188 | src->set((char *)linebuf_.get_text() + new_line_ofs); |
| | 4189 | |
| | 4190 | /* get the next token */ |
| | 4191 | typ = next_on_line(srcbuf, src, tok, ¯o_in_embedding_, TRUE); |
| | 4192 | |
| | 4193 | /* if we didn't get EOF, it means we found a non-empty line */ |
| | 4194 | if (typ != TOKT_EOF) |
| | 4195 | return typ; |
| | 4196 | } |
| | 4197 | } |
| | 4198 | |
| | 4199 | /* |
| | 4200 | * Substitute the actual parameters in a macro's expansion |
| | 4201 | */ |
| | 4202 | int CTcTokenizer::substitute_macro_actuals(CTcMacroRsc *rsc, |
| | 4203 | CTcTokString *subexp, |
| | 4204 | CTcHashEntryPp *entry, |
| | 4205 | const CTcTokString *srcbuf, |
| | 4206 | const size_t *argofs, |
| | 4207 | const size_t *arglen, |
| | 4208 | int allow_defined) |
| | 4209 | { |
| | 4210 | const char *start; |
| | 4211 | utf8_ptr expsrc; |
| | 4212 | CTcToken prvtok; |
| | 4213 | CTcToken prvprvtok; |
| | 4214 | CTcToken tok; |
| | 4215 | tc_toktyp_t typ; |
| | 4216 | const CVmHashTable *actuals; |
| | 4217 | CTcTokString *actual_exp_buf; |
| | 4218 | const size_t expand_max = 10; |
| | 4219 | static struct expand_info_t |
| | 4220 | { |
| | 4221 | /* type of expansion (#foreach, #ifempty, #ifnempty) */ |
| | 4222 | tc_toktyp_t typ; |
| | 4223 | |
| | 4224 | /* |
| | 4225 | * flag: this is an iterator type (if this is true, the varargs |
| | 4226 | * formal should be expanded to the current argument given by our |
| | 4227 | * 'arg' member; if this is false, the varargs formal should be |
| | 4228 | * expanded as the full varargs list) |
| | 4229 | */ |
| | 4230 | int is_iterator; |
| | 4231 | |
| | 4232 | /* the marker character that delimits the foreach arguments */ |
| | 4233 | wchar_t delim; |
| | 4234 | |
| | 4235 | /* location of start of expansion region for foreach */ |
| | 4236 | utf8_ptr start; |
| | 4237 | |
| | 4238 | /* current argument index */ |
| | 4239 | int arg; |
| | 4240 | |
| | 4241 | /* the current expansion part (0 = first part, etc) */ |
| | 4242 | int part; |
| | 4243 | } |
| | 4244 | expand_stack[expand_max], *expand_sp; |
| | 4245 | |
| | 4246 | /* get the actuals table */ |
| | 4247 | actuals = entry->get_params_table(); |
| | 4248 | |
| | 4249 | /* get the actual expansion buffer from the resource object */ |
| | 4250 | actual_exp_buf = &rsc->actual_exp_buf_; |
| | 4251 | |
| | 4252 | /* |
| | 4253 | * Scan the replacement text for formals, and replace each formal |
| | 4254 | * with the actual. Set up a pointer at the start of the expansion |
| | 4255 | * text. |
| | 4256 | */ |
| | 4257 | start = entry->get_expansion(); |
| | 4258 | expsrc.set((char *)start); |
| | 4259 | |
| | 4260 | /* we don't yet have a previous token */ |
| | 4261 | prvtok.settyp(TOKT_EOF); |
| | 4262 | prvprvtok.settyp(TOKT_EOF); |
| | 4263 | |
| | 4264 | /* clear the expansion buffer */ |
| | 4265 | subexp->clear_text(); |
| | 4266 | |
| | 4267 | /* we have no #foreach/#ifempty/#ifnempty stack yet */ |
| | 4268 | expand_sp = expand_stack; |
| | 4269 | |
| | 4270 | /* scan the tokens in the expansion text */ |
| | 4271 | for (typ = next_on_line(&expsrc, &tok, ¯o_in_embedding_, TRUE) ; |
| | 4272 | typ != TOKT_EOF ; ) |
| | 4273 | { |
| | 4274 | /* |
| | 4275 | * check to see if we've reached the end of a |
| | 4276 | * #foreach/#ifempty/#ifnempty |
| | 4277 | */ |
| | 4278 | if (expand_sp != expand_stack) |
| | 4279 | { |
| | 4280 | /* check to see if we're at the delimiter */ |
| | 4281 | if (utf8_ptr::s_getch(tok.get_text()) == (expand_sp-1)->delim) |
| | 4282 | { |
| | 4283 | /* copy the prior expansion so far */ |
| | 4284 | if (tok.get_text() > start) |
| | 4285 | subexp->append(start, tok.get_text() - start); |
| | 4286 | |
| | 4287 | /* go back to the start of the token */ |
| | 4288 | expsrc.set((char *)tok.get_text()); |
| | 4289 | |
| | 4290 | /* see what kind of token we're expanding */ |
| | 4291 | switch((expand_sp-1)->typ) |
| | 4292 | { |
| | 4293 | case TOKT_MACRO_FOREACH: |
| | 4294 | /* it's a #foreach - process the appropriate part */ |
| | 4295 | switch ((expand_sp-1)->part) |
| | 4296 | { |
| | 4297 | case 0: |
| | 4298 | /* |
| | 4299 | * We've been doing the first part, which is the |
| | 4300 | * main expansion per actual. This delimiter thus |
| | 4301 | * introduces the 'between' portion, which we copy |
| | 4302 | * between each iteration, but not after the last |
| | 4303 | * iteration. So, if we've just done the last |
| | 4304 | * actual, skip this part entirely; otherwise, |
| | 4305 | * keep going, using this part. |
| | 4306 | */ |
| | 4307 | if (argofs[(expand_sp-1)->arg + 1] == 0) |
| | 4308 | { |
| | 4309 | /* skip this one remaining part */ |
| | 4310 | skip_delimited_group(&expsrc, 1); |
| | 4311 | |
| | 4312 | /* we're finished with the iteration */ |
| | 4313 | goto end_foreach; |
| | 4314 | } |
| | 4315 | else |
| | 4316 | { |
| | 4317 | /* |
| | 4318 | * we have more arguments, so we want to |
| | 4319 | * expand this part - skip the delimiter and |
| | 4320 | * keep going |
| | 4321 | */ |
| | 4322 | expsrc.inc(); |
| | 4323 | |
| | 4324 | /* we're now in the next part of the iterator */ |
| | 4325 | (expand_sp-1)->part++; |
| | 4326 | } |
| | 4327 | break; |
| | 4328 | |
| | 4329 | case 1: |
| | 4330 | /* |
| | 4331 | * We've reached the end of the entire #foreach |
| | 4332 | * string, so we're done with this iteration. |
| | 4333 | * Skip the delimiter. |
| | 4334 | */ |
| | 4335 | expsrc.inc(); |
| | 4336 | |
| | 4337 | end_foreach: |
| | 4338 | /* |
| | 4339 | * if we have more arguments, start over with the |
| | 4340 | * next iteration; otherwise, pop the #foreach |
| | 4341 | * level |
| | 4342 | */ |
| | 4343 | if (argofs[(expand_sp-1)->arg + 1] == 0) |
| | 4344 | { |
| | 4345 | /* no more arguments - pop the #foreach level */ |
| | 4346 | --expand_sp; |
| | 4347 | } |
| | 4348 | else |
| | 4349 | { |
| | 4350 | /* we have more arguments - move to the next */ |
| | 4351 | (expand_sp-1)->arg++; |
| | 4352 | |
| | 4353 | /* go back to the start of the expansion */ |
| | 4354 | expsrc = (expand_sp-1)->start; |
| | 4355 | |
| | 4356 | /* we have no previous token for pasting ops */ |
| | 4357 | prvtok.settyp(TOKT_EOF); |
| | 4358 | prvprvtok.settyp(TOKT_EOF); |
| | 4359 | |
| | 4360 | /* we're back in the first part of the iterator */ |
| | 4361 | (expand_sp-1)->part = 0; |
| | 4362 | } |
| | 4363 | break; |
| | 4364 | } |
| | 4365 | break; |
| | 4366 | |
| | 4367 | case TOKT_MACRO_IFEMPTY: |
| | 4368 | case TOKT_MACRO_IFNEMPTY: |
| | 4369 | /* |
| | 4370 | * #ifempty or #ifnempty - we've reached the end of |
| | 4371 | * the conditional text, so simply pop a level and |
| | 4372 | * keep going after the delimiter |
| | 4373 | */ |
| | 4374 | |
| | 4375 | /* skip the delimiter */ |
| | 4376 | expsrc.inc(); |
| | 4377 | |
| | 4378 | /* pop a level */ |
| | 4379 | --expand_sp; |
| | 4380 | |
| | 4381 | /* done */ |
| | 4382 | break; |
| | 4383 | |
| | 4384 | default: |
| | 4385 | break; |
| | 4386 | } |
| | 4387 | |
| | 4388 | /* the next chunk starts here */ |
| | 4389 | start = expsrc.getptr(); |
| | 4390 | |
| | 4391 | /* get the next token */ |
| | 4392 | typ = next_on_line(&expsrc, &tok, ¯o_in_embedding_, TRUE); |
| | 4393 | |
| | 4394 | /* we have the next token, so back and process it */ |
| | 4395 | continue; |
| | 4396 | } |
| | 4397 | } |
| | 4398 | |
| | 4399 | /* if it's a #foreach marker, start a #foreach iteration */ |
| | 4400 | if (typ == TOKT_MACRO_FOREACH && entry->has_varargs()) |
| | 4401 | { |
| | 4402 | /* copy the prior expansion so far */ |
| | 4403 | if (tok.get_text() > start) |
| | 4404 | subexp->append(start, tok.get_text() - start); |
| | 4405 | |
| | 4406 | /* push a #foreach level, if possible */ |
| | 4407 | if (expand_sp - expand_stack >= expand_max) |
| | 4408 | { |
| | 4409 | /* |
| | 4410 | * we can't create another level - log an error and ignore |
| | 4411 | * this new level |
| | 4412 | */ |
| | 4413 | log_error(TCERR_PP_FOREACH_TOO_DEEP); |
| | 4414 | } |
| | 4415 | else if (argofs[entry->get_argc() - 1] == 0) |
| | 4416 | { |
| | 4417 | /* |
| | 4418 | * we have no actuals for the variable part of the |
| | 4419 | * formals, so we must iterate zero times through the |
| | 4420 | * #foreach part - in other words, simply skip ahead to |
| | 4421 | * the end of the #foreach |
| | 4422 | */ |
| | 4423 | skip_delimited_group(&expsrc, 2); |
| | 4424 | } |
| | 4425 | else |
| | 4426 | { |
| | 4427 | /* remember and skip the marker character */ |
| | 4428 | expand_sp->delim = expsrc.getch(); |
| | 4429 | expsrc.inc(); |
| | 4430 | |
| | 4431 | /* set the expansion type */ |
| | 4432 | expand_sp->typ = typ; |
| | 4433 | |
| | 4434 | /* |
| | 4435 | * remember the position where the #foreach started, since |
| | 4436 | * we need to come back here for each use of the variable |
| | 4437 | */ |
| | 4438 | expand_sp->start = expsrc; |
| | 4439 | |
| | 4440 | /* we're an iterator type */ |
| | 4441 | expand_sp->is_iterator = TRUE; |
| | 4442 | |
| | 4443 | /* |
| | 4444 | * Start at the first argument in the variable part of the |
| | 4445 | * argument list. The last formal corresponds to the |
| | 4446 | * first variable argument. |
| | 4447 | */ |
| | 4448 | expand_sp->arg = entry->get_argc() - 1; |
| | 4449 | |
| | 4450 | /* we're in the main expansion part of the expression */ |
| | 4451 | expand_sp->part = 0; |
| | 4452 | |
| | 4453 | /* push the new level */ |
| | 4454 | ++expand_sp; |
| | 4455 | } |
| | 4456 | |
| | 4457 | /* the next chunk starts here */ |
| | 4458 | start = expsrc.getptr(); |
| | 4459 | |
| | 4460 | /* get the next token */ |
| | 4461 | typ = next_on_line(&expsrc, &tok, ¯o_in_embedding_, TRUE); |
| | 4462 | |
| | 4463 | /* we have the next token, so back and process it */ |
| | 4464 | continue; |
| | 4465 | } |
| | 4466 | |
| | 4467 | /* if it's a varargs #ifempty or #ifnempty flag, expand it */ |
| | 4468 | if ((typ == TOKT_MACRO_IFEMPTY || typ == TOKT_MACRO_IFNEMPTY) |
| | 4469 | && entry->has_varargs()) |
| | 4470 | { |
| | 4471 | int is_empty; |
| | 4472 | int expand; |
| | 4473 | |
| | 4474 | /* copy the prior expansion so far */ |
| | 4475 | if (tok.get_text() > start) |
| | 4476 | subexp->append(start, tok.get_text() - start); |
| | 4477 | |
| | 4478 | /* determine if the varargs list is empty or not */ |
| | 4479 | is_empty = (argofs[entry->get_argc() - 1] == 0); |
| | 4480 | |
| | 4481 | /* |
| | 4482 | * decide whether or not expand it, according to the empty |
| | 4483 | * state and the flag type |
| | 4484 | */ |
| | 4485 | expand = ((is_empty && typ == TOKT_MACRO_IFEMPTY) |
| | 4486 | || (!is_empty && typ == TOKT_MACRO_IFNEMPTY)); |
| | 4487 | |
| | 4488 | /* |
| | 4489 | * if we're going to expand it, push a level; otherwise, just |
| | 4490 | * skip the entire expansion |
| | 4491 | */ |
| | 4492 | if (expand) |
| | 4493 | { |
| | 4494 | /* make sure we have room for another level */ |
| | 4495 | if (expand_sp - expand_stack >= expand_max) |
| | 4496 | { |
| | 4497 | /* no room - log an error and ignore the new level */ |
| | 4498 | log_error(TCERR_PP_FOREACH_TOO_DEEP); |
| | 4499 | } |
| | 4500 | else |
| | 4501 | { |
| | 4502 | /* remember and skip the delimiter */ |
| | 4503 | expand_sp->delim = expsrc.getch(); |
| | 4504 | expsrc.inc(); |
| | 4505 | |
| | 4506 | /* |
| | 4507 | * we're not an iterator type, so inherit the |
| | 4508 | * enclosing level's meaning of the varargs formal |
| | 4509 | */ |
| | 4510 | if (expand_sp - expand_stack == 0) |
| | 4511 | { |
| | 4512 | /* outermost level - use the whole varargs list */ |
| | 4513 | expand_sp->is_iterator = FALSE; |
| | 4514 | } |
| | 4515 | else |
| | 4516 | { |
| | 4517 | /* use the enclosing level's meaning */ |
| | 4518 | expand_sp->is_iterator = (expand_sp-1)->is_iterator; |
| | 4519 | expand_sp->arg = (expand_sp-1)->arg; |
| | 4520 | } |
| | 4521 | |
| | 4522 | /* set the expansion type */ |
| | 4523 | expand_sp->typ = typ; |
| | 4524 | |
| | 4525 | /* push the new level */ |
| | 4526 | ++expand_sp; |
| | 4527 | } |
| | 4528 | } |
| | 4529 | else |
| | 4530 | { |
| | 4531 | /* not expanding - just skip the entire expansion */ |
| | 4532 | skip_delimited_group(&expsrc, 1); |
| | 4533 | } |
| | 4534 | |
| | 4535 | /* the next chunk starts here */ |
| | 4536 | start = expsrc.getptr(); |
| | 4537 | |
| | 4538 | /* get the next token */ |
| | 4539 | typ = next_on_line(&expsrc, &tok, ¯o_in_embedding_, TRUE); |
| | 4540 | |
| | 4541 | /* we have the next token, so back and process it */ |
| | 4542 | continue; |
| | 4543 | } |
| | 4544 | |
| | 4545 | /* if it's a varargs #argcount indicator, expand it */ |
| | 4546 | if (typ == TOKT_MACRO_ARGCOUNT && entry->has_varargs()) |
| | 4547 | { |
| | 4548 | char buf[20]; |
| | 4549 | int i; |
| | 4550 | |
| | 4551 | /* copy the prior expansion so far */ |
| | 4552 | if (tok.get_text() > start) |
| | 4553 | subexp->append(start, tok.get_text() - start); |
| | 4554 | |
| | 4555 | /* |
| | 4556 | * count the number of arguments after and including the |
| | 4557 | * variable argument placeholder |
| | 4558 | */ |
| | 4559 | for (i = entry->get_argc() - 1 ; argofs[i] != 0 ; ++i) ; |
| | 4560 | |
| | 4561 | /* make a string out of the variable argument count */ |
| | 4562 | sprintf(buf, "%d", i - (entry->get_argc() - 1)); |
| | 4563 | |
| | 4564 | /* add the argument count to the output buffer */ |
| | 4565 | subexp->append(buf, strlen(buf)); |
| | 4566 | |
| | 4567 | /* the next chunk starts after the #argcount */ |
| | 4568 | start = expsrc.getptr(); |
| | 4569 | |
| | 4570 | /* get the next token */ |
| | 4571 | typ = next_on_line(&expsrc, &tok, ¯o_in_embedding_, TRUE); |
| | 4572 | |
| | 4573 | /* we have the next token, so go back and process it */ |
| | 4574 | continue; |
| | 4575 | } |
| | 4576 | |
| | 4577 | /* if it's a symbol, check for an actual */ |
| | 4578 | if (typ == TOKT_MACRO_FORMAL) |
| | 4579 | { |
| | 4580 | const char *p; |
| | 4581 | int argnum; |
| | 4582 | size_t argnum_len; |
| | 4583 | int pasting; |
| | 4584 | int pasting_at_left, pasting_at_right; |
| | 4585 | int stringize; |
| | 4586 | char stringize_qu; |
| | 4587 | tc_toktyp_t stringize_type; |
| | 4588 | CTcToken paste_at_right_tok; |
| | 4589 | |
| | 4590 | /* assume we'll copy up to the start of this token */ |
| | 4591 | p = tok.get_text(); |
| | 4592 | |
| | 4593 | /* |
| | 4594 | * get the index of the actual in the argument vector -- |
| | 4595 | * this is given by the second byte of the special macro |
| | 4596 | * parameter flag token |
| | 4597 | */ |
| | 4598 | argnum = (int)(uchar)tok.get_text()[1] - 1; |
| | 4599 | |
| | 4600 | /* |
| | 4601 | * If we have varargs, and this is the varargs argument, and |
| | 4602 | * the current #foreach stack level indicates that we're |
| | 4603 | * iterating through the varargs list, treat this as a |
| | 4604 | * reference to the current argument in the iteration. |
| | 4605 | */ |
| | 4606 | if (expand_sp != expand_stack |
| | 4607 | && argnum == entry->get_argc() - 1 |
| | 4608 | && (expand_sp-1)->is_iterator) |
| | 4609 | { |
| | 4610 | /* |
| | 4611 | * we're on a #foreach iterator, and this is the varargs |
| | 4612 | * formal - use the current #foreach iteration element |
| | 4613 | * instead |
| | 4614 | */ |
| | 4615 | argnum = (expand_sp-1)->arg; |
| | 4616 | } |
| | 4617 | |
| | 4618 | /* |
| | 4619 | * Get the length of this argument. If we have varargs, and |
| | 4620 | * this is the last formal, which is the placeholder for the |
| | 4621 | * variable argument list, and we're not in a #foreach |
| | 4622 | * iterator, the value is the value of the entire string of |
| | 4623 | * variable arguments, including the commas. |
| | 4624 | */ |
| | 4625 | if (expand_sp == expand_stack |
| | 4626 | && entry->has_varargs() |
| | 4627 | && argnum == entry->get_argc() - 1) |
| | 4628 | { |
| | 4629 | int i; |
| | 4630 | |
| | 4631 | /* |
| | 4632 | * It's the full varargs list - use the length from the |
| | 4633 | * first varargs argument to the last. Find the last |
| | 4634 | * argument. |
| | 4635 | */ |
| | 4636 | for (i = argnum ; |
| | 4637 | i < TOK_MAX_MACRO_ARGS && argofs[i] != 0 ; ++i) ; |
| | 4638 | |
| | 4639 | /* |
| | 4640 | * The full list length is the distance from the offset of |
| | 4641 | * the first to the end of the last. If there are no |
| | 4642 | * varargs arguments at all, the length is zero. |
| | 4643 | */ |
| | 4644 | if (i == argnum) |
| | 4645 | argnum_len = 0; |
| | 4646 | else |
| | 4647 | argnum_len = argofs[i-1] + arglen[i-1] - argofs[argnum]; |
| | 4648 | } |
| | 4649 | else |
| | 4650 | { |
| | 4651 | /* |
| | 4652 | * it's not the full varargs list, so just use the length |
| | 4653 | * of this single actual |
| | 4654 | */ |
| | 4655 | argnum_len = arglen[argnum]; |
| | 4656 | } |
| | 4657 | |
| | 4658 | /* assume we won't do any token pasting or stringizing */ |
| | 4659 | pasting = pasting_at_left = pasting_at_right = FALSE; |
| | 4660 | stringize = FALSE; |
| | 4661 | |
| | 4662 | /* |
| | 4663 | * if the previous token was a token-pasting operator, |
| | 4664 | * remove it and any preceding whitespace from the source |
| | 4665 | * material, since we want to append the actual parameter |
| | 4666 | * text directly after the preceding token |
| | 4667 | */ |
| | 4668 | check_paste_left: |
| | 4669 | if (prvtok.gettyp() == TOKT_POUNDPOUND) |
| | 4670 | { |
| | 4671 | wchar_t prv_ch; |
| | 4672 | |
| | 4673 | /* |
| | 4674 | * note that we have token pasting - we're pasting |
| | 4675 | * something to the left of this token (since we had a |
| | 4676 | * "##" before this token) |
| | 4677 | */ |
| | 4678 | pasting = TRUE; |
| | 4679 | pasting_at_left = TRUE; |
| | 4680 | |
| | 4681 | /* go back to the ## token */ |
| | 4682 | p = prvtok.get_text(); |
| | 4683 | |
| | 4684 | /* remove any preceding whitespace */ |
| | 4685 | for (prv_ch = 0 ; p > start ; ) |
| | 4686 | { |
| | 4687 | const char *prvp; |
| | 4688 | |
| | 4689 | /* get the previous character */ |
| | 4690 | prvp = utf8_ptr::s_dec((char *)p); |
| | 4691 | prv_ch = utf8_ptr::s_getch((char *)prvp); |
| | 4692 | |
| | 4693 | /* if it's not a space, we're done */ |
| | 4694 | if (!is_space(prv_ch)) |
| | 4695 | break; |
| | 4696 | |
| | 4697 | /* move back over this character */ |
| | 4698 | p = prvp; |
| | 4699 | } |
| | 4700 | |
| | 4701 | /* |
| | 4702 | * Weird special case: if the previous character was a |
| | 4703 | * comma, and the formal we're pasting is a variable |
| | 4704 | * argument formal (i.e., the last formal in a varargs |
| | 4705 | * macro), and the varargs list is empty, then remove the |
| | 4706 | * comma. This is a handy shorthand notation that allows |
| | 4707 | * the varargs list to be added to a comma-delimited list, |
| | 4708 | * such as a function call's actuals or the contents of a |
| | 4709 | * list. |
| | 4710 | */ |
| | 4711 | if (prv_ch == ',' |
| | 4712 | && entry->has_varargs() |
| | 4713 | && argnum == entry->get_argc() - 1 |
| | 4714 | && argofs[argnum] == 0) |
| | 4715 | { |
| | 4716 | /* |
| | 4717 | * it's the special case - move back one more |
| | 4718 | * character to delete the comma |
| | 4719 | */ |
| | 4720 | p = utf8_ptr::s_dec((char *)p); |
| | 4721 | } |
| | 4722 | } |
| | 4723 | else if (prvtok.gettyp() == TOKT_POUND |
| | 4724 | || prvtok.gettyp() == TOKT_POUNDAT) |
| | 4725 | { |
| | 4726 | /* go back to the # token */ |
| | 4727 | p = prvtok.get_text(); |
| | 4728 | |
| | 4729 | /* note that we have stringizing */ |
| | 4730 | stringize = TRUE; |
| | 4731 | stringize_type = prvtok.gettyp(); |
| | 4732 | stringize_qu = (prvtok.gettyp() == TOKT_POUND |
| | 4733 | ? '"' : '\''); |
| | 4734 | |
| | 4735 | /* go back one more token */ |
| | 4736 | prvtok = prvprvtok; |
| | 4737 | prvprvtok.settyp(TOKT_EOF); |
| | 4738 | |
| | 4739 | /* |
| | 4740 | * go back and check for pasting again, since we could |
| | 4741 | * be pasting to a stringized token |
| | 4742 | */ |
| | 4743 | goto check_paste_left; |
| | 4744 | } |
| | 4745 | |
| | 4746 | /* copy the prior expansion so far */ |
| | 4747 | if (p > start) |
| | 4748 | subexp->append(start, p - start); |
| | 4749 | |
| | 4750 | /* remember the symbol as the previous token */ |
| | 4751 | prvprvtok = prvtok; |
| | 4752 | prvtok = tok; |
| | 4753 | |
| | 4754 | /* get the next token after the formal */ |
| | 4755 | typ = next_on_line(&expsrc, &tok, ¯o_in_embedding_, TRUE); |
| | 4756 | |
| | 4757 | /* |
| | 4758 | * If it's followed by a token-pasting operator, we need to |
| | 4759 | * paste the next token directly onto the end of the text we |
| | 4760 | * just added to the buffer, skipping any intervening |
| | 4761 | * whitespace; otherwise, we want to start adding again at |
| | 4762 | * the next character after the original token. |
| | 4763 | */ |
| | 4764 | if (typ == TOKT_POUNDPOUND) |
| | 4765 | { |
| | 4766 | utf8_ptr old_expsrc; |
| | 4767 | CTcToken old_tok; |
| | 4768 | |
| | 4769 | /* note that we have pasting to the right of this token */ |
| | 4770 | pasting = TRUE; |
| | 4771 | pasting_at_right = TRUE; |
| | 4772 | |
| | 4773 | /* remember where we started */ |
| | 4774 | old_expsrc = expsrc; |
| | 4775 | |
| | 4776 | /* remember the current token for a moment */ |
| | 4777 | old_tok = tok; |
| | 4778 | |
| | 4779 | /* skip to the next token after the ## */ |
| | 4780 | typ = next_on_line(&expsrc, &tok, ¯o_in_embedding_, TRUE); |
| | 4781 | |
| | 4782 | /* remember the token we're pasting to the right */ |
| | 4783 | paste_at_right_tok = tok; |
| | 4784 | |
| | 4785 | /* check for pasting to a stringizer */ |
| | 4786 | if (stringize && typ == stringize_type) |
| | 4787 | { |
| | 4788 | /* |
| | 4789 | * leave the ## in the stream for now - we'll fix it |
| | 4790 | * up when we stringize the next token, rather than |
| | 4791 | * doing so now |
| | 4792 | */ |
| | 4793 | expsrc = old_expsrc; |
| | 4794 | tok = old_tok; |
| | 4795 | } |
| | 4796 | else |
| | 4797 | { |
| | 4798 | /* |
| | 4799 | * remember that we have a token-pasting operator, |
| | 4800 | * so that we can tell that we're pasting when we |
| | 4801 | * look at the next token |
| | 4802 | */ |
| | 4803 | prvprvtok = prvtok; |
| | 4804 | prvtok = old_tok; |
| | 4805 | } |
| | 4806 | |
| | 4807 | /* start next text from here */ |
| | 4808 | start = tok.get_text(); |
| | 4809 | } |
| | 4810 | else |
| | 4811 | { |
| | 4812 | /* Start at the end of the symbol token */ |
| | 4813 | start = prvtok.get_text() + prvtok.get_text_len(); |
| | 4814 | } |
| | 4815 | |
| | 4816 | /* |
| | 4817 | * If we're not doing any pasting, recursively expand macros |
| | 4818 | * in the actual expansion text. If we're pasting, do not |
| | 4819 | * expand any macros in the expansion, since we want to do |
| | 4820 | * the pasting before we do any expanding. |
| | 4821 | */ |
| | 4822 | if (pasting && stringize) |
| | 4823 | { |
| | 4824 | int add_open; |
| | 4825 | int add_close; |
| | 4826 | |
| | 4827 | /* presume we'll include the open and close quotes */ |
| | 4828 | add_close = TRUE; |
| | 4829 | add_open = TRUE; |
| | 4830 | |
| | 4831 | /* |
| | 4832 | * If we're pasting to the left, and the buffer so far |
| | 4833 | * ends in the same quote we're adding to this token, |
| | 4834 | * combine the strings by removing the preceding quote |
| | 4835 | * and not adding the open quote on the new string |
| | 4836 | */ |
| | 4837 | if (subexp->get_text_len() > 0 |
| | 4838 | && *(subexp->get_text_end() - 1) == stringize_qu) |
| | 4839 | { |
| | 4840 | /* remove the close quote from the expansion so far */ |
| | 4841 | subexp->set_text_len(subexp->get_text_len() - 1); |
| | 4842 | |
| | 4843 | /* don't add the open quote to the new string */ |
| | 4844 | add_open = FALSE; |
| | 4845 | } |
| | 4846 | |
| | 4847 | /* |
| | 4848 | * If we're pasting to the right, and we have a string |
| | 4849 | * of the same type following, or we will be pasting a |
| | 4850 | * stringizing pair, paste the two strings together to |
| | 4851 | * form one string by removing the close quote from this |
| | 4852 | * string and the open quote from the next string |
| | 4853 | */ |
| | 4854 | if (pasting_at_right && *tok.get_text() == stringize_qu) |
| | 4855 | add_close = FALSE; |
| | 4856 | |
| | 4857 | /* |
| | 4858 | * We're both stringizing this argument and pasting |
| | 4859 | * another token - first stringize the actual. |
| | 4860 | */ |
| | 4861 | stringize_macro_actual(subexp, |
| | 4862 | srcbuf->get_text() |
| | 4863 | + argofs[argnum], argnum_len, |
| | 4864 | stringize_qu, add_open, add_close); |
| | 4865 | |
| | 4866 | /* |
| | 4867 | * if we decided to remove the closing quote, we want to |
| | 4868 | * remove the open quote from the following string as |
| | 4869 | * well - copy in the following string without its open |
| | 4870 | * quote |
| | 4871 | */ |
| | 4872 | if (!add_close) |
| | 4873 | { |
| | 4874 | /* |
| | 4875 | * append the following token without its first |
| | 4876 | * character (its open quote) |
| | 4877 | */ |
| | 4878 | subexp->append(tok.get_text() + 1, |
| | 4879 | tok.get_text_len() - 1); |
| | 4880 | |
| | 4881 | /* move on to the next token */ |
| | 4882 | prvprvtok = prvtok; |
| | 4883 | prvtok = tok; |
| | 4884 | typ = next_on_line(&expsrc, &tok, ¯o_in_embedding_, |
| | 4885 | TRUE); |
| | 4886 | |
| | 4887 | /* start from the new token */ |
| | 4888 | start = tok.get_text(); |
| | 4889 | } |
| | 4890 | } |
| | 4891 | else if (pasting) |
| | 4892 | { |
| | 4893 | const char *argp; |
| | 4894 | size_t len; |
| | 4895 | int done; |
| | 4896 | wchar_t quote_char; |
| | 4897 | |
| | 4898 | /* get the actual argument information */ |
| | 4899 | argp = srcbuf->get_text() + argofs[argnum]; |
| | 4900 | len = argnum_len; |
| | 4901 | |
| | 4902 | /* |
| | 4903 | * if we're pasting to the left of this token, and the |
| | 4904 | * token starts with a fully-expanded flag, remove the |
| | 4905 | * flag - we're making up a new token out of this and |
| | 4906 | * what comes before, so the token that we fully |
| | 4907 | * expanded is disappearing, so the fully-expanded |
| | 4908 | * status no longer applies |
| | 4909 | */ |
| | 4910 | if (pasting_at_left && *argp == TOK_FULLY_EXPANDED_FLAG) |
| | 4911 | { |
| | 4912 | /* skip the flag */ |
| | 4913 | ++argp; |
| | 4914 | --len; |
| | 4915 | } |
| | 4916 | |
| | 4917 | /* presume we won't find any quoted strings */ |
| | 4918 | quote_char = 0; |
| | 4919 | |
| | 4920 | /* |
| | 4921 | * check for string concatenation to the left - if we're |
| | 4922 | * concatenating two strings of the same type, remove |
| | 4923 | * the adjacent quotes to make it a single string |
| | 4924 | */ |
| | 4925 | if (pasting_at_left |
| | 4926 | && subexp->get_text_len() > 0 |
| | 4927 | && (*argp == '\'' || *argp == '"') |
| | 4928 | && *(subexp->get_text_end() - 1) == *argp) |
| | 4929 | { |
| | 4930 | /* remove the close quote from the expansion so far */ |
| | 4931 | subexp->set_text_len(subexp->get_text_len() - 1); |
| | 4932 | |
| | 4933 | /* remember the quote character */ |
| | 4934 | quote_char = *argp; |
| | 4935 | |
| | 4936 | /* don't add the open quote to the new string */ |
| | 4937 | ++argp; |
| | 4938 | --len; |
| | 4939 | } |
| | 4940 | |
| | 4941 | /* presume we won't have to do anything special */ |
| | 4942 | done = FALSE; |
| | 4943 | |
| | 4944 | /* |
| | 4945 | * If we're pasting at the right, also remove any |
| | 4946 | * fully-expanded flag just before the last token in the |
| | 4947 | * expansion. |
| | 4948 | */ |
| | 4949 | if (pasting_at_right) |
| | 4950 | { |
| | 4951 | CTcToken old_tok; |
| | 4952 | CTcToken tok; |
| | 4953 | utf8_ptr p; |
| | 4954 | |
| | 4955 | /* scan for the final token in the expansion string */ |
| | 4956 | p.set((char *)argp); |
| | 4957 | old_tok.settyp(TOKT_INVALID); |
| | 4958 | while (p.getptr() < argp + len) |
| | 4959 | { |
| | 4960 | /* |
| | 4961 | * get another token - stop at EOF or if we go |
| | 4962 | * past the bounds of the expansion text |
| | 4963 | */ |
| | 4964 | if (next_on_line(&p, &tok, ¯o_in_embedding_, |
| | 4965 | TRUE) |
| | 4966 | == TOKT_EOF |
| | 4967 | || tok.get_text() >= argp + len) |
| | 4968 | break; |
| | 4969 | |
| | 4970 | /* remember the previous token */ |
| | 4971 | old_tok = tok; |
| | 4972 | } |
| | 4973 | |
| | 4974 | /* |
| | 4975 | * if the final token is a symbol, and it has the |
| | 4976 | * fully-expanded flag, we must omit the flag from |
| | 4977 | * the appended text |
| | 4978 | */ |
| | 4979 | if (old_tok.gettyp() == TOKT_SYM |
| | 4980 | && old_tok.get_fully_expanded()) |
| | 4981 | { |
| | 4982 | /* |
| | 4983 | * append up to but not including the flag byte |
| | 4984 | * preceding the final token |
| | 4985 | */ |
| | 4986 | subexp->append(argp, tok.get_text() - 1 - argp); |
| | 4987 | |
| | 4988 | /* |
| | 4989 | * append from the last token to the end of the |
| | 4990 | * expansion, skipping the flag byte |
| | 4991 | */ |
| | 4992 | subexp->append(tok.get_text(), |
| | 4993 | len - (tok.get_text() - argp)); |
| | 4994 | |
| | 4995 | /* we've done the appending */ |
| | 4996 | done = TRUE; |
| | 4997 | } |
| | 4998 | else if (quote_char != 0 |
| | 4999 | && paste_at_right_tok.get_text_len() != 0 |
| | 5000 | && *paste_at_right_tok.get_text() == quote_char) |
| | 5001 | { |
| | 5002 | /* |
| | 5003 | * we're pasting two strings together - append |
| | 5004 | * up to but not including the close quote |
| | 5005 | */ |
| | 5006 | subexp->append(argp, len - 1); |
| | 5007 | |
| | 5008 | /* |
| | 5009 | * append the next token, but do not include the |
| | 5010 | * open quote |
| | 5011 | */ |
| | 5012 | subexp->append(paste_at_right_tok.get_text() + 1, |
| | 5013 | paste_at_right_tok.get_text_len() - 1); |
| | 5014 | |
| | 5015 | /* |
| | 5016 | * restart after the right token, since we've |
| | 5017 | * now fully processed that token |
| | 5018 | */ |
| | 5019 | start = paste_at_right_tok.get_text() |
| | 5020 | + paste_at_right_tok.get_text_len(); |
| | 5021 | |
| | 5022 | /* we're done */ |
| | 5023 | done = TRUE; |
| | 5024 | } |
| | 5025 | } |
| | 5026 | |
| | 5027 | /* |
| | 5028 | * append the actual without expansion, if we haven't |
| | 5029 | * already handled it specially |
| | 5030 | */ |
| | 5031 | if (!done) |
| | 5032 | subexp->append(argp, len); |
| | 5033 | } |
| | 5034 | else if (stringize) |
| | 5035 | { |
| | 5036 | /* stringize the actual */ |
| | 5037 | stringize_macro_actual(subexp, |
| | 5038 | srcbuf->get_text() |
| | 5039 | + argofs[argnum], argnum_len, |
| | 5040 | stringize_qu, TRUE, TRUE); |
| | 5041 | } |
| | 5042 | else |
| | 5043 | { |
| | 5044 | CTcTokStringRef actual_src_buf; |
| | 5045 | |
| | 5046 | /* recursively expand macros in the actual text */ |
| | 5047 | actual_src_buf. |
| | 5048 | set_buffer(srcbuf->get_text() + argofs[argnum], |
| | 5049 | argnum_len); |
| | 5050 | if (expand_macros(&actual_src_buf, 0, actual_exp_buf, |
| | 5051 | FALSE, allow_defined, FALSE)) |
| | 5052 | return 1; |
| | 5053 | |
| | 5054 | /* |
| | 5055 | * Append the expanded actual, marking any |
| | 5056 | * fully-expanded tokens as such and removing |
| | 5057 | * end-of-expansion markers. |
| | 5058 | * |
| | 5059 | * We can't leave end-of-expansion markers in the |
| | 5060 | * expanded actual text, because end-of-expansion |
| | 5061 | * markers apply only to the current recursion level, |
| | 5062 | * and we've now exited the actual's recursion level. |
| | 5063 | * However, we must not expand further anything in the |
| | 5064 | * actual's expansion that has already been fully |
| | 5065 | * expanded. To achieve both of these goals, we switch |
| | 5066 | * here from marking the run of text (with the end |
| | 5067 | * marker) to marking individual tokens. |
| | 5068 | */ |
| | 5069 | mark_full_exp_tokens(subexp, actual_exp_buf, TRUE); |
| | 5070 | } |
| | 5071 | |
| | 5072 | /* we've already read the next token, so proceed */ |
| | 5073 | continue; |
| | 5074 | } |
| | 5075 | |
| | 5076 | /* remember the current token as the previous token */ |
| | 5077 | prvprvtok = prvtok; |
| | 5078 | prvtok = tok; |
| | 5079 | |
| | 5080 | /* get the next token of the expansion */ |
| | 5081 | typ = next_on_line(&expsrc, &tok, ¯o_in_embedding_, TRUE); |
| | 5082 | } |
| | 5083 | |
| | 5084 | /* copy the remaining replacement text */ |
| | 5085 | subexp->append(start, tok.get_text() - start); |
| | 5086 | |
| | 5087 | /* success */ |
| | 5088 | return 0; |
| | 5089 | } |
| | 5090 | |
| | 5091 | /* |
| | 5092 | * Skip the source of a delimited macro expansion area (#foreach, #ifempty, #ifnempty). |
| | 5093 | * 'p' starts at the opening delimiter and is advanced past 'parts_to_skip' delimited parts, or to end of line. |
| | 5094 | */ |
| | 5095 | void CTcTokenizer::skip_delimited_group(utf8_ptr *p, int parts_to_skip) |
| | 5096 | { |
| | 5097 | wchar_t delim; |
| | 5098 | |
| | 5099 | /* the character at 'p' is taken as this group's delimiter */ |
| | 5100 | delim = p->getch(); |
| | 5101 | |
| | 5102 | /* |
| | 5103 | * if we're already at the end of the line (null or TOK_END_PP_LINE), |
| | 5104 | * there's no group to skip, so return without moving 'p' |
| | 5105 | */ |
| | 5106 | if (delim == 0 || delim == TOK_END_PP_LINE) |
| | 5107 | return; |
| | 5108 | |
| | 5109 | /* skip the opening delimiter */ |
| | 5110 | p->inc(); |
| | 5111 | |
| | 5112 | /* each closing delimiter ends one part; loop until that many parts are consumed */ |
| | 5113 | while (parts_to_skip != 0) |
| | 5114 | { |
| | 5115 | wchar_t ch; |
| | 5116 | |
| | 5117 | /* look at the current character without consuming it */ |
| | 5118 | ch = p->getch(); |
| | 5119 | |
| | 5120 | /* if it's the end of the line, give up */ |
| | 5121 | if (ch == 0 || ch == TOK_END_PP_LINE) |
| | 5122 | { |
| | 5123 | /* |
| | 5124 | * the line ended before the closing delimiter, so the group is |
| | 5125 | * implicitly ended here; 'p' is left on the terminator |
| | 5126 | */ |
| | 5127 | return; |
| | 5128 | } |
| | 5129 | |
| | 5130 | /* classify the character; the delimiter test takes precedence over the flag tests */ |
| | 5131 | if (ch == delim) |
| | 5132 | { |
| | 5133 | /* that's one less part to skip */ |
| | 5134 | --parts_to_skip; |
| | 5135 | |
| | 5136 | /* skip it */ |
| | 5137 | p->inc(); |
| | 5138 | } |
| | 5139 | else if (ch == TOK_MACRO_FOREACH_FLAG) |
| | 5140 | { |
| | 5141 | /* nested #foreach - skip two parts; NOTE(review): 'p' is still on the flag character, which the recursive call takes as its delimiter - confirm */ |
| | 5142 | skip_delimited_group(p, 2); |
| | 5143 | } |
| | 5144 | else if (ch == TOK_MACRO_IFEMPTY_FLAG |
| | 5145 | || ch == TOK_MACRO_IFNEMPTY_FLAG) |
| | 5146 | { |
| | 5147 | /* nested #ifempty or #ifnempty - skip its one part (same 'p' position caveat as for the nested #foreach) */ |
| | 5148 | skip_delimited_group(p, 1); |
| | 5149 | } |
| | 5150 | else |
| | 5151 | { |
| | 5152 | /* ordinary character - step over it */ |
| | 5153 | p->inc(); |
| | 5154 | } |
| | 5155 | } |
| | 5156 | } |
| | 5157 | |
| | 5158 | /* |
| | 5159 | * Stringize a macro actual parameter value: append the 'actual_len' bytes at 'actual_val' to 'expbuf', |
| | 5160 | * optionally wrapped in 'quote_char', compressing whitespace runs and adding backslash escapes as needed |
| | 5161 | */ |
| | 5162 | void CTcTokenizer::stringize_macro_actual(CTcTokString *expbuf, |
| | 5163 | const char *actual_val, |
| | 5164 | size_t actual_len, char quote_char, |
| | 5165 | int add_open_quote, |
| | 5166 | int add_close_quote) |
| | 5167 | { |
| | 5168 | utf8_ptr src; |
| | 5169 | const char *start; |
| | 5170 | int in_inner_quote; |
| | 5171 | wchar_t inner_quote_char; |
| | 5172 | wchar_t prvch; |
| | 5173 | |
| | 5174 | /* add the open quote if desired */ |
| | 5175 | if (add_open_quote) |
| | 5176 | expbuf->append(&quote_char, 1); |
| | 5177 | |
| | 5178 | /* 'start' marks the first source byte not yet copied to the output */ |
| | 5179 | start = actual_val; |
| | 5180 | |
| | 5181 | /* |
| | 5182 | * add the characters of the actual parameter value, quoting any |
| | 5183 | * quotes or backslashes; text is copied in segments from 'start' |
| | 5184 | */ |
| | 5185 | for (src.set((char *)actual_val), |
| | 5186 | in_inner_quote = FALSE, inner_quote_char = '\0', prvch = '\0' ; |
| | 5187 | src.getptr() < actual_val + actual_len ; ) |
| | 5188 | { |
| | 5189 | wchar_t cur; |
| | 5190 | |
| | 5191 | /* get this character */ |
| | 5192 | cur = src.getch(); |
| | 5193 | |
| | 5194 | /* compress runs of whitespace to single spaces (unless the space follows a backslash) */ |
| | 5195 | if (is_space(cur) && prvch != '\\') |
| | 5196 | { |
| | 5197 | /* flush the pending segment up to this character */ |
| | 5198 | if (src.getptr() > start) |
| | 5199 | expbuf->append(start, src.getptr() - start); |
| | 5200 | |
| | 5201 | /* find the next non-space character */ |
| | 5202 | for ( ; src.getptr() < actual_val + actual_len ; src.inc()) |
| | 5203 | { |
| | 5204 | if (!is_space(src.getch())) |
| | 5205 | break; |
| | 5206 | } |
| | 5207 | |
| | 5208 | /* |
| | 5209 | * if we're not at the start or end of the string, add a |
| | 5210 | * single space to replace the entire run of whitespace -- |
| | 5211 | * don't do this at the start or end of the string, since |
| | 5212 | * we must remove leading and trailing whitespace |
| | 5213 | */ |
| | 5214 | if (prvch != '\0' && src.getptr() < actual_val + actual_len) |
| | 5215 | expbuf->append(" ", 1); |
| | 5216 | |
| | 5217 | /* note that the previous character is a space */ |
| | 5218 | prvch = cur; |
| | 5219 | |
| | 5220 | /* the next segment starts at the first non-space character */ |
| | 5221 | start = src.getptr(); |
| | 5222 | |
| | 5223 | /* proceed - we're already at the next character */ |
| | 5224 | continue; |
| | 5225 | } |
| | 5226 | |
| | 5227 | /* |
| | 5228 | * Check to see if we need to quote this character. Quote any |
| | 5229 | * quote mark matching the enclosing quotes; also quote any |
| | 5230 | * backslash that occurs within nested quotes within the source |
| | 5231 | * material, but not backslashes that occur originally outside |
| | 5232 | * quotes. |
| | 5233 | */ |
| | 5234 | if (cur == quote_char |
| | 5235 | || (cur == '\\' && in_inner_quote)) |
| | 5236 | { |
| | 5237 | /* append the segment up to (but not including) this character */ |
| | 5238 | if (src.getptr() > start) |
| | 5239 | expbuf->append(start, src.getptr() - start); |
| | 5240 | |
| | 5241 | /* add an extra backslash */ |
| | 5242 | expbuf->append("\\", 1); |
| | 5243 | |
| | 5244 | /* the character itself is copied as the start of the next segment */ |
| | 5245 | start = src.getptr(); |
| | 5246 | } |
| | 5247 | |
| | 5248 | /* |
| | 5249 | * if the previous character wasn't a backslash, see whether this character opens or closes an inner quoted |
| | 5250 | * string; NOTE(review): a quote following a doubled (escaped) backslash is also treated as escaped - confirm |
| | 5251 | */ |
| | 5252 | if (prvch != '\\') |
| | 5253 | { |
| | 5254 | /* |
| | 5255 | * If we're in an inner quote, and it's a match for the open |
| | 5256 | * inner quote, we're no longer in a quote. Otherwise, if |
| | 5257 | * we're not in quotes and this is some kind of quote, enter |
| | 5258 | * the new quotes. |
| | 5259 | */ |
| | 5260 | if (in_inner_quote && cur == inner_quote_char) |
| | 5261 | { |
| | 5262 | /* we're leaving the inner quoted string */ |
| | 5263 | in_inner_quote = FALSE; |
| | 5264 | } |
| | 5265 | else if (!in_inner_quote && (cur == '"' || cur == '\'')) |
| | 5266 | { |
| | 5267 | /* we're entering a new inner quoted string */ |
| | 5268 | in_inner_quote = TRUE; |
| | 5269 | inner_quote_char = cur; |
| | 5270 | } |
| | 5271 | } |
| | 5272 | |
| | 5273 | /* remember this as the previous character */ |
| | 5274 | prvch = cur; |
| | 5275 | |
| | 5276 | /* move on to the next character */ |
| | 5277 | src.inc(); |
| | 5278 | } |
| | 5279 | |
| | 5280 | /* if there's anything in the final segment, append it */ |
| | 5281 | if (src.getptr() > start) |
| | 5282 | expbuf->append(start, src.getptr() - start); |
| | 5283 | |
| | 5284 | /* add the close quote if desired */ |
| | 5285 | if (add_close_quote) |
| | 5286 | expbuf->append(&quote_char, 1); |
| | 5287 | } |
| | 5288 | |
| | 5289 | /* |
| | 5290 | * Expand a "defined" preprocessor operator, written as SYM or (SYM) and read from 'src'; copies "1" or "0" into 'subexp' and returns 0, or logs an error and returns 1 |
| | 5291 | */ |
| | 5292 | int CTcTokenizer::expand_defined(CTcTokString *subexp, |
| | 5293 | const CTcTokString *srcbuf, utf8_ptr *src) |
| | 5294 | { |
| | 5295 | CTcToken tok; |
| | 5296 | tc_toktyp_t typ; |
| | 5297 | int paren; |
| | 5298 | int found; |
| | 5299 | |
| | 5300 | /* get the token following the operator */ |
| | 5301 | typ = next_on_line(srcbuf, src, &tok, &macro_in_embedding_, FALSE); |
| | 5302 | |
| | 5303 | /* note whether we have an open paren; if we do, skip it */ |
| | 5304 | paren = (typ == TOKT_LPAR); |
| | 5305 | if (paren) |
| | 5306 | typ = next_on_line(srcbuf, src, &tok, &macro_in_embedding_, FALSE); |
| | 5307 | |
| | 5308 | /* the operand must be a symbol; anything else is an error */ |
| | 5309 | if (typ != TOKT_SYM) |
| | 5310 | { |
| | 5311 | log_error(TCERR_PP_DEFINED_NO_SYM, |
| | 5312 | (int)tok.get_text_len(), tok.get_text()); |
| | 5313 | return 1; |
| | 5314 | } |
| | 5315 | |
| | 5316 | /* look to see if the symbol is defined */ |
| | 5317 | found = (find_define(tok.get_text(), tok.get_text_len()) != 0); |
| | 5318 | |
| | 5319 | /* expand the macro to "1" if found, "0" if not */ |
| | 5320 | subexp->copy(found ? "1" : "0", 1); |
| | 5321 | |
| | 5322 | /* check for and skip the matching close paren */ |
| | 5323 | if (paren) |
| | 5324 | { |
| | 5325 | /* require the closing paren */ |
| | 5326 | if (next_on_line(srcbuf, src, &tok, &macro_in_embedding_, FALSE) |
| | 5327 | != TOKT_RPAR) |
| | 5328 | { |
| | 5329 | /* generate an error if we don't find it */ |
| | 5330 | log_error(TCERR_PP_DEFINED_RPAR); |
| | 5331 | return 1; |
| | 5332 | } |
| | 5333 | } |
| | 5334 | |
| | 5335 | /* success */ |
| | 5336 | return 0; |
| | 5337 | } |
| | 5338 | |
| | 5339 | |
| | 5340 | /* ------------------------------------------------------------------------ */ |
| | 5341 | /* |
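| | 5342 | * Process comments. Removes comment text from the line buffer in place: each block comment opener is replaced with a single space and the rest of the comment is dropped, and a // comment truncates the line. |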
| | 5343 | */ |
| | 5344 | void CTcTokenizer::process_comments(size_t start_ofs) |
| | 5345 | { |
| | 5346 | utf8_ptr src; |
| | 5347 | utf8_ptr dst; |
| | 5348 | int trailing_sp_after_bs; |
| | 5349 | |
| | 5350 | /* we haven't found a backslash followed by trailing space yet */ |
| | 5351 | trailing_sp_after_bs = FALSE; |
| | 5352 | |
| | 5353 | /* |
| | 5354 | * Scan the line. When inside a comment, replace each character of |
| | 5355 | * the comment with a space. When outside comments, simply copy |
| | 5356 | * characters intact. |
| | 5357 | * |
| | 5358 | * Note that we need a separate src and dst pointer, because the |
| | 5359 | * character length of the original and replaced characters may |
| | 5360 | * change. Fortunately, the length will never do anything but |
| | 5361 | * shrink or stay the same, since the only change we make is to |
| | 5362 | * insert spaces, which are always one byte apiece in UTF-8; we can |
| | 5363 | * therefore update the buffer in place. |
| | 5364 | */ |
| | 5365 | for (src.set(linebuf_.get_buf() + start_ofs), |
| | 5366 | dst.set(linebuf_.get_buf() + start_ofs) ; |
| | 5367 | src.getch() != '\0' ; src.inc()) |
| | 5368 | { |
| | 5369 | wchar_t cur; |
| | 5370 | |
| | 5371 | /* get the current character */ |
| | 5372 | cur = src.getch(); |
| | 5373 | |
| | 5374 | /* check to see if we're in a comment */ |
| | 5375 | if (str_->is_in_comment()) |
| | 5376 | { |
| | 5377 | /* |
| | 5378 | * check to see if the comment is ending, or if we have an |
| | 5379 | * apparent nested comment (which isn't allowed) |
| | 5380 | */ |
| | 5381 | if (cur == '*' && src.getch_at(1) == '/') |
| | 5382 | { |
| | 5383 | /* |
| | 5384 | * skip an extra character of the source - we'll skip |
| | 5385 | * one in the main loop, so we only need to skip one |
| | 5386 | * more now |
| | 5387 | */ |
| | 5388 | src.inc(); |
| | 5389 | |
| | 5390 | /* we're no longer in a comment */ |
| | 5391 | str_->set_in_comment(FALSE); |
| | 5392 | } |
| | 5393 | else if (cur == '/' && src.getch_at(1) == '*') |
| | 5394 | { |
| | 5395 | /* looks like a nested comment - warn about it */ |
| | 5396 | if (!G_prs->get_syntax_only()) |
| | 5397 | log_warning(TCERR_NESTED_COMMENT); |
| | 5398 | } |
| | 5399 | |
| | 5400 | /* continue without copying anything from inside the comment */ |
| | 5401 | continue; |
| | 5402 | } |
| | 5403 | else if (in_quote_ != '\0') |
| | 5404 | { |
| | 5405 | /* see what we have */ |
| | 5406 | if (cur == '\\') |
| | 5407 | { |
| | 5408 | /* |
| | 5409 | * It's a backslash sequence -- copy the backslash to |
| | 5410 | * the output, and skip it. Note that we don't have to |
| | 5411 | * worry about the line ending with a backslash, since |
| | 5412 | * the line reader will already have considered that to |
| | 5413 | * be a line splice. |
| | 5414 | */ |
| | 5415 | src.inc(); |
| | 5416 | dst.setch(cur); |
| | 5417 | |
| | 5418 | /* get the next character, so we copy it directly */ |
| | 5419 | cur = src.getch(); |
| | 5420 | } |
| | 5421 | else if (cur == in_quote_) |
| | 5422 | { |
| | 5423 | /* |
| | 5424 | * this is the closing quote character - simply note |
| | 5425 | * that we're no longer in a quoted string |
| | 5426 | */ |
| | 5427 | in_quote_ = '\0'; |
| | 5428 | } |
| | 5429 | else if (in_quote_ == '"' && !comment_in_embedding_ |
| | 5430 | && cur == '<' && src.getch_at(1) == '<') |
| | 5431 | { |
| | 5432 | /* |
| | 5433 | * it's an embedded expression starting point - skip the |
| | 5434 | * first of the '<' characters (the enclosing loop will |
| | 5435 | * skip the second one) |
| | 5436 | */ |
| | 5437 | src.inc(); |
| | 5438 | |
| | 5439 | /* the string is done */ |
| | 5440 | in_quote_ = '\0'; |
| | 5441 | |
| | 5442 | /* we're in an embedding now */ |
| | 5443 | comment_in_embedding_ = TRUE; |
| | 5444 | |
| | 5445 | /* copy the extra '<' to the output */ |
| | 5446 | dst.setch('<'); |
| | 5447 | } |
| | 5448 | } |
| | 5449 | else |
| | 5450 | { |
| | 5451 | /* |
| | 5452 | * Monitor the stream for a backslash followed by trailing |
| | 5453 | * spaces. If this is a backslash, note that we might have a |
| | 5454 | * backslash with trailing spaces; if it's a space, we might |
| | 5455 | * still have this, so leave the flag alone; if it's anything |
| | 5456 | * else, clear the flag, since we've found something other |
| | 5457 | * than backslashes and spaces. |
| | 5458 | */ |
| | 5459 | if (cur == '\\') |
| | 5460 | trailing_sp_after_bs = TRUE; |
| | 5461 | else if (!is_space(cur)) |
| | 5462 | trailing_sp_after_bs = FALSE; |
| | 5463 | |
| | 5464 | /* check to see if we're starting a comment */ |
| | 5465 | if (cur == '/') |
| | 5466 | { |
| | 5467 | switch(src.getch_at(1)) |
| | 5468 | { |
| | 5469 | case '*': |
| | 5470 | /* note that we're starting a comment */ |
| | 5471 | str_->set_in_comment(TRUE); |
| | 5472 | |
| | 5473 | /* |
| | 5474 | * replace the starting slash with a space - this |
| | 5475 | * will effectively replace the entire comment with |
| | 5476 | * a single space, since we won't copy anything else |
| | 5477 | * from inside the comment |
| | 5478 | */ |
| | 5479 | cur = ' '; |
| | 5480 | break; |
| | 5481 | |
| | 5482 | case '/': |
| | 5483 | /* |
| | 5484 | * comment to end of line - we can terminate the |
| | 5485 | * line at the opening slash and return immediately, |
| | 5486 | * because the entire rest of the line is to be |
| | 5487 | * ignored |
| | 5488 | */ |
| | 5489 | dst.setch('\0'); |
| | 5490 | return; |
| | 5491 | |
| | 5492 | default: |
| | 5493 | /* not a comment - copy it as-is */ |
| | 5494 | break; |
| | 5495 | } |
| | 5496 | } |
| | 5497 | else if (cur == '"' || cur == '\'') |
| | 5498 | { |
| | 5499 | /* it's the start of a new string */ |
| | 5500 | in_quote_ = cur; |
| | 5501 | } |
| | 5502 | else if (cur < 0x09) |
| | 5503 | { |
| | 5504 | /* |
| | 5505 | * it's a special flag character - we need to guarantee |
| | 5506 | * that this character never occurs in input (it |
| | 5507 | * shouldn't anyway, since it's a control character), so |
| | 5508 | * translate it to a space |
| | 5509 | */ |
| | 5510 | cur = ' '; |
| | 5511 | } |
| | 5512 | else if (comment_in_embedding_ |
| | 5513 | && cur == '>' && src.getch_at(1) == '>') |
| | 5514 | { |
| | 5515 | /* |
| | 5516 | * it's the end of an embedded expression - we're back |
| | 5517 | * in a double-quoted string (only double-quoted strings |
| | 5518 | * can have embedded expressions) |
| | 5519 | */ |
| | 5520 | in_quote_ = '"'; |
| | 5521 | comment_in_embedding_ = FALSE; |
| | 5522 | |
| | 5523 | /* skip the extra '>' and copy it to the output */ |
| | 5524 | src.inc(); |
| | 5525 | dst.setch('>'); |
| | 5526 | } |
| | 5527 | } |
| | 5528 | |
| | 5529 | /* set the current character in the output */ |
| | 5530 | dst.setch(cur); |
| | 5531 | } |
| | 5532 | |
| | 5533 | /* set the updated line buffer length */ |
| | 5534 | linebuf_.set_text_len(dst.getptr() - linebuf_.get_buf()); |
| | 5535 | |
| | 5536 | /* |
| | 5537 | * if we found a backslash with nothing following but whitespace, flag |
| | 5538 | * a warning, since they might have meant the backslash as a line |
| | 5539 | * continuation signal, but we're not interpreting it that way because |
| | 5540 | * of the trailing whitespace |
| | 5541 | */ |
| | 5542 | if (trailing_sp_after_bs) |
| | 5543 | log_warning(TCERR_TRAILING_SP_AFTER_BS); |
| | 5544 | } |
| | 5545 | |
| | 5546 | /* |
| | 5547 | * Splice strings. Splice additional lines onto the current line until |
| | 5548 | * we find the end of the string. |
| | 5549 | */ |
| | 5550 | void CTcTokenizer::splice_string() |
| | 5551 | { |
| | 5552 | utf8_ptr p; |
| | 5553 | int in_quote; |
| | 5554 | int in_embedding; |
| | 5555 | char unterm; |
| | 5556 | |
| | 5557 | /* presume we'll find proper termination */ |
| | 5558 | unterm = '\0'; |
| | 5559 | |
| | 5560 | /* |
| | 5561 | * remember the current in-quote and in-embedding status, as of the |
| | 5562 | * end of the current line - when we splice, the line reader will |
| | 5563 | * update these to the status at the end of the newly-read material, |
| | 5564 | * but we want to scan from the beginning of the newly-read material |
| | 5565 | */ |
| | 5566 | in_quote = in_quote_; |
| | 5567 | in_embedding = comment_in_embedding_; |
| | 5568 | |
| | 5569 | /* keep going until we find the end of the string */ |
| | 5570 | for (;;) |
| | 5571 | { |
| | 5572 | int new_line_ofs; |
| | 5573 | char *new_line_p; |
| | 5574 | wchar_t cur; |
| | 5575 | |
| | 5576 | /* |
| | 5577 | * append a space at the end of the line, to replace the newline |
| | 5578 | * that we've eliminated |
| | 5579 | */ |
| | 5580 | if (string_newline_spacing_) |
| | 5581 | linebuf_.append(" ", 1); |
| | 5582 | |
| | 5583 | /* splice another line */ |
| | 5584 | new_line_ofs = read_line(TRUE); |
| | 5585 | |
| | 5586 | /* if we reached end of file, there's no more splicing we can do */ |
| | 5587 | if (new_line_ofs == -1) |
| | 5588 | break; |
| | 5589 | |
| | 5590 | /* get a pointer to the new text */ |
| | 5591 | new_line_p = (char *)linebuf_.get_text() + new_line_ofs; |
| | 5592 | |
| | 5593 | /* skip leading spaces in the new line */ |
| | 5594 | for (p.set(new_line_p) ; is_space(p.getch()) ; p.inc()) ; |
| | 5595 | |
| | 5596 | /* if we skipped any spaces, remove them from the text */ |
| | 5597 | if (p.getptr() > new_line_p) |
| | 5598 | { |
| | 5599 | size_t rem; |
| | 5600 | size_t new_len; |
| | 5601 | |
| | 5602 | /* calculate the length of the rest of the line */ |
| | 5603 | rem = linebuf_.get_text_len() - (p.getptr() - linebuf_.get_buf()); |
| | 5604 | |
| | 5605 | /* calculate the new length of the line */ |
| | 5606 | new_len = (new_line_p - linebuf_.get_buf()) + rem; |
| | 5607 | |
| | 5608 | /* move the rest of the line down over the spaces */ |
| | 5609 | memmove(new_line_p, p.getptr(), rem); |
| | 5610 | |
| | 5611 | /* set the new length */ |
| | 5612 | linebuf_.set_text_len(new_len); |
| | 5613 | } |
| | 5614 | |
| | 5615 | /* |
| | 5616 | * If the new line contains only "}" or ";", presume that the |
| | 5617 | * string is unterminated and terminate it here. (This |
| | 5618 | * heuristic could flag well-formed strings as erroneous, but |
| | 5619 | * users can always work around this by moving these characters |
| | 5620 | * onto lines that contain at least one other non-whitespace |
| | 5621 | * character.) |
| | 5622 | */ |
| | 5623 | p.set(new_line_p); |
| | 5624 | if (p.getch() == '}' || p.getch() == ';') |
| | 5625 | { |
| | 5626 | /* skip trailing whitespace */ |
| | 5627 | for (p.inc() ; is_space(p.getch()) ; p.inc()) ; |
| | 5628 | |
| | 5629 | /* |
| | 5630 | * if there's nothing else on the line, presume it's an |
| | 5631 | * unterminated string |
| | 5632 | */ |
| | 5633 | if (p.getch() == '\0') |
| | 5634 | { |
| | 5635 | /* log the error */ |
| | 5636 | log_error(TCERR_POSSIBLE_UNTERM_STR, |
| | 5637 | appended_linenum_); |
| | 5638 | |
| | 5639 | /* remember that it's unterminated */ |
| | 5640 | unterm = (char)in_quote; |
| | 5641 | |
| | 5642 | /* |
| | 5643 | * since we're adding a presumed close quote that never |
| | 5644 | * appears in the text, we need to figure the new |
| | 5645 | * in-string status for the line; clear the in-quote |
| | 5646 | * flag, and re-scan comments from the current point on |
| | 5647 | * the line |
| | 5648 | */ |
| | 5649 | in_quote_ = '\0'; |
| | 5650 | process_comments(new_line_p - linebuf_.get_buf()); |
| | 5651 | |
| | 5652 | /* we're done - unsplice from the start of the new line */ |
| | 5653 | p.set(new_line_p); |
| | 5654 | goto done; |
| | 5655 | } |
| | 5656 | } |
| | 5657 | |
| | 5658 | /* scan for the end of the string */ |
| | 5659 | for (p.set(new_line_p) ;; p.inc()) |
| | 5660 | { |
| | 5661 | /* get this character */ |
| | 5662 | cur = p.getch(); |
| | 5663 | |
| | 5664 | /* see what we have */ |
| | 5665 | if (cur == '\\') |
| | 5666 | { |
| | 5667 | /* it's a backslash sequence - skip the extra character */ |
| | 5668 | p.inc(); |
| | 5669 | } |
| | 5670 | else if (cur == in_quote) |
| | 5671 | { |
| | 5672 | /* it's our quote character - skip it, and we're done */ |
| | 5673 | p.inc(); |
| | 5674 | goto done; |
| | 5675 | } |
| | 5676 | else if (in_quote == '"' && !in_embedding |
| | 5677 | && cur == '<' && p.getch_at(1) == '<') |
| | 5678 | { |
| | 5679 | /* |
| | 5680 | * it's an embedded expression starter - skip the '<<' |
| | 5681 | * sequence and stop scanning |
| | 5682 | */ |
| | 5683 | p.inc(); |
| | 5684 | p.inc(); |
| | 5685 | goto done; |
| | 5686 | } |
| | 5687 | else if (cur == '\0') |
| | 5688 | { |
| | 5689 | /* end of line - go back and splice another line */ |
| | 5690 | break; |
| | 5691 | } |
| | 5692 | } |
| | 5693 | } |
| | 5694 | |
| | 5695 | done: |
| | 5696 | /* unsplice the line at the current point */ |
| | 5697 | unsplice_line(p.getptr()); |
| | 5698 | |
| | 5699 | /* if we found an unterminated string, supply implicit termination */ |
| | 5700 | if (unterm != '\0') |
| | 5701 | linebuf_.append(&unterm, 1); |
| | 5702 | } |
| | 5703 | |
| | 5704 | |
| | 5705 | /* ------------------------------------------------------------------------ */ |
| | 5706 | /* |
| | 5707 | * Process a #pragma directive |
| | 5708 | */ |
| | 5709 | void CTcTokenizer::pp_pragma() |
| | 5710 | { |
| | 5711 | struct pp_kw_def |
| | 5712 | { |
| | 5713 | const char *kw; |
| | 5714 | void (CTcTokenizer::*func)(); |
| | 5715 | }; |
| | 5716 | static pp_kw_def kwlist[] = |
| | 5717 | { |
| | 5718 | // { "c", &CTcTokenizer::pragma_c }, -- obsolete |
| | 5719 | { "once", &CTcTokenizer::pragma_once }, |
| | 5720 | { "all_once", &CTcTokenizer::pragma_all_once }, |
| | 5721 | { "message", &CTcTokenizer::pragma_message }, |
| | 5722 | { "newline_spacing", &CTcTokenizer::pragma_newline_spacing }, |
| | 5723 | { "sourceTextGroup", &CTcTokenizer::pragma_source_text_group }, |
| | 5724 | { 0, 0 } |
| | 5725 | }; |
| | 5726 | pp_kw_def *kwp; |
| | 5727 | size_t kwlen; |
| | 5728 | |
| | 5729 | /* get the pragma keyword */ |
| | 5730 | if (next_on_line() != TOKT_SYM) |
| | 5731 | { |
| | 5732 | log_warning(TCERR_UNKNOWN_PRAGMA, |
| | 5733 | (int)curtok_.get_text_len(), curtok_.get_text()); |
| | 5734 | return; |
| | 5735 | } |
| | 5736 | |
| | 5737 | /* get the keyword length */ |
| | 5738 | kwlen = curtok_.get_text_len(); |
| | 5739 | |
| | 5740 | /* scan the pragma list */ |
| | 5741 | for (kwp = kwlist ; kwp->kw != 0 ; ++kwp) |
| | 5742 | { |
| | 5743 | /* is this our keyword? */ |
| | 5744 | if (strlen(kwp->kw) == kwlen |
| | 5745 | && memicmp(curtok_.get_text(), kwp->kw, kwlen) == 0) |
| | 5746 | { |
| | 5747 | /* this is our keyword - invoke the handler */ |
| | 5748 | (this->*(kwp->func))(); |
| | 5749 | |
| | 5750 | /* we're done */ |
| | 5751 | return; |
| | 5752 | } |
| | 5753 | } |
| | 5754 | |
| | 5755 | /* we didn't find it - generate a warning */ |
| | 5756 | log_warning(TCERR_UNKNOWN_PRAGMA, kwlen, curtok_.get_text()); |
| | 5757 | } |
| | 5758 | |
#if 0 // #pragma C is not currently used
/*
 *   Process a #pragma C directive.  "+" or nothing further on the line
 *   selects C mode, "-" selects standard mode; anything else draws a
 *   syntax warning and leaves the stream's current mode in effect.
 */
void CTcTokenizer::pragma_c()
{
    int c_mode;

    /* the token after the keyword selects the mode */
    switch (next_on_line())
    {
    case TOKT_PLUS:
    case TOKT_EOF:
        /* explicit "+" or nothing at all - C mode */
        c_mode = TRUE;
        break;

    case TOKT_MINUS:
        /* "-" - standard mode */
        c_mode = FALSE;
        break;

    default:
        /* unrecognized - complain, and keep whatever mode we had */
        log_warning(TCERR_BAD_PRAGMA_SYNTAX);
        c_mode = str_->is_pragma_c();
        break;
    }

    /*
     *   the directive stays in the output only when we're merely
     *   preprocessing; otherwise drop it
     */
    if (!pp_only_mode_)
        clear_linebuf();

    /* record the mode in the current stream */
    str_->set_pragma_c(c_mode);

    /* tell the parser as well, when one exists */
    if (G_prs != 0)
        G_prs->set_pragma_c(c_mode);
}
#endif
| | 5800 | |
| | 5801 | /* |
| | 5802 | * Process a #pragma once directive |
| | 5803 | */ |
| | 5804 | void CTcTokenizer::pragma_once() |
| | 5805 | { |
| | 5806 | /* add this file to the ONCE list */ |
| | 5807 | add_include_once(str_->get_desc()->get_fname()); |
| | 5808 | |
| | 5809 | /* don't retain this pragma in the result */ |
| | 5810 | clear_linebuf(); |
| | 5811 | } |
| | 5812 | |
| | 5813 | /* |
| | 5814 | * Process a #pragma all_once directive |
| | 5815 | */ |
| | 5816 | void CTcTokenizer::pragma_all_once() |
| | 5817 | { |
| | 5818 | tc_toktyp_t tok; |
| | 5819 | |
| | 5820 | /* get the next token */ |
| | 5821 | tok = next_on_line(); |
| | 5822 | |
| | 5823 | /* |
| | 5824 | * "+" or empty (end of line or whitespace) indicates ALL_ONCE mode; |
| | 5825 | * '-' indicates standard mode |
| | 5826 | */ |
| | 5827 | if (tok == TOKT_PLUS || tok == TOKT_EOF) |
| | 5828 | all_once_ = TRUE; |
| | 5829 | else if (tok == TOKT_MINUS) |
| | 5830 | all_once_ = FALSE; |
| | 5831 | else |
| | 5832 | log_warning(TCERR_BAD_PRAGMA_SYNTAX); |
| | 5833 | |
| | 5834 | /* don't retain this pragma in the result */ |
| | 5835 | clear_linebuf(); |
| | 5836 | } |
| | 5837 | |
| | 5838 | /* |
| | 5839 | * Process a #pragma message directive |
| | 5840 | */ |
| | 5841 | void CTcTokenizer::pragma_message() |
| | 5842 | { |
| | 5843 | size_t startofs; |
| | 5844 | |
| | 5845 | /* |
| | 5846 | * copy the source line through the "message" token to the macro |
| | 5847 | * expansion buffer - we don't want to expand that part, but we want |
| | 5848 | * it to appear in the expansion, so just copy the original |
| | 5849 | */ |
| | 5850 | startofs = (curtok_.get_text() + curtok_.get_text_len() |
| | 5851 | - linebuf_.get_text()); |
| | 5852 | expbuf_.copy(linebuf_.get_text(), startofs); |
| | 5853 | |
| | 5854 | /* expand macros; don't allow reading additional lines */ |
| | 5855 | if (expand_macros_curline(FALSE, FALSE, TRUE)) |
| | 5856 | { |
| | 5857 | clear_linebuf(); |
| | 5858 | return; |
| | 5859 | } |
| | 5860 | |
| | 5861 | /* |
| | 5862 | * If we're in normal compilation mode, display the message. If we're |
| | 5863 | * in preprocess-only mode, simply retain the message in the |
| | 5864 | * preprocessed result, so that it shows up when the result is |
| | 5865 | * compiled. |
| | 5866 | * |
| | 5867 | * Ignore messages in list-includes mode. |
| | 5868 | */ |
| | 5869 | if (!pp_only_mode_ && !list_includes_mode_) |
| | 5870 | { |
| | 5871 | /* set up at the first post-processed token */ |
| | 5872 | start_new_line(&expbuf_, startofs); |
| | 5873 | |
| | 5874 | /* if there's an open paren, skip it */ |
| | 5875 | if (next_on_line_xlat(0) == TOKT_LPAR) |
| | 5876 | next_on_line_xlat(0); |
| | 5877 | else |
| | 5878 | log_warning(TCERR_BAD_PRAGMA_SYNTAX); |
| | 5879 | |
| | 5880 | /* keep going until we reach the closing paren */ |
| | 5881 | while (curtok_.gettyp() != TOKT_RPAR |
| | 5882 | && curtok_.gettyp() != TOKT_EOF) |
| | 5883 | { |
| | 5884 | /* display this token */ |
| | 5885 | switch(curtok_.gettyp()) |
| | 5886 | { |
| | 5887 | case TOKT_SSTR: |
| | 5888 | case TOKT_DSTR: |
| | 5889 | case TOKT_SYM: |
| | 5890 | /* display the text of the token */ |
| | 5891 | msg_str(curtok_.get_text(), curtok_.get_text_len()); |
| | 5892 | break; |
| | 5893 | |
| | 5894 | case TOKT_INT: |
| | 5895 | /* display the integer */ |
| | 5896 | msg_long(curtok_.get_int_val()); |
| | 5897 | break; |
| | 5898 | |
| | 5899 | default: |
| | 5900 | /* ignore anything else */ |
| | 5901 | break; |
| | 5902 | } |
| | 5903 | |
| | 5904 | /* get the next token */ |
| | 5905 | next_on_line_xlat(0); |
| | 5906 | } |
| | 5907 | |
| | 5908 | /* end the line */ |
| | 5909 | msg_str("\n", 1); |
| | 5910 | |
| | 5911 | /* remove the message from the result text */ |
| | 5912 | clear_linebuf(); |
| | 5913 | } |
| | 5914 | else |
| | 5915 | { |
| | 5916 | /* preprocessing - copy expanded text to line buffer */ |
| | 5917 | linebuf_.copy(expbuf_.get_text(), expbuf_.get_text_len()); |
| | 5918 | } |
| | 5919 | } |
| | 5920 | |
| | 5921 | /* |
| | 5922 | * Process a #pragma newline_spacing(on/off) directive |
| | 5923 | */ |
| | 5924 | void CTcTokenizer::pragma_newline_spacing() |
| | 5925 | { |
| | 5926 | int f; |
| | 5927 | |
| | 5928 | /* if we're in preprocess-only mode, just pass the pragma through */ |
| | 5929 | if (pp_only_mode_) |
| | 5930 | return; |
| | 5931 | |
| | 5932 | /* get the '(' token and the on/off token */ |
| | 5933 | if (next_on_line() != TOKT_LPAR || next_on_line() != TOKT_SYM) |
| | 5934 | { |
| | 5935 | log_warning(TCERR_BAD_PRAGMA_SYNTAX); |
| | 5936 | goto done; |
| | 5937 | } |
| | 5938 | |
| | 5939 | /* note the new mode flag */ |
| | 5940 | if (curtok_.get_text_len() == 2 |
| | 5941 | && memcmp(curtok_.get_text(), "on", 2) == 0) |
| | 5942 | { |
| | 5943 | /* it's 'on' */ |
| | 5944 | f = TRUE; |
| | 5945 | } |
| | 5946 | else if (curtok_.get_text_len() == 3 |
| | 5947 | && memcmp(curtok_.get_text(), "off", 3) == 0) |
| | 5948 | { |
| | 5949 | /* it's 'off' */ |
| | 5950 | f = FALSE; |
| | 5951 | } |
| | 5952 | else |
| | 5953 | { |
| | 5954 | log_warning(TCERR_BAD_PRAGMA_SYNTAX); |
| | 5955 | goto done; |
| | 5956 | } |
| | 5957 | |
| | 5958 | /* make sure we have the ')' token */ |
| | 5959 | if (next_on_line() != TOKT_RPAR) |
| | 5960 | { |
| | 5961 | log_warning(TCERR_BAD_PRAGMA_SYNTAX); |
| | 5962 | goto done; |
| | 5963 | } |
| | 5964 | |
| | 5965 | /* set the new mode */ |
| | 5966 | string_newline_spacing_ = f; |
| | 5967 | |
| | 5968 | done: |
| | 5969 | /* done - discard this line buffer */ |
| | 5970 | clear_linebuf(); |
| | 5971 | } |
| | 5972 | |
| | 5973 | |
| | 5974 | /* |
| | 5975 | * Process a #pragma sourceTextGroup(on/off) directive |
| | 5976 | */ |
| | 5977 | void CTcTokenizer::pragma_source_text_group() |
| | 5978 | { |
| | 5979 | tc_toktyp_t tok; |
| | 5980 | int f; |
| | 5981 | |
| | 5982 | /* if we're in preprocess-only mode, just pass the pragma through */ |
| | 5983 | if (pp_only_mode_) |
| | 5984 | return; |
| | 5985 | |
| | 5986 | /* get the '(' token and the on/off token, if present */ |
| | 5987 | if ((tok = next_on_line()) == TOKT_EOF) |
| | 5988 | { |
| | 5989 | /* no on/off - by default it's on */ |
| | 5990 | f = TRUE; |
| | 5991 | } |
| | 5992 | else if (tok == TOKT_LPAR && next_on_line() == TOKT_SYM) |
| | 5993 | { |
| | 5994 | /* get the on/off mode */ |
| | 5995 | if (curtok_.get_text_len() == 2 |
| | 5996 | && memcmp(curtok_.get_text(), "on", 2) == 0) |
| | 5997 | { |
| | 5998 | /* it's 'on' */ |
| | 5999 | f = TRUE; |
| | 6000 | } |
| | 6001 | else if (curtok_.get_text_len() == 3 |
| | 6002 | && memcmp(curtok_.get_text(), "off", 3) == 0) |
| | 6003 | { |
| | 6004 | /* it's 'off' */ |
| | 6005 | f = FALSE; |
| | 6006 | } |
| | 6007 | else |
| | 6008 | { |
| | 6009 | log_warning(TCERR_BAD_PRAGMA_SYNTAX); |
| | 6010 | goto done; |
| | 6011 | } |
| | 6012 | |
| | 6013 | /* make sure we have the ')' token */ |
| | 6014 | if (next_on_line() != TOKT_RPAR) |
| | 6015 | { |
| | 6016 | log_warning(TCERR_BAD_PRAGMA_SYNTAX); |
| | 6017 | goto done; |
| | 6018 | } |
| | 6019 | } |
| | 6020 | else |
| | 6021 | { |
| | 6022 | /* anything else is invalid syntax */ |
| | 6023 | log_warning(TCERR_BAD_PRAGMA_SYNTAX); |
| | 6024 | goto done; |
| | 6025 | } |
| | 6026 | |
| | 6027 | /* set the new mode in the parser */ |
| | 6028 | G_prs->set_source_text_group_mode(f); |
| | 6029 | |
| | 6030 | done: |
| | 6031 | /* done - discard this line buffer */ |
| | 6032 | clear_linebuf(); |
| | 6033 | } |
| | 6034 | |
| | 6035 | |
| | 6036 | /* ------------------------------------------------------------------------ */ |
| | 6037 | /* |
| | 6038 | * Process a #charset directive |
| | 6039 | */ |
| | 6040 | void CTcTokenizer::pp_charset() |
| | 6041 | { |
| | 6042 | /* |
| | 6043 | * Encountering a #charset directive within the tokenizer is always |
| | 6044 | * an error. If the file opener managed to use a #charset, we'll |
| | 6045 | * never see it, because the file opener will have skipped it before |
| | 6046 | * giving us the file. |
| | 6047 | * |
| | 6048 | * If we flagged a #charset error when opening the file, indicate |
| | 6049 | * that the problem is that the character set given was unloadable; |
| | 6050 | * otherwise, the problem is that #charset is in the wrong place. |
| | 6051 | */ |
| | 6052 | log_error(str_ != 0 && str_->get_charset_error() |
| | 6053 | ? TCERR_CANT_LOAD_CHARSET : TCERR_UNEXPECTED_CHARSET); |
| | 6054 | |
| | 6055 | /* don't retain this pragma in the result */ |
| | 6056 | clear_linebuf(); |
| | 6057 | } |
| | 6058 | |
| | 6059 | /* ------------------------------------------------------------------------ */ |
| | 6060 | /* |
| | 6061 | * Process a #include directive |
| | 6062 | */ |
| | 6063 | void CTcTokenizer::pp_include() |
| | 6064 | { |
| | 6065 | wchar_t match; |
| | 6066 | int is_local; |
| | 6067 | int is_absolute; |
| | 6068 | utf8_ptr fname; |
| | 6069 | CTcSrcFile *new_src; |
| | 6070 | int charset_error; |
| | 6071 | int default_charset_error; |
| | 6072 | char full_name[OSFNMAX]; |
| | 6073 | char lcl_name[OSFNMAX]; |
| | 6074 | int found; |
| | 6075 | CTcTokFileDesc *desc; |
| | 6076 | int expand; |
| | 6077 | utf8_ptr start; |
| | 6078 | |
| | 6079 | /* presume we'll expand macros */ |
| | 6080 | expand = TRUE; |
| | 6081 | |
| | 6082 | /* |
| | 6083 | * Check to see if expansion is needed. Macro expansion is needed |
| | 6084 | * only if the source line is not of one of the following forms: |
| | 6085 | * |
| | 6086 | *. #include "filename" |
| | 6087 | *. #include <filename> |
| | 6088 | */ |
| | 6089 | for (start = p_ ; is_space(p_.getch()) ; p_.inc()) ; |
| | 6090 | switch(p_.getch()) |
| | 6091 | { |
| | 6092 | case '<': |
| | 6093 | /* look for a matching '>' */ |
| | 6094 | match = '>'; |
| | 6095 | goto find_match; |
| | 6096 | |
| | 6097 | case '"': |
| | 6098 | /* look for a matching '"' */ |
| | 6099 | match = '"'; |
| | 6100 | goto find_match; |
| | 6101 | |
| | 6102 | find_match: |
| | 6103 | /* find the matching character */ |
| | 6104 | for (p_.inc() ; p_.getch() != '\0' && p_.getch() != match ; |
| | 6105 | p_.inc()) ; |
| | 6106 | |
| | 6107 | /* if we found it, check for other characters on the line */ |
| | 6108 | if (p_.getch() == match) |
| | 6109 | { |
| | 6110 | /* skip the matching character */ |
| | 6111 | p_.inc(); |
| | 6112 | |
| | 6113 | /* skip whitespace */ |
| | 6114 | while (is_space(p_.getch())) |
| | 6115 | p_.inc(); |
| | 6116 | |
| | 6117 | /* |
| | 6118 | * make sure there's nothing else on the line - if not, it's |
| | 6119 | * one of the approved formats, so there's no need to do |
| | 6120 | * macro expansion |
| | 6121 | */ |
| | 6122 | if (p_.getch() == 0) |
| | 6123 | expand = FALSE; |
| | 6124 | } |
| | 6125 | break; |
| | 6126 | } |
| | 6127 | |
| | 6128 | /* go back to read from the original starting point */ |
| | 6129 | p_ = start; |
| | 6130 | |
| | 6131 | /* expand macros if necessary */ |
| | 6132 | if (expand) |
| | 6133 | { |
| | 6134 | /* do the expansion */ |
| | 6135 | if (expand_macros_curline(FALSE, FALSE, FALSE)) |
| | 6136 | { |
| | 6137 | /* clear the buffer and abort */ |
| | 6138 | clear_linebuf(); |
| | 6139 | return; |
| | 6140 | } |
| | 6141 | |
| | 6142 | /* |
| | 6143 | * remove any expansion flags, so that we don't have to worry about |
| | 6144 | * parsing or skipping them |
| | 6145 | */ |
| | 6146 | remove_expansion_flags(&expbuf_); |
| | 6147 | |
| | 6148 | /* read from the expansion buffer */ |
| | 6149 | start_new_line(&expbuf_, 0); |
| | 6150 | } |
| | 6151 | |
| | 6152 | /* skip leading whitespace */ |
| | 6153 | for ( ; is_space(p_.getch()) ; p_.inc()) ; |
| | 6154 | |
| | 6155 | /* we have to be looking at at '"' or '<' character */ |
| | 6156 | if (p_.getch() == '"') |
| | 6157 | { |
| | 6158 | /* look for a matching quote, and look for a local file */ |
| | 6159 | match = '"'; |
| | 6160 | is_local = TRUE; |
| | 6161 | } |
| | 6162 | else if (p_.getch() == '<') |
| | 6163 | { |
| | 6164 | /* look for a matching angle bracket, and look for a system file */ |
| | 6165 | match = '>'; |
| | 6166 | is_local = FALSE; |
| | 6167 | } |
| | 6168 | else |
| | 6169 | { |
| | 6170 | /* invalid syntax - log an error and ignore the line */ |
| | 6171 | log_error(TCERR_BAD_INC_SYNTAX); |
| | 6172 | clear_linebuf(); |
| | 6173 | return; |
| | 6174 | } |
| | 6175 | |
| | 6176 | /* skip the open quote, and remember where the filename starts */ |
| | 6177 | p_.inc(); |
| | 6178 | fname = p_; |
| | 6179 | |
| | 6180 | /* find the matching quote */ |
| | 6181 | for ( ; p_.getch() != '\0' && p_.getch() != match ; p_.inc()) ; |
| | 6182 | |
| | 6183 | /* if we didn't find the match, log an error and ignore the line */ |
| | 6184 | if (p_.getch() == '\0') |
| | 6185 | { |
| | 6186 | log_error(TCERR_BAD_INC_SYNTAX); |
| | 6187 | clear_linebuf(); |
| | 6188 | return; |
| | 6189 | } |
| | 6190 | else |
| | 6191 | { |
| | 6192 | /* |
| | 6193 | * We found the close quote. Before we parse the filename, make |
| | 6194 | * one last check: if there's anything further on the line apart |
| | 6195 | * from whitespace, it's extraneous, so issue a warning. |
| | 6196 | */ |
| | 6197 | |
| | 6198 | /* remember where the close quote is */ |
| | 6199 | utf8_ptr closep = p_; |
| | 6200 | |
| | 6201 | /* skip it, and then skip any trailing whitespace */ |
| | 6202 | for (p_.inc() ; is_space(p_.getch()) ; p_.inc()) ; |
| | 6203 | |
| | 6204 | /* if we're not at the end of the line, issue a warning */ |
| | 6205 | if (p_.getch() != '\0') |
| | 6206 | log_warning(TCERR_EXTRA_INC_SYNTAX); |
| | 6207 | |
| | 6208 | /* |
| | 6209 | * Null-terminate the filename. (We know there's nothing else |
| | 6210 | * interesting in the buffer after the filename at this point, so |
| | 6211 | * we don't care about overwriting the quote or anything that might |
| | 6212 | * come after it.) |
| | 6213 | */ |
| | 6214 | closep.setch('\0'); |
| | 6215 | } |
| | 6216 | |
| | 6217 | /* check to see if the filename is absolute */ |
| | 6218 | is_absolute = os_is_file_absolute(fname.getptr()); |
| | 6219 | |
| | 6220 | /* we have yet to find the file */ |
| | 6221 | found = FALSE; |
| | 6222 | |
| | 6223 | /* |
| | 6224 | * in case the name is in portable URL notation, convert from URL |
| | 6225 | * notation to local notation; we'll consider this form of the name |
| | 6226 | * first, and only if we can't find it in this form will we try |
| | 6227 | * treating the name as using local filename conventions |
| | 6228 | */ |
| | 6229 | os_cvt_url_dir(lcl_name, sizeof(lcl_name), fname.getptr(), FALSE); |
| | 6230 | |
| | 6231 | /* |
| | 6232 | * Search for the included file. |
| | 6233 | * |
| | 6234 | * First, if it's a local file (in quotes rather than angle |
| | 6235 | * brackets), start the search in the directory containing the |
| | 6236 | * current file, then look in the directory containing the parent |
| | 6237 | * file, and so on. If we fail to find it, proceed as for a |
| | 6238 | * non-local file. |
| | 6239 | */ |
| | 6240 | if (is_local && last_desc_ != 0) |
| | 6241 | { |
| | 6242 | CTcTokStream *cur_str; |
| | 6243 | char pathbuf[OSFNMAX]; |
| | 6244 | |
| | 6245 | /* start with the current file, and search parents */ |
| | 6246 | for (cur_str = str_ ; cur_str != 0 ; cur_str = cur_str->get_parent()) |
| | 6247 | { |
| | 6248 | /* get the path to the current file */ |
| | 6249 | os_get_path_name(pathbuf, sizeof(pathbuf), |
| | 6250 | last_desc_->get_fname()); |
| | 6251 | |
| | 6252 | /* |
| | 6253 | * try the URL-converted name first - this takes precedence |
| | 6254 | * over a local interpretation of the name |
| | 6255 | */ |
| | 6256 | os_build_full_path(full_name, sizeof(full_name), |
| | 6257 | pathbuf, lcl_name); |
| | 6258 | if (!osfacc(full_name)) |
| | 6259 | { |
| | 6260 | found = TRUE; |
| | 6261 | break; |
| | 6262 | } |
| | 6263 | |
| | 6264 | /* if it's a relative local name, try again with local naming */ |
| | 6265 | if (!is_absolute) |
| | 6266 | { |
| | 6267 | /* |
| | 6268 | * build the full filename, treating the name as using |
| | 6269 | * local system conventions |
| | 6270 | */ |
| | 6271 | os_build_full_path(full_name, sizeof(full_name), |
| | 6272 | pathbuf, fname.getptr()); |
| | 6273 | |
| | 6274 | /* if we found it, so note and stop searching */ |
| | 6275 | if (!osfacc(full_name)) |
| | 6276 | { |
| | 6277 | found = TRUE; |
| | 6278 | break; |
| | 6279 | } |
| | 6280 | } |
| | 6281 | } |
| | 6282 | } |
| | 6283 | |
| | 6284 | /* |
| | 6285 | * If we still haven't found the file (or if it's a non-local file, |
| | 6286 | * in angle brackets), search the include path. |
| | 6287 | */ |
| | 6288 | if (!found) |
| | 6289 | { |
| | 6290 | tctok_incpath_t *inc_path; |
| | 6291 | |
| | 6292 | /* scan the include path */ |
| | 6293 | for (inc_path = incpath_head_ ; inc_path != 0 ; |
| | 6294 | inc_path = inc_path->nxt) |
| | 6295 | { |
| | 6296 | /* try the URL-converted local name first */ |
| | 6297 | os_build_full_path(full_name, sizeof(full_name), |
| | 6298 | inc_path->path, lcl_name); |
| | 6299 | if (!osfacc(full_name)) |
| | 6300 | { |
| | 6301 | found = TRUE; |
| | 6302 | break; |
| | 6303 | } |
| | 6304 | |
| | 6305 | /* try with the local name, if it's a relative local name */ |
| | 6306 | if (!is_absolute) |
| | 6307 | { |
| | 6308 | /* build the full name for the file in this directory */ |
| | 6309 | os_build_full_path(full_name, sizeof(full_name), |
| | 6310 | inc_path->path, fname.getptr()); |
| | 6311 | |
| | 6312 | /* if we found it, stop searching */ |
| | 6313 | if (!osfacc(full_name)) |
| | 6314 | { |
| | 6315 | found = TRUE; |
| | 6316 | break; |
| | 6317 | } |
| | 6318 | } |
| | 6319 | } |
| | 6320 | } |
| | 6321 | |
| | 6322 | /* |
| | 6323 | * If the filename specified an absolute path, and we didn't find a |
| | 6324 | * file with any of the local interpretations, look at the absolute |
| | 6325 | * path. Note that our portable URL-style notation doesn't allow |
| | 6326 | * absolute notation, so we use only the exact name as specified in |
| | 6327 | * the #include directive as the absolute form. |
| | 6328 | */ |
| | 6329 | if (is_absolute && !found) |
| | 6330 | { |
| | 6331 | /* use the original filename as the full name */ |
| | 6332 | strcpy(full_name, fname.getptr()); |
| | 6333 | |
| | 6334 | /* try finding the file */ |
| | 6335 | found = !osfacc(full_name); |
| | 6336 | } |
| | 6337 | |
| | 6338 | /* |
| | 6339 | * we have our copy of the filename now; we don't want to retain |
| | 6340 | * this directive in the preprocessed source, so clear out the line |
| | 6341 | * buffer now |
| | 6342 | */ |
| | 6343 | clear_linebuf(); |
| | 6344 | |
| | 6345 | /* |
| | 6346 | * if we didn't find the file anywhere, show an error and ignore the |
| | 6347 | * #include directive |
| | 6348 | */ |
| | 6349 | if (!found) |
| | 6350 | { |
| | 6351 | log_error(TCERR_INC_NOT_FOUND, |
| | 6352 | (int)strlen(fname.getptr()), fname.getptr()); |
| | 6353 | return; |
| | 6354 | } |
| | 6355 | |
| | 6356 | /* |
| | 6357 | * Check the list of included files that are marked for inclusion |
| | 6358 | * only once. If we've already included this file, ignore this |
| | 6359 | * redundant inclusion. Check based on the full filename that we |
| | 6360 | * resolved from the search path. |
| | 6361 | */ |
| | 6362 | if (find_include_once(full_name)) |
| | 6363 | { |
| | 6364 | /* log an error if appropriate */ |
| | 6365 | if (warn_on_ignore_incl_) |
| | 6366 | log_warning(TCERR_REDUNDANT_INCLUDE, |
| | 6367 | (int)strlen(full_name), full_name); |
| | 6368 | |
| | 6369 | /* ignore this #include directive */ |
| | 6370 | return; |
| | 6371 | } |
| | 6372 | |
| | 6373 | /* open a file source to read the file */ |
| | 6374 | new_src = CTcSrcFile::open_source(full_name, res_loader_, |
| | 6375 | default_charset_, &charset_error, |
| | 6376 | &default_charset_error); |
| | 6377 | |
| | 6378 | /* if we couldn't open the file, log an error and ignore the line */ |
| | 6379 | if (new_src == 0) |
| | 6380 | { |
| | 6381 | /* |
| | 6382 | * if the error was due to the default character set, log that |
| | 6383 | * problem; otherwise, log the general file-open problem |
| | 6384 | */ |
| | 6385 | if (default_charset_error) |
| | 6386 | log_error(TCERR_CANT_LOAD_DEFAULT_CHARSET, default_charset_); |
| | 6387 | else |
| | 6388 | log_error(TCERR_INC_NOT_FOUND, |
| | 6389 | (int)strlen(full_name), full_name); |
| | 6390 | |
| | 6391 | /* we can go no further */ |
| | 6392 | return; |
| | 6393 | } |
| | 6394 | |
| | 6395 | /* get the descriptor for the source file */ |
| | 6396 | desc = get_file_desc(full_name, strlen(full_name), FALSE, |
| | 6397 | fname.getptr(), |
| | 6398 | fname.getptr() != 0 ? strlen(fname.getptr()) : 0); |
| | 6399 | |
| | 6400 | /* |
| | 6401 | * remember the current #pragma newline_spacing mode, so we can restore |
| | 6402 | * it when we reinstate the current stream |
| | 6403 | */ |
| | 6404 | str_->set_newline_spacing(string_newline_spacing_); |
| | 6405 | |
| | 6406 | /* |
| | 6407 | * Create and install the new file reader stream object. By |
| | 6408 | * installing it as the current reader, we'll activate it so that |
| | 6409 | * the next line read will come from the new stream. Note that the |
| | 6410 | * current stream becomes the parent of the new stream, so that we |
| | 6411 | * revert to the current stream when the new stream is exhausted; |
| | 6412 | * this will allow us to pick up reading from the current stream at |
| | 6413 | * the next line after the #include directive when we've finished |
| | 6414 | * including the new file. |
| | 6415 | */ |
| | 6416 | str_ = new CTcTokStream(desc, new_src, str_, charset_error, if_sp_); |
| | 6417 | |
| | 6418 | /* |
| | 6419 | * If we're in ALL_ONCE mode, it means that every single file we |
| | 6420 | * include should be included only once. |
| | 6421 | */ |
| | 6422 | if (all_once_) |
| | 6423 | add_include_once(full_name); |
| | 6424 | |
| | 6425 | /* |
| | 6426 | * if we're in list-includes mode, write the name of the include file |
| | 6427 | * to the standard output |
| | 6428 | */ |
| | 6429 | if (list_includes_mode_) |
| | 6430 | G_hostifc->print_msg("#include %s\n", full_name); |
| | 6431 | } |
| | 6432 | |
| | 6433 | /* ------------------------------------------------------------------------ */ |
| | 6434 | /* |
| | 6435 | * Add a file to the include-once list. Once a file is in this list, we |
| | 6436 | * won't include it again. |
| | 6437 | */ |
| | 6438 | void CTcTokenizer::add_include_once(const char *fname) |
| | 6439 | { |
| | 6440 | tctok_incfile_t *prvinc; |
| | 6441 | |
| | 6442 | /* if the file is already in the list, don't add it again */ |
| | 6443 | if (find_include_once(fname)) |
| | 6444 | return; |
| | 6445 | |
| | 6446 | /* create a new entry for the filename */ |
| | 6447 | prvinc = (tctok_incfile_t *)t3malloc(sizeof(tctok_incfile_t) |
| | 6448 | + strlen(fname)); |
| | 6449 | |
| | 6450 | /* save the filename */ |
| | 6451 | strcpy(prvinc->fname, fname); |
| | 6452 | |
| | 6453 | /* link the new entry into our list */ |
| | 6454 | prvinc->nxt = prev_includes_; |
| | 6455 | prev_includes_ = prvinc; |
| | 6456 | } |
| | 6457 | |
| | 6458 | /* |
| | 6459 | * Find a file in the list of files to be included only once. Returns |
| | 6460 | * true if the file is in the list, false if not. |
| | 6461 | */ |
| | 6462 | int CTcTokenizer::find_include_once(const char *fname) |
| | 6463 | { |
| | 6464 | tctok_incfile_t *prvinc; |
| | 6465 | |
| | 6466 | /* search the list */ |
| | 6467 | for (prvinc = prev_includes_ ; prvinc != 0 ; prvinc = prvinc->nxt) |
| | 6468 | { |
| | 6469 | /* if this one matches, we found it, so return true */ |
| | 6470 | if (strcmp(fname, prvinc->fname) == 0) |
| | 6471 | return TRUE; |
| | 6472 | } |
| | 6473 | |
| | 6474 | /* we didn't find the file */ |
| | 6475 | return FALSE; |
| | 6476 | } |
| | 6477 | |
| | 6478 | /* ------------------------------------------------------------------------ */ |
| | 6479 | /* |
| | 6480 | * Process a #define directive |
| | 6481 | */ |
| | 6482 | void CTcTokenizer::pp_define() |
| | 6483 | { |
| | 6484 | const char *macro_name; |
| | 6485 | size_t macro_len; |
| | 6486 | const char *argv[TOK_MAX_MACRO_ARGS]; |
| | 6487 | size_t argvlen[TOK_MAX_MACRO_ARGS]; |
| | 6488 | int argc; |
| | 6489 | int has_args; |
| | 6490 | const char *expan; |
| | 6491 | size_t expan_len; |
| | 6492 | CTcHashEntryPp *entry; |
| | 6493 | int has_varargs; |
| | 6494 | |
| | 6495 | /* get the macro name */ |
| | 6496 | if (next_on_line() != TOKT_SYM) |
| | 6497 | { |
| | 6498 | log_error(TCERR_BAD_DEFINE_SYM, |
| | 6499 | (int)curtok_.get_text_len(), curtok_.get_text()); |
| | 6500 | clear_linebuf(); |
| | 6501 | return; |
| | 6502 | } |
| | 6503 | |
| | 6504 | /* make a copy of the macro name */ |
| | 6505 | macro_name = curtok_.get_text(); |
| | 6506 | macro_len = curtok_.get_text_len(); |
| | 6507 | |
| | 6508 | /* no arguments yet */ |
| | 6509 | argc = 0; |
| | 6510 | |
| | 6511 | /* presume we won't find a varargs marker */ |
| | 6512 | has_varargs = FALSE; |
| | 6513 | |
| | 6514 | /* |
| | 6515 | * If there's a '(' immediately after the macro name, without any |
| | 6516 | * intervening whitespace, it has arguments; otherwise, it has no |
| | 6517 | * arguments. Note which case we have. |
| | 6518 | */ |
| | 6519 | if (p_.getch() == '(') |
| | 6520 | { |
| | 6521 | int done; |
| | 6522 | tc_toktyp_t tok; |
| | 6523 | |
| | 6524 | /* note that we have an argument list */ |
| | 6525 | has_args = TRUE; |
| | 6526 | |
| | 6527 | /* assume we're not done yet */ |
| | 6528 | done = FALSE; |
| | 6529 | |
| | 6530 | /* skip the paren and get the next token */ |
| | 6531 | p_.inc(); |
| | 6532 | tok = next_on_line(); |
| | 6533 | |
| | 6534 | /* check for an empty argument list */ |
| | 6535 | if (tok == TOKT_RPAR) |
| | 6536 | { |
| | 6537 | /* note that we're done with the arguments */ |
| | 6538 | done = TRUE; |
| | 6539 | } |
| | 6540 | |
| | 6541 | /* scan the argument list */ |
| | 6542 | while (!done) |
| | 6543 | { |
| | 6544 | /* if we have too many arguments, it's an error */ |
| | 6545 | if (argc >= TOK_MAX_MACRO_ARGS) |
| | 6546 | { |
| | 6547 | log_error(TCERR_TOO_MANY_MAC_PARMS, |
| | 6548 | macro_name, macro_len, TOK_MAX_MACRO_ARGS); |
| | 6549 | clear_linebuf(); |
| | 6550 | return; |
| | 6551 | } |
| | 6552 | |
| | 6553 | /* if we're at the end of the macro, it's an error */ |
| | 6554 | if (tok == TOKT_EOF) |
| | 6555 | { |
| | 6556 | /* log the error and ignore the line */ |
| | 6557 | log_error(TCERR_MACRO_NO_RPAR); |
| | 6558 | clear_linebuf(); |
| | 6559 | return; |
| | 6560 | } |
| | 6561 | |
| | 6562 | /* check for a valid initial symbol character */ |
| | 6563 | if (tok != TOKT_SYM) |
| | 6564 | { |
| | 6565 | log_error_curtok(TCERR_BAD_MACRO_ARG_NAME); |
| | 6566 | clear_linebuf(); |
| | 6567 | return; |
| | 6568 | } |
| | 6569 | |
| | 6570 | /* remember the argument name */ |
| | 6571 | argvlen[argc] = curtok_.get_text_len(); |
| | 6572 | argv[argc++] = curtok_.get_text(); |
| | 6573 | |
| | 6574 | /* get the next token */ |
| | 6575 | tok = next_on_line(); |
| | 6576 | |
| | 6577 | /* make sure we have a comma or paren following */ |
| | 6578 | if (tok == TOKT_COMMA) |
| | 6579 | { |
| | 6580 | /* we have more arguments - skip the comma */ |
| | 6581 | tok = next_on_line(); |
| | 6582 | } |
| | 6583 | else if (tok == TOKT_ELLIPSIS) |
| | 6584 | { |
| | 6585 | /* skip the ellipsis */ |
| | 6586 | tok = next_on_line(); |
| | 6587 | |
| | 6588 | /* note the varargs marker */ |
| | 6589 | has_varargs = TRUE; |
| | 6590 | |
| | 6591 | /* this must be the last argument */ |
| | 6592 | if (tok != TOKT_RPAR) |
| | 6593 | { |
| | 6594 | /* log the error */ |
| | 6595 | log_error_curtok(TCERR_MACRO_ELLIPSIS_REQ_RPAR); |
| | 6596 | |
| | 6597 | /* discard the line and give up */ |
| | 6598 | clear_linebuf(); |
| | 6599 | return; |
| | 6600 | } |
| | 6601 | |
| | 6602 | /* that's the last argument - we can stop now */ |
| | 6603 | done = TRUE; |
| | 6604 | } |
| | 6605 | else if (tok == TOKT_RPAR) |
| | 6606 | { |
| | 6607 | /* no more arguments - note that we can stop now */ |
| | 6608 | done = TRUE; |
| | 6609 | } |
| | 6610 | else |
| | 6611 | { |
| | 6612 | /* invalid argument - log an error and discard the line */ |
| | 6613 | log_error_curtok(TCERR_MACRO_EXP_COMMA); |
| | 6614 | clear_linebuf(); |
| | 6615 | return; |
| | 6616 | } |
| | 6617 | } |
| | 6618 | } |
| | 6619 | else |
| | 6620 | { |
| | 6621 | /* |
| | 6622 | * there are no arguments - the macro's expansion starts |
| | 6623 | * immediately after the end of the name and any subsequent |
| | 6624 | * whitespace |
| | 6625 | */ |
| | 6626 | has_args = FALSE; |
| | 6627 | } |
| | 6628 | |
| | 6629 | /* skip whitespace leading up to the expansion */ |
| | 6630 | while (is_space(p_.getch())) |
| | 6631 | p_.inc(); |
| | 6632 | |
| | 6633 | /* the rest of the line is the expansion */ |
| | 6634 | expan = p_.getptr(); |
| | 6635 | |
| | 6636 | /* don't allow defining "defined" */ |
| | 6637 | if (macro_len == 7 && memcmp(macro_name, "defined", 7) == 0) |
| | 6638 | { |
| | 6639 | /* log an error */ |
| | 6640 | log_error(TCERR_REDEF_OP_DEFINED); |
| | 6641 | |
| | 6642 | /* don't retain the directive in the preprocessed result */ |
| | 6643 | clear_linebuf(); |
| | 6644 | |
| | 6645 | /* ignore the definition */ |
| | 6646 | return; |
| | 6647 | } |
| | 6648 | |
| | 6649 | /* get the length of the expansion text */ |
| | 6650 | expan_len = strlen(expan); |
| | 6651 | |
| | 6652 | /* |
| | 6653 | * remove any trailing whitespace from the expansion text; however, |
| | 6654 | * leave a trailing space if it's preceded by a backslash |
| | 6655 | */ |
| | 6656 | while (expan_len > 0 |
| | 6657 | && is_space(expan[expan_len-1]) |
| | 6658 | && !(expan_len > 1 && expan[expan_len-2] == '\\')) |
| | 6659 | --expan_len; |
| | 6660 | |
| | 6661 | /* |
| | 6662 | * If there are arguments, scan the expansion for formal parameter |
| | 6663 | * names. For each one we find, replace it with the special |
| | 6664 | * TOK_MACRO_FORMAL_FLAG character followed by a one-byte value |
| | 6665 | * giving the argument index. This special sequence is less costly |
| | 6666 | * to find when we're expanding the macros - by doing the search |
| | 6667 | * here, we only need to do it once, rather than each time we expand |
| | 6668 | * the macro. |
| | 6669 | */ |
| | 6670 | if (argc != 0) |
| | 6671 | { |
| | 6672 | utf8_ptr src; |
| | 6673 | size_t dstofs; |
| | 6674 | tc_toktyp_t typ; |
| | 6675 | CTcToken tok; |
| | 6676 | const char *start; |
| | 6677 | int in_embedding = FALSE; |
| | 6678 | |
| | 6679 | /* |
| | 6680 | * Generate our modified expansion text in the macro expansion |
| | 6681 | * buffer. Initially, make sure we have room for a copy of the |
| | 6682 | * text; we'll resize the buffer later if we find we need even |
| | 6683 | * more. |
| | 6684 | */ |
| | 6685 | expbuf_.ensure_space(expan_len); |
| | 6686 | |
| | 6687 | /* scan for argument names, and replace them */ |
| | 6688 | for (start = expan, dstofs = 0, src.set((char *)expan) ;; ) |
| | 6689 | { |
| | 6690 | /* get the next token */ |
| | 6691 | typ = next_on_line(&src, &tok, &in_embedding, FALSE); |
| | 6692 | |
| | 6693 | /* if we've reached the end of the expansion, we're done */ |
| | 6694 | if (typ == TOKT_EOF) |
| | 6695 | break; |
| | 6696 | |
| | 6697 | /* |
| | 6698 | * If this is a formal parameter name, we'll replace it with |
| | 6699 | * a special two-byte sequence; otherwise, we'll keep it |
| | 6700 | * unchanged. |
| | 6701 | */ |
| | 6702 | if (typ == TOKT_SYM) |
| | 6703 | { |
| | 6704 | int i; |
| | 6705 | |
| | 6706 | /* find it in the table */ |
| | 6707 | for (i = 0 ; i < argc ; ++i) |
| | 6708 | { |
| | 6709 | /* does it match this argument name? */ |
| | 6710 | if (argvlen[i] == tok.get_text_len() |
| | 6711 | && memcmp(argv[i], tok.get_text(), |
| | 6712 | tok.get_text_len()) == 0) |
| | 6713 | { |
| | 6714 | size_t new_len; |
| | 6715 | size_t arg_len; |
| | 6716 | size_t repl_len; |
| | 6717 | char flag_byte; |
| | 6718 | |
| | 6719 | /* get the length of the formal name */ |
| | 6720 | arg_len = argvlen[i]; |
| | 6721 | |
| | 6722 | /* |
| | 6723 | * the normal replacement length for a formal |
| | 6724 | * parameter is two bytes - one byte for the flag, |
| | 6725 | * and one for the formal parameter index |
| | 6726 | */ |
| | 6727 | repl_len = 2; |
| | 6728 | |
| | 6729 | /* by default, the flag byte is the formal flag */ |
| | 6730 | flag_byte = TOK_MACRO_FORMAL_FLAG; |
| | 6731 | |
| | 6732 | /* |
| | 6733 | * Check for special varargs control suffixes. If |
| | 6734 | * we matched the last argument name, and this is |
| | 6735 | * a varargs macro, we might have a suffix. |
| | 6736 | */ |
| | 6737 | if (has_varargs |
| | 6738 | && i == argc - 1 |
| | 6739 | && src.getch() == '#') |
| | 6740 | { |
| | 6741 | /* check for the various suffixes */ |
| | 6742 | if (memcmp(src.getptr() + 1, "foreach", 7) == 0 |
| | 6743 | && !is_sym(src.getch_at(8))) |
| | 6744 | { |
| | 6745 | /* |
| | 6746 | * include the suffix length in the token |
| | 6747 | * length |
| | 6748 | */ |
| | 6749 | arg_len += 8; |
| | 6750 | |
| | 6751 | /* |
| | 6752 | * the flag byte is the #foreach flag, |
| | 6753 | * which is a one-byte sequence |
| | 6754 | */ |
| | 6755 | flag_byte = TOK_MACRO_FOREACH_FLAG; |
| | 6756 | repl_len = 1; |
| | 6757 | } |
| | 6758 | else if (memcmp(src.getptr() + 1, |
| | 6759 | "argcount", 8) == 0 |
| | 6760 | && !is_sym(src.getch_at(9))) |
| | 6761 | { |
| | 6762 | /* |
| | 6763 | * include the suffix length in the token |
| | 6764 | * length |
| | 6765 | */ |
| | 6766 | arg_len += 9; |
| | 6767 | |
| | 6768 | /* |
| | 6769 | * the flag byte is the #argcount flag, |
| | 6770 | * which is a one-byte sequence |
| | 6771 | */ |
| | 6772 | flag_byte = TOK_MACRO_ARGCOUNT_FLAG; |
| | 6773 | repl_len = 1; |
| | 6774 | } |
| | 6775 | else if (memcmp(src.getptr() + 1, |
| | 6776 | "ifempty", 7) == 0 |
| | 6777 | && !is_sym(src.getch_at(8))) |
| | 6778 | { |
| | 6779 | /* include the length */ |
| | 6780 | arg_len += 8; |
| | 6781 | |
| | 6782 | /* set the one-byte flag */ |
| | 6783 | flag_byte = TOK_MACRO_IFEMPTY_FLAG; |
| | 6784 | repl_len = 1; |
| | 6785 | } |
| | 6786 | else if (memcmp(src.getptr() + 1, |
| | 6787 | "ifnempty", 8) == 0 |
| | 6788 | && !is_sym(src.getch_at(9))) |
| | 6789 | { |
| | 6790 | /* include the length */ |
| | 6791 | arg_len += 9; |
| | 6792 | |
| | 6793 | /* set the one-byte flag */ |
| | 6794 | flag_byte = TOK_MACRO_IFNEMPTY_FLAG; |
| | 6795 | repl_len = 1; |
| | 6796 | } |
| | 6797 | } |
| | 6798 | |
| | 6799 | /* |
| | 6800 | * calculate the new length - we're removing the |
| | 6801 | * argument name and adding the replacement string |
| | 6802 | * in its place |
| | 6803 | */ |
| | 6804 | new_len = expan_len + repl_len - arg_len; |
| | 6805 | |
| | 6806 | /* |
| | 6807 | * we need two bytes for the replacement - if |
| | 6808 | * this is more than we're replacing, make sure |
| | 6809 | * we have room for the extra |
| | 6810 | */ |
| | 6811 | if (new_len > expan_len) |
| | 6812 | expbuf_.ensure_space(new_len); |
| | 6813 | |
| | 6814 | /* |
| | 6815 | * copy everything up to but not including the |
| | 6816 | * formal name |
| | 6817 | */ |
| | 6818 | if (tok.get_text() > start) |
| | 6819 | { |
| | 6820 | /* store the text */ |
| | 6821 | memcpy(expbuf_.get_buf() + dstofs, |
| | 6822 | start, tok.get_text() - start); |
| | 6823 | |
| | 6824 | /* move past the stored text in the output */ |
| | 6825 | dstofs += tok.get_text() - start; |
| | 6826 | } |
| | 6827 | |
| | 6828 | /* the next segment starts after this token */ |
| | 6829 | start = tok.get_text() + arg_len; |
| | 6830 | |
| | 6831 | /* store the flag byte */ |
| | 6832 | expbuf_.get_buf()[dstofs++] = flag_byte; |
| | 6833 | |
| | 6834 | /* |
| | 6835 | * If appropriate, store the argument index - this |
| | 6836 | * always fits in one byte because our hard limit |
| | 6837 | * on formal parameters is less than 128 per |
| | 6838 | * macro. Note that we add one to the index so |
| | 6839 | * that we never store a zero byte, to avoid any |
| | 6840 | * potential confusion with a null terminator |
| | 6841 | * byte. |
| | 6842 | */ |
| | 6843 | if (repl_len > 1) |
| | 6844 | expbuf_.get_buf()[dstofs++] = (char)(i + 1); |
| | 6845 | |
| | 6846 | /* remember the new length */ |
| | 6847 | expan_len = new_len; |
| | 6848 | |
| | 6849 | /* no need to search further for it */ |
| | 6850 | break; |
| | 6851 | } |
| | 6852 | } |
| | 6853 | } |
| | 6854 | } |
| | 6855 | |
| | 6856 | /* copy the last segment */ |
| | 6857 | if (tok.get_text() > start) |
| | 6858 | { |
| | 6859 | /* store the text */ |
| | 6860 | memcpy(expbuf_.get_buf() + dstofs, start, |
| | 6861 | tok.get_text() - start); |
| | 6862 | } |
| | 6863 | |
| | 6864 | /* set the new length */ |
| | 6865 | expbuf_.set_text_len(expan_len); |
| | 6866 | |
| | 6867 | /* use the modified expansion text instead of the original */ |
| | 6868 | expan = expbuf_.get_text(); |
| | 6869 | } |
| | 6870 | |
| | 6871 | /* |
| | 6872 | * check the symbol table to see if this symbol is already defined - |
| | 6873 | * if so, show a warning, but honor the new definition |
| | 6874 | */ |
| | 6875 | entry = find_define(macro_name, macro_len); |
| | 6876 | if (entry != 0) |
| | 6877 | { |
| | 6878 | /* |
| | 6879 | * Check for a trivial redefinition - if the number of arguments |
| | 6880 | * is the same, and the type (object-like or function-like) is |
| | 6881 | * the same, and the expansion string is identical, there's no |
| | 6882 | * need to warn, because the redefinition has no effect and can |
| | 6883 | * thus be safely ignored. Note that we must ignore any |
| | 6884 | * differences in the whitespace in the expansions for this |
| | 6885 | * comparision. |
| | 6886 | */ |
| | 6887 | if ((entry->has_args() != 0) == (has_args != 0) |
| | 6888 | && entry->get_argc() == argc |
| | 6889 | && lib_strequal_collapse_spaces(expan, expan_len, |
| | 6890 | entry->get_expansion(), |
| | 6891 | entry->get_expan_len())) |
| | 6892 | { |
| | 6893 | /* it's a non-trivial redefinition - ignore it */ |
| | 6894 | goto done; |
| | 6895 | } |
| | 6896 | |
| | 6897 | /* log a warning about the redefinition */ |
| | 6898 | log_warning(TCERR_MACRO_REDEF, (int)macro_len, macro_name); |
| | 6899 | |
| | 6900 | /* remove and delete the old entry */ |
| | 6901 | defines_->remove(entry); |
| | 6902 | |
| | 6903 | /* if the item isn't already in the #undef table, add it */ |
| | 6904 | if (find_undef(macro_name, macro_len) == 0) |
| | 6905 | { |
| | 6906 | /* |
| | 6907 | * move the entry to the #undef table so that we can keep track |
| | 6908 | * of the fact that this macro's definition has changed in the |
| | 6909 | * course of the compilation |
| | 6910 | */ |
| | 6911 | undefs_->add(entry); |
| | 6912 | } |
| | 6913 | else |
| | 6914 | { |
| | 6915 | /* |
| | 6916 | * the name is already in the #undef table, so we don't need |
| | 6917 | * another copy - just forget about the old entry entirely |
| | 6918 | */ |
| | 6919 | delete entry; |
| | 6920 | } |
| | 6921 | } |
| | 6922 | |
| | 6923 | /* create an entry for the new macro */ |
| | 6924 | entry = new CTcHashEntryPpDefine(macro_name, macro_len, TRUE, |
| | 6925 | has_args, argc, has_varargs, |
| | 6926 | argv, argvlen, expan, expan_len); |
| | 6927 | |
| | 6928 | /* add it to the hash table */ |
| | 6929 | defines_->add(entry); |
| | 6930 | |
| | 6931 | done: |
| | 6932 | /* don't retain the directive in the preprocessed source */ |
| | 6933 | clear_linebuf(); |
| | 6934 | } |
| | 6935 | |
| | 6936 | /* ------------------------------------------------------------------------ */ |
| | 6937 | /* |
| | 6938 | * Process a #ifdef directive |
| | 6939 | */ |
| | 6940 | void CTcTokenizer::pp_ifdef() |
| | 6941 | { |
| | 6942 | /* process the ifdef/ifndef with a positive sense */ |
| | 6943 | pp_ifdef_or_ifndef(TRUE); |
| | 6944 | } |
| | 6945 | |
| | 6946 | /* |
| | 6947 | * Process a #ifndef directive |
| | 6948 | */ |
| | 6949 | void CTcTokenizer::pp_ifndef() |
| | 6950 | { |
| | 6951 | /* process the ifdef/ifndef with a negative sense */ |
| | 6952 | pp_ifdef_or_ifndef(FALSE); |
| | 6953 | } |
| | 6954 | |
| | 6955 | /* |
| | 6956 | * Process a #ifdef or #ifndef. If 'sense' is true, we'll take the |
| | 6957 | * branch if the symbol is defined (hence #ifdef), otherwise we'll take |
| | 6958 | * it if the symbol isn't defined (hence #ifndef). |
| | 6959 | */ |
| | 6960 | void CTcTokenizer::pp_ifdef_or_ifndef(int sense) |
| | 6961 | { |
| | 6962 | char macro_name[TOK_SYM_MAX_BUFFER]; |
| | 6963 | int found; |
| | 6964 | tok_if_t state; |
| | 6965 | |
| | 6966 | /* make sure we have a valid symbol */ |
| | 6967 | if (pp_get_lone_ident(macro_name, sizeof(macro_name))) |
| | 6968 | { |
| | 6969 | /* clear the line buffer */ |
| | 6970 | clear_linebuf(); |
| | 6971 | |
| | 6972 | /* |
| | 6973 | * push a true if to avoid cascading errors for matching #endif |
| | 6974 | * or #else |
| | 6975 | */ |
| | 6976 | push_if(TOKIF_IF_YES); |
| | 6977 | |
| | 6978 | /* we're done */ |
| | 6979 | return; |
| | 6980 | } |
| | 6981 | |
| | 6982 | /* check to see if it's defined */ |
| | 6983 | found = (find_define(macro_name, strlen(macro_name)) != 0); |
| | 6984 | |
| | 6985 | /* |
| | 6986 | * if we found it and they wanted it found, or we didn't find it and |
| | 6987 | * they didn't want it found, take a true branch; otherwise, take a |
| | 6988 | * false branch |
| | 6989 | */ |
| | 6990 | if ((sense != 0) == (found != 0)) |
| | 6991 | state = TOKIF_IF_YES; |
| | 6992 | else |
| | 6993 | state = TOKIF_IF_NO; |
| | 6994 | |
| | 6995 | /* push the new #if state */ |
| | 6996 | push_if(state); |
| | 6997 | |
| | 6998 | /* don't retain the directive in the preprocessed source */ |
| | 6999 | clear_linebuf(); |
| | 7000 | } |
| | 7001 | |
| | 7002 | /* ------------------------------------------------------------------------ */ |
| | 7003 | /* |
| | 7004 | * Process a #if directive |
| | 7005 | */ |
| | 7006 | void CTcTokenizer::pp_if() |
| | 7007 | { |
| | 7008 | CTcConstVal val; |
| | 7009 | |
| | 7010 | /* expand macros; don't allow reading additional lines */ |
| | 7011 | if (expand_macros_curline(FALSE, TRUE, FALSE)) |
| | 7012 | goto do_error; |
| | 7013 | |
| | 7014 | /* |
| | 7015 | * we don't need the original source line any more, and we don't |
| | 7016 | * want to copy it to the preprocessed output, so clear it |
| | 7017 | */ |
| | 7018 | clear_linebuf(); |
| | 7019 | |
| | 7020 | /* parse out of the expansion buffer */ |
| | 7021 | start_new_line(&expbuf_, 0); |
| | 7022 | |
| | 7023 | /* parse the preprocessor expression */ |
| | 7024 | if (pp_parse_expr(&val, TRUE, TRUE, TRUE)) |
| | 7025 | { |
| | 7026 | /* |
| | 7027 | * we can't get a value; treat the expression as true and |
| | 7028 | * continue parsing, so that we don't throw off the #if nesting |
| | 7029 | * level |
| | 7030 | */ |
| | 7031 | val.set_bool(TRUE); |
| | 7032 | } |
| | 7033 | |
| | 7034 | /* push the new state according to the value of the expression */ |
| | 7035 | push_if(val.get_val_bool() ? TOKIF_IF_YES : TOKIF_IF_NO); |
| | 7036 | |
| | 7037 | /* done */ |
| | 7038 | return; |
| | 7039 | |
| | 7040 | do_error: |
| | 7041 | /* clear the line buffer */ |
| | 7042 | clear_linebuf(); |
| | 7043 | |
| | 7044 | /* |
| | 7045 | * push a true if - even though we can't evaluate the condition, we |
| | 7046 | * can at least avoid a cascade of errors for the matching #endif |
| | 7047 | * and #else |
| | 7048 | */ |
| | 7049 | push_if(TOKIF_IF_YES); |
| | 7050 | } |
| | 7051 | |
| | 7052 | /* ------------------------------------------------------------------------ */ |
| | 7053 | /* |
| | 7054 | * Process a #elif directive |
| | 7055 | */ |
| | 7056 | void CTcTokenizer::pp_elif() |
| | 7057 | { |
| | 7058 | CTcConstVal val; |
| | 7059 | |
| | 7060 | /* expand macros; don't allow reading additional lines */ |
| | 7061 | if (expand_macros_curline(FALSE, TRUE, FALSE)) |
| | 7062 | { |
| | 7063 | clear_linebuf(); |
| | 7064 | return; |
| | 7065 | } |
| | 7066 | |
| | 7067 | /* parse out of the expansion buffer */ |
| | 7068 | start_new_line(&expbuf_, 0); |
| | 7069 | |
| | 7070 | /* parse the preprocessor expression */ |
| | 7071 | if (pp_parse_expr(&val, TRUE, TRUE, TRUE)) |
| | 7072 | { |
| | 7073 | clear_linebuf(); |
| | 7074 | return; |
| | 7075 | } |
| | 7076 | |
| | 7077 | /* |
| | 7078 | * make sure that the #elif occurs in the same file as the |
| | 7079 | * corresponding #if |
| | 7080 | */ |
| | 7081 | if (if_sp_ <= str_->get_init_if_level()) |
| | 7082 | { |
| | 7083 | /* log the error */ |
| | 7084 | log_error(TCERR_PP_ELIF_NOT_IN_SAME_FILE); |
| | 7085 | |
| | 7086 | /* clear the text and abort */ |
| | 7087 | clear_linebuf(); |
| | 7088 | return; |
| | 7089 | } |
| | 7090 | |
| | 7091 | /* check the current #if state */ |
| | 7092 | switch(get_if_state()) |
| | 7093 | { |
| | 7094 | case TOKIF_IF_YES: |
| | 7095 | /* |
| | 7096 | * we just took the #if branch, so don't take this or any |
| | 7097 | * subsequent #elif or #else branch, regardless of the value of |
| | 7098 | * the condition - set the state to DONE to indicate that we're |
| | 7099 | * skipping everything through the endif |
| | 7100 | */ |
| | 7101 | change_if_state(TOKIF_IF_DONE); |
| | 7102 | break; |
| | 7103 | |
| | 7104 | case TOKIF_IF_NO: |
| | 7105 | /* |
| | 7106 | * We haven't yet taken a #if or #elif branch, so we can take |
| | 7107 | * this branch if its condition is true. If this branch's |
| | 7108 | * condition is false, stay with NO so that we will consider |
| | 7109 | * future #elif and #else branches. |
| | 7110 | */ |
| | 7111 | if (val.get_val_bool()) |
| | 7112 | change_if_state(TOKIF_IF_YES); |
| | 7113 | break; |
| | 7114 | |
| | 7115 | case TOKIF_IF_DONE: |
| | 7116 | /* |
| | 7117 | * we've already taken a #if or #elif branch, so we must ignore |
| | 7118 | * this and subsequent #elif and #else branches until we get to |
| | 7119 | * our #endif - just stay in state DONE |
| | 7120 | */ |
| | 7121 | break; |
| | 7122 | |
| | 7123 | case TOKIF_NONE: |
| | 7124 | case TOKIF_ELSE_YES: |
| | 7125 | case TOKIF_ELSE_NO: |
| | 7126 | /* |
| | 7127 | * we're not in a #if branch at all, or we're inside a #else; a |
| | 7128 | * #elif is not legal here |
| | 7129 | */ |
| | 7130 | log_error(TCERR_PP_ELIF_WITHOUT_IF); |
| | 7131 | break; |
| | 7132 | } |
| | 7133 | |
| | 7134 | /* don't retain the directive in the preprocessed source */ |
| | 7135 | clear_linebuf(); |
| | 7136 | } |
| | 7137 | |
| | 7138 | /* ------------------------------------------------------------------------ */ |
| | 7139 | /* |
| | 7140 | * Process a #else directive |
| | 7141 | */ |
| | 7142 | void CTcTokenizer::pp_else() |
| | 7143 | { |
| | 7144 | /* make sure there's nothing but whitespace on the line */ |
| | 7145 | if (next_on_line() != TOKT_EOF) |
| | 7146 | log_error(TCERR_PP_EXTRA); |
| | 7147 | |
| | 7148 | /* |
| | 7149 | * make sure that the #else occurs in the same file as the |
| | 7150 | * corresponding #if |
| | 7151 | */ |
| | 7152 | if (if_sp_ <= str_->get_init_if_level()) |
| | 7153 | { |
| | 7154 | /* log the error */ |
| | 7155 | log_error(TCERR_PP_ELSE_NOT_IN_SAME_FILE); |
| | 7156 | |
| | 7157 | /* clear the text and abort */ |
| | 7158 | clear_linebuf(); |
| | 7159 | return; |
| | 7160 | } |
| | 7161 | |
| | 7162 | /* check our current #if state */ |
| | 7163 | switch(get_if_state()) |
| | 7164 | { |
| | 7165 | case TOKIF_IF_YES: |
| | 7166 | case TOKIF_IF_DONE: |
| | 7167 | /* |
| | 7168 | * we've already taken a true #if branch, so we don't want to |
| | 7169 | * process the #else part - switch to a false #else branch |
| | 7170 | */ |
| | 7171 | change_if_state(TOKIF_ELSE_NO); |
| | 7172 | break; |
| | 7173 | |
| | 7174 | case TOKIF_IF_NO: |
| | 7175 | /* |
| | 7176 | * we haven't yet found a true #if branch, so take the #else |
| | 7177 | * branch -- switch to a true #else branch |
| | 7178 | */ |
| | 7179 | change_if_state(TOKIF_ELSE_YES); |
| | 7180 | break; |
| | 7181 | |
| | 7182 | case TOKIF_NONE: |
| | 7183 | case TOKIF_ELSE_YES: |
| | 7184 | case TOKIF_ELSE_NO: |
| | 7185 | /* |
| | 7186 | * we're not in a #if at all, or we're in a #else - log an error |
| | 7187 | * and ignore it |
| | 7188 | */ |
| | 7189 | log_error(TCERR_PP_ELSE_WITHOUT_IF); |
| | 7190 | break; |
| | 7191 | } |
| | 7192 | |
| | 7193 | /* don't retain the directive in the preprocessed source */ |
| | 7194 | clear_linebuf(); |
| | 7195 | } |
| | 7196 | |
| | 7197 | /* ------------------------------------------------------------------------ */ |
| | 7198 | /* |
| | 7199 | * Process a #endif directive |
| | 7200 | */ |
| | 7201 | void CTcTokenizer::pp_endif() |
| | 7202 | { |
| | 7203 | /* make sure the rest of the line is blank */ |
| | 7204 | if (next_on_line() != TOKT_EOF) |
| | 7205 | log_error(TCERR_PP_EXTRA); |
| | 7206 | |
| | 7207 | /* ignore the rest of the line */ |
| | 7208 | clear_linebuf(); |
| | 7209 | |
| | 7210 | /* if we're not in a #if in the same file it's an error */ |
| | 7211 | if (if_sp_ == 0) |
| | 7212 | { |
| | 7213 | log_error(TCERR_PP_ENDIF_WITHOUT_IF); |
| | 7214 | return; |
| | 7215 | } |
| | 7216 | else if (if_sp_ <= str_->get_init_if_level()) |
| | 7217 | { |
| | 7218 | log_error(TCERR_PP_ENDIF_NOT_IN_SAME_FILE); |
| | 7219 | return; |
| | 7220 | } |
| | 7221 | |
| | 7222 | /* pop a #if level */ |
| | 7223 | pop_if(); |
| | 7224 | |
| | 7225 | /* don't retain the directive in the preprocessed source */ |
| | 7226 | clear_linebuf(); |
| | 7227 | } |
| | 7228 | |
| | 7229 | /* ------------------------------------------------------------------------ */ |
| | 7230 | /* |
| | 7231 | * Process a #error directive |
| | 7232 | */ |
| | 7233 | void CTcTokenizer::pp_error() |
| | 7234 | { |
| | 7235 | size_t startofs; |
| | 7236 | |
| | 7237 | /* |
| | 7238 | * copy the source line through the "error" token to the macro |
| | 7239 | * expansion buffer - we don't want to expand that part, but we want |
| | 7240 | * it to appear in the expansion, so just copy the original |
| | 7241 | */ |
| | 7242 | startofs = (curtok_.get_text() + curtok_.get_text_len() |
| | 7243 | - linebuf_.get_text()); |
| | 7244 | expbuf_.copy(linebuf_.get_text(), startofs); |
| | 7245 | |
| | 7246 | /* expand macros; don't allow reading additional lines */ |
| | 7247 | if (expand_macros_curline(FALSE, FALSE, TRUE)) |
| | 7248 | { |
| | 7249 | clear_linebuf(); |
| | 7250 | return; |
| | 7251 | } |
| | 7252 | |
| | 7253 | /* clean up any expansion flags embedded in the buffer */ |
| | 7254 | remove_expansion_flags(&expbuf_); |
| | 7255 | |
| | 7256 | /* |
| | 7257 | * If we're in preprocess-only mode, simply retain the text in the |
| | 7258 | * processed result, so that the error is processed on a subsequent |
| | 7259 | * compilation of the result; otherwise, display the error. |
| | 7260 | * |
| | 7261 | * Ignore #error directives in list-includes mode as well. |
| | 7262 | */ |
| | 7263 | if (!pp_only_mode_ && !list_includes_mode_) |
| | 7264 | { |
| | 7265 | /* display the error */ |
| | 7266 | log_error(TCERR_ERROR_DIRECTIVE, |
| | 7267 | (int)expbuf_.get_text_len() - startofs, |
| | 7268 | expbuf_.get_text() + startofs); |
| | 7269 | |
| | 7270 | /* clear the directive from the result */ |
| | 7271 | clear_linebuf(); |
| | 7272 | } |
| | 7273 | else |
| | 7274 | { |
| | 7275 | /* preprocessing - copy expanded text to line buffer */ |
| | 7276 | linebuf_.copy(expbuf_.get_text(), expbuf_.get_text_len()); |
| | 7277 | } |
| | 7278 | } |
| | 7279 | |
| | 7280 | /* ------------------------------------------------------------------------ */ |
| | 7281 | /* |
| | 7282 | * Process a #undef directive |
| | 7283 | */ |
| | 7284 | void CTcTokenizer::pp_undef() |
| | 7285 | { |
| | 7286 | char macro_name[TOK_SYM_MAX_BUFFER]; |
| | 7287 | |
| | 7288 | /* get the macro name */ |
| | 7289 | if (pp_get_lone_ident(macro_name, sizeof(macro_name))) |
| | 7290 | { |
| | 7291 | clear_linebuf(); |
| | 7292 | return; |
| | 7293 | } |
| | 7294 | |
| | 7295 | /* remove it */ |
| | 7296 | undefine(macro_name); |
| | 7297 | |
| | 7298 | /* don't retain the directive in the preprocessed source */ |
| | 7299 | clear_linebuf(); |
| | 7300 | } |
| | 7301 | |
| | 7302 | /* |
| | 7303 | * Programmatically delete a preprocesor symbol |
| | 7304 | */ |
| | 7305 | void CTcTokenizer::undefine(const char *sym, size_t len) |
| | 7306 | { |
| | 7307 | CTcHashEntryPp *entry; |
| | 7308 | |
| | 7309 | /* |
| | 7310 | * find the macro - if it wasn't defined, silently ignore it, since |
| | 7311 | * it's legal to #undef a symbol that wasn't previously defined |
| | 7312 | */ |
| | 7313 | entry = find_define(sym, len); |
| | 7314 | if (entry != 0 && entry->is_undefable()) |
| | 7315 | { |
| | 7316 | /* remove it */ |
| | 7317 | defines_->remove(entry); |
| | 7318 | |
| | 7319 | /* if it's not already in the #undef table, move it there */ |
| | 7320 | if (find_undef(sym, len) == 0) |
| | 7321 | { |
| | 7322 | /* move it to the #undef table */ |
| | 7323 | undefs_->add(entry); |
| | 7324 | } |
| | 7325 | else |
| | 7326 | { |
| | 7327 | /* |
| | 7328 | * the name is already in the #undef table, so we don't need to |
| | 7329 | * add it again - we can forget about this entry entirely |
| | 7330 | */ |
| | 7331 | delete entry; |
| | 7332 | } |
| | 7333 | } |
| | 7334 | } |
| | 7335 | |
| | 7336 | /* ------------------------------------------------------------------------ */ |
| | 7337 | /* |
| | 7338 | * Process a #line directive |
| | 7339 | */ |
| | 7340 | void CTcTokenizer::pp_line() |
| | 7341 | { |
| | 7342 | CTcConstVal val_line; |
| | 7343 | CTcConstVal val_fname; |
| | 7344 | CTcTokFileDesc *desc; |
| | 7345 | |
| | 7346 | /* expand macros; don't allow reading additional lines */ |
| | 7347 | if (expand_macros_curline(FALSE, TRUE, FALSE)) |
| | 7348 | { |
| | 7349 | clear_linebuf(); |
| | 7350 | return; |
| | 7351 | } |
| | 7352 | |
| | 7353 | /* |
| | 7354 | * we don't need the original source line any more, and we don't |
| | 7355 | * want to copy it to the preprocessed output, so clear it |
| | 7356 | */ |
| | 7357 | clear_linebuf(); |
| | 7358 | |
| | 7359 | /* set up to parse from the expansion */ |
| | 7360 | start_new_line(&expbuf_, 0); |
| | 7361 | |
| | 7362 | /* evaluate the line number expression */ |
| | 7363 | if (pp_parse_expr(&val_line, TRUE, FALSE, TRUE)) |
| | 7364 | return; |
| | 7365 | |
| | 7366 | /* if it's not an integer constant, it's an error */ |
| | 7367 | if (val_line.get_type() != TC_CVT_INT) |
| | 7368 | { |
| | 7369 | log_error(TCERR_LINE_REQ_INT); |
| | 7370 | return; |
| | 7371 | } |
| | 7372 | |
| | 7373 | /* evaluate the filename expression */ |
| | 7374 | if (pp_parse_expr(&val_fname, FALSE, TRUE, TRUE)) |
| | 7375 | return; |
| | 7376 | |
| | 7377 | /* the filename must be a string expression */ |
| | 7378 | if (val_fname.get_type() != TC_CVT_SSTR) |
| | 7379 | { |
| | 7380 | log_error(TCERR_LINE_FILE_REQ_STR); |
| | 7381 | return; |
| | 7382 | } |
| | 7383 | |
| | 7384 | /* find or create a descriptor for the filename */ |
| | 7385 | desc = get_file_desc(val_fname.get_val_str(), |
| | 7386 | val_fname.get_val_str_len(), FALSE, 0, 0); |
| | 7387 | |
| | 7388 | /* set the new line number and descriptor in the current stream */ |
| | 7389 | if (str_ != 0) |
| | 7390 | { |
| | 7391 | str_->set_next_linenum(val_line.get_val_int()); |
| | 7392 | str_->set_desc(desc); |
| | 7393 | } |
| | 7394 | |
| | 7395 | /* |
| | 7396 | * retain the pragma in the result if we're in preprocess-only mode, |
| | 7397 | * otherwise remove it |
| | 7398 | */ |
| | 7399 | if (!pp_only_mode_) |
| | 7400 | clear_linebuf(); |
| | 7401 | } |
| | 7402 | |
| | 7403 | /* ------------------------------------------------------------------------ */ |
| | 7404 | /* |
| | 7405 | * Look up a symbol in the #define symbol table |
| | 7406 | */ |
| | 7407 | CTcHashEntryPp *CTcTokenizer::find_define(const char *sym, size_t len) const |
| | 7408 | { |
| | 7409 | /* look it up in the #define symbol table and return the result */ |
| | 7410 | return (CTcHashEntryPp *)defines_->find(sym, len); |
| | 7411 | } |
| | 7412 | |
| | 7413 | /* |
| | 7414 | * Look up a symbol in the #undef table |
| | 7415 | */ |
| | 7416 | CTcHashEntryPp *CTcTokenizer::find_undef(const char *sym, size_t len) const |
| | 7417 | { |
| | 7418 | /* look it up in the #define symbol table and return the result */ |
| | 7419 | return (CTcHashEntryPp *)undefs_->find(sym, len); |
| | 7420 | } |
| | 7421 | |
| | 7422 | /* |
| | 7423 | * Add a preprocessor macro definition |
| | 7424 | */ |
| | 7425 | void CTcTokenizer::add_define(const char *sym, size_t len, |
| | 7426 | const char *expansion, size_t expan_len) |
| | 7427 | { |
| | 7428 | CTcHashEntryPp *entry; |
| | 7429 | |
| | 7430 | /* create an entry for the macro, with no argument list */ |
| | 7431 | entry = new CTcHashEntryPpDefine(sym, len, TRUE, FALSE, 0, FALSE, 0, 0, |
| | 7432 | expansion, expan_len); |
| | 7433 | |
| | 7434 | /* add the new entry to the table */ |
| | 7435 | defines_->add(entry); |
| | 7436 | } |
| | 7437 | |
| | 7438 | /* |
| | 7439 | * Add a preprocessor macro definition |
| | 7440 | */ |
| | 7441 | void CTcTokenizer::add_define(CTcHashEntryPp *entry) |
| | 7442 | { |
| | 7443 | /* add the entry to our symbol table */ |
| | 7444 | defines_->add(entry); |
| | 7445 | } |
| | 7446 | |
| | 7447 | /* |
| | 7448 | * parse an expression |
| | 7449 | */ |
| | 7450 | int CTcTokenizer::pp_parse_expr(CTcConstVal *val, int read_first, |
| | 7451 | int last_on_line, int add_line_ending) |
| | 7452 | { |
| | 7453 | CTcPrsNode *expr_tree; |
| | 7454 | char ch; |
| | 7455 | |
| | 7456 | /* add the line ending marker if required */ |
| | 7457 | if (add_line_ending) |
| | 7458 | { |
| | 7459 | /* |
| | 7460 | * append the special end-of-preprocess-line to the macro |
| | 7461 | * expansion buffer |
| | 7462 | */ |
| | 7463 | ch = TOK_END_PP_LINE; |
| | 7464 | expbuf_.append(&ch, 1); |
| | 7465 | } |
| | 7466 | |
| | 7467 | /* |
| | 7468 | * note that we're pasing a preprocessor expression; this affects |
| | 7469 | * error logging in certain cases |
| | 7470 | */ |
| | 7471 | in_pp_expr_ = TRUE; |
| | 7472 | |
| | 7473 | /* |
| | 7474 | * parse the expression in preprocessor mode, so that double-quoted |
| | 7475 | * strings can be concatenated and compared |
| | 7476 | */ |
| | 7477 | G_prs->set_pp_expr_mode(TRUE); |
| | 7478 | |
| | 7479 | /* get the first token on the line if desired */ |
| | 7480 | if (read_first) |
| | 7481 | next(); |
| | 7482 | |
| | 7483 | /* parse the expression */ |
| | 7484 | expr_tree = G_prs->parse_expr(); |
| | 7485 | |
| | 7486 | /* make sure we're at the end of the line if desired */ |
| | 7487 | if (last_on_line && next() != TOKT_EOF) |
| | 7488 | log_error(TCERR_PP_EXPR_EXTRA); |
| | 7489 | |
| | 7490 | /* if we added the special pp-line-ending marker, remove it */ |
| | 7491 | if (add_line_ending) |
| | 7492 | { |
| | 7493 | /* |
| | 7494 | * the marker is always the last character - remove it simply by |
| | 7495 | * shortening the buffer by a character |
| | 7496 | */ |
| | 7497 | expbuf_.set_text_len(expbuf_.get_text_len() - 1); |
| | 7498 | } |
| | 7499 | |
| | 7500 | /* return to normal expression mode */ |
| | 7501 | G_prs->set_pp_expr_mode(FALSE); |
| | 7502 | |
| | 7503 | /* return to normal tokenizing mode */ |
| | 7504 | in_pp_expr_ = FALSE; |
| | 7505 | |
| | 7506 | /* if we didn't get a valid expression, return failure */ |
| | 7507 | if (expr_tree == 0) |
| | 7508 | return 1; |
| | 7509 | |
| | 7510 | /* make sure we got a constant */ |
| | 7511 | if (!expr_tree->is_const()) |
| | 7512 | { |
| | 7513 | log_error(TCERR_PP_EXPR_NOT_CONST); |
| | 7514 | return 1; |
| | 7515 | } |
| | 7516 | |
| | 7517 | /* fill in the caller's value */ |
| | 7518 | *val = *expr_tree->get_const_val(); |
| | 7519 | |
| | 7520 | /* success */ |
| | 7521 | return 0; |
| | 7522 | } |
| | 7523 | |
| | 7524 | /* ------------------------------------------------------------------------ */ |
| | 7525 | /* |
| | 7526 | * #define enumeration callback context |
| | 7527 | */ |
| | 7528 | struct def_enum_cb_t |
| | 7529 | { |
| | 7530 | /* original callback function */ |
| | 7531 | void (*cb)(void *, CTcHashEntryPp *); |
| | 7532 | |
| | 7533 | /* original callback context */ |
| | 7534 | void *ctx; |
| | 7535 | }; |
| | 7536 | |
| | 7537 | /* |
| | 7538 | * #define enumeration callback. This is a simple impedence matcher on the |
| | 7539 | * way to the real callbac; we cast the generic hash entry type to the |
| | 7540 | * CTcHashEntryPp subclass for the benefit of the real callback. |
| | 7541 | */ |
| | 7542 | static void enum_defines_cb(void *ctx0, CVmHashEntry *entry) |
| | 7543 | { |
| | 7544 | def_enum_cb_t *ctx; |
| | 7545 | |
| | 7546 | /* get our real context */ |
| | 7547 | ctx = (def_enum_cb_t *)ctx0; |
| | 7548 | |
| | 7549 | /* invoke the real callback, casting the entry reference appropriately */ |
| | 7550 | (*ctx->cb)(ctx->ctx, (CTcHashEntryPp *)entry); |
| | 7551 | } |
| | 7552 | |
| | 7553 | /* |
| | 7554 | * Enumerate the entries in the #define table through a callback |
| | 7555 | */ |
| | 7556 | void CTcTokenizer::enum_defines(void (*cb)(void *, CTcHashEntryPp *), |
| | 7557 | void *ctx) |
| | 7558 | { |
| | 7559 | def_enum_cb_t myctx; |
| | 7560 | |
| | 7561 | /* set up our impedence-matcher context with the real callback info */ |
| | 7562 | myctx.cb = cb; |
| | 7563 | myctx.ctx = ctx; |
| | 7564 | |
| | 7565 | /* enumerate through our impedence-matcher callback */ |
| | 7566 | defines_->enum_entries(&enum_defines_cb, &myctx); |
| | 7567 | } |
| | 7568 | |
| | 7569 | /* ------------------------------------------------------------------------ */ |
| | 7570 | /* |
| | 7571 | * Get a lone identifier for a preprocessor directive. The identifier |
| | 7572 | * must be the only thing left on the line; we'll generate an error if |
| | 7573 | * extra characters follow on the line. |
| | 7574 | * |
| | 7575 | * If there's no identifier on the line, or there's more information |
| | 7576 | * after the identifier, logs an error and returns non-zero; returns |
| | 7577 | * zero on success. |
| | 7578 | */ |
| | 7579 | int CTcTokenizer::pp_get_lone_ident(char *buf, size_t bufl) |
| | 7580 | { |
| | 7581 | /* get the next token, and make sure it's a symbol */ |
| | 7582 | if (next_on_line() != TOKT_SYM) |
| | 7583 | { |
| | 7584 | log_error_curtok(TCERR_BAD_DEFINE_SYM); |
| | 7585 | return 1; |
| | 7586 | } |
| | 7587 | |
| | 7588 | /* return an error if it doesn't fit */ |
| | 7589 | if (curtok_.get_text_len() > bufl) |
| | 7590 | return 1; |
| | 7591 | |
| | 7592 | /* copy the text */ |
| | 7593 | memcpy(buf, curtok_.get_text(), curtok_.get_text_len()); |
| | 7594 | buf[curtok_.get_text_len()] = '\0'; |
| | 7595 | |
| | 7596 | /* make sure there's nothing else on the line but whitespace */ |
| | 7597 | if (next_on_line() != TOKT_EOF) |
| | 7598 | { |
| | 7599 | log_error(TCERR_PP_EXTRA); |
| | 7600 | return 1; |
| | 7601 | } |
| | 7602 | |
| | 7603 | /* success */ |
| | 7604 | return 0; |
| | 7605 | } |
| | 7606 | |
| | 7607 | /* ------------------------------------------------------------------------ */ |
| | 7608 | /* |
| | 7609 | * Push a new #if level |
| | 7610 | */ |
| | 7611 | void CTcTokenizer::push_if(tok_if_t state) |
| | 7612 | { |
| | 7613 | /* if we're out of space in the stack, throw a fatal error */ |
| | 7614 | if (if_sp_ == TOK_MAX_IF_NESTING) |
| | 7615 | throw_fatal_error(TCERR_IF_NESTING_OVERFLOW); |
| | 7616 | |
| | 7617 | /* |
| | 7618 | * if we're in a nested #if in a false #if, increase the nested |
| | 7619 | * false #if level |
| | 7620 | */ |
| | 7621 | if (in_false_if()) |
| | 7622 | ++if_false_level_; |
| | 7623 | |
| | 7624 | /* push the state, remembering where the #if was defined */ |
| | 7625 | if_stack_[if_sp_].desc = last_desc_; |
| | 7626 | if_stack_[if_sp_].linenum = last_linenum_; |
| | 7627 | if_stack_[if_sp_++].state = state; |
| | 7628 | } |
| | 7629 | |
| | 7630 | /* |
| | 7631 | * Pop a #if level |
| | 7632 | */ |
| | 7633 | void CTcTokenizer::pop_if() |
| | 7634 | { |
| | 7635 | /* if we're in a nested #if in a false #if, pop the nesting level */ |
| | 7636 | if (if_false_level_ != 0) |
| | 7637 | --if_false_level_; |
| | 7638 | |
| | 7639 | /* pop the main if level */ |
| | 7640 | if (if_sp_ != 0) |
| | 7641 | --if_sp_; |
| | 7642 | } |
| | 7643 | |
| | 7644 | |
| | 7645 | /* ------------------------------------------------------------------------ */ |
| | 7646 | /* |
| | 7647 | * Log an error |
| | 7648 | */ |
| | 7649 | void CTcTokenizer::log_error(int errnum, ...) |
| | 7650 | { |
| | 7651 | va_list marker; |
| | 7652 | |
| | 7653 | /* display the message */ |
| | 7654 | va_start(marker, errnum); |
| | 7655 | G_tcmain->v_log_error(G_tok->get_last_desc(), G_tok->get_last_linenum(), |
| | 7656 | TC_SEV_ERROR, errnum, marker); |
| | 7657 | va_end(marker); |
| | 7658 | } |
| | 7659 | |
| | 7660 | /* |
| | 7661 | * Log an error with the current token's text as the parameter data, |
| | 7662 | * suitable for use with a "%.*s" display format entry |
| | 7663 | */ |
| | 7664 | void CTcTokenizer::log_error_curtok(int errnum) |
| | 7665 | { |
| | 7666 | /* |
| | 7667 | * display the message, passing "%.*s" parameter data for the |
| | 7668 | * current token text: an integer giving the length of the token |
| | 7669 | * text, and a pointer to the token text |
| | 7670 | */ |
| | 7671 | log_error_or_warning_curtok(TC_SEV_ERROR, errnum); |
| | 7672 | } |
| | 7673 | |
| | 7674 | /* |
| | 7675 | * Log an error or warning for the current token |
| | 7676 | */ |
| | 7677 | void CTcTokenizer::log_error_or_warning_curtok(tc_severity_t sev, int errnum) |
| | 7678 | { |
| | 7679 | /* log the error with our current token */ |
| | 7680 | log_error_or_warning_with_tok(sev, errnum, getcur()); |
| | 7681 | } |
| | 7682 | |
| | 7683 | /* |
| | 7684 | * Log an error or warning with the given token |
| | 7685 | */ |
| | 7686 | void CTcTokenizer::log_error_or_warning_with_tok( |
| | 7687 | tc_severity_t sev, int errnum, const CTcToken *tok) |
| | 7688 | { |
| | 7689 | const char *tok_txt; |
| | 7690 | size_t tok_len; |
| | 7691 | char buf[128]; |
| | 7692 | const char *prefix; |
| | 7693 | const char *suffix; |
| | 7694 | utf8_ptr src; |
| | 7695 | utf8_ptr dst; |
| | 7696 | size_t rem; |
| | 7697 | size_t outchars; |
| | 7698 | |
| | 7699 | /* see what we have */ |
| | 7700 | switch(tok->gettyp()) |
| | 7701 | { |
| | 7702 | case TOKT_SSTR: |
| | 7703 | /* show the string in quotes, but limit the length */ |
| | 7704 | prefix = "'"; |
| | 7705 | suffix = "'"; |
| | 7706 | goto format_string; |
| | 7707 | |
| | 7708 | case TOKT_DSTR: |
| | 7709 | prefix = "\""; |
| | 7710 | suffix = "\""; |
| | 7711 | goto format_string; |
| | 7712 | |
| | 7713 | case TOKT_DSTR_START: |
| | 7714 | prefix = "\""; |
| | 7715 | suffix = "<<"; |
| | 7716 | goto format_string; |
| | 7717 | |
| | 7718 | case TOKT_DSTR_MID: |
| | 7719 | prefix = ">>"; |
| | 7720 | suffix = "<<"; |
| | 7721 | goto format_string; |
| | 7722 | |
| | 7723 | case TOKT_DSTR_END: |
| | 7724 | prefix = ">>"; |
| | 7725 | suffix = "\""; |
| | 7726 | goto format_string; |
| | 7727 | |
| | 7728 | format_string: |
| | 7729 | /* set the prefix */ |
| | 7730 | strcpy(buf, prefix); |
| | 7731 | |
| | 7732 | /* |
| | 7733 | * show the string, but limit the length, and convert control |
| | 7734 | * characters to escaped representation |
| | 7735 | */ |
| | 7736 | src.set((char *)tok->get_text()); |
| | 7737 | rem = tok->get_text_len(); |
| | 7738 | for (dst.set(buf + strlen(buf)), outchars = 0 ; |
| | 7739 | rem != 0 && outchars < 20 ; src.inc(&rem), ++outchars) |
| | 7740 | { |
| | 7741 | /* if this is a control character, escape it */ |
| | 7742 | if (src.getch() < 32) |
| | 7743 | { |
| | 7744 | dst.setch('\\'); |
| | 7745 | |
| | 7746 | switch(src.getch()) |
| | 7747 | { |
| | 7748 | case 10: |
| | 7749 | dst.setch('n'); |
| | 7750 | break; |
| | 7751 | |
| | 7752 | case 0x000F: |
| | 7753 | dst.setch('^'); |
| | 7754 | break; |
| | 7755 | |
| | 7756 | case 0x000E: |
| | 7757 | dst.setch('v'); |
| | 7758 | break; |
| | 7759 | |
| | 7760 | case 0x000B: |
| | 7761 | dst.setch('b'); |
| | 7762 | break; |
| | 7763 | |
| | 7764 | case 0x0015: |
| | 7765 | dst.setch(' '); |
| | 7766 | break; |
| | 7767 | |
| | 7768 | case 9: |
| | 7769 | dst.setch('t'); |
| | 7770 | break; |
| | 7771 | |
| | 7772 | default: |
| | 7773 | dst.setch('x'); |
| | 7774 | dst.setch('0' + (src.getch() >> 12) & 0xf); |
| | 7775 | dst.setch('0' + (src.getch() >> 8) & 0xf); |
| | 7776 | dst.setch('0' + (src.getch() >> 4) & 0xf); |
| | 7777 | dst.setch('0' + (src.getch()) & 0xf); |
| | 7778 | break; |
| | 7779 | } |
| | 7780 | } |
| | 7781 | else |
| | 7782 | { |
| | 7783 | /* put this character as-is */ |
| | 7784 | dst.setch(src.getch()); |
| | 7785 | } |
| | 7786 | } |
| | 7787 | |
| | 7788 | /* if there's more string left, add "..." */ |
| | 7789 | if (rem != 0) |
| | 7790 | { |
| | 7791 | dst.setch('.'); |
| | 7792 | dst.setch('.'); |
| | 7793 | dst.setch('.'); |
| | 7794 | } |
| | 7795 | |
| | 7796 | /* add the suffix */ |
| | 7797 | strcpy(dst.getptr(), suffix); |
| | 7798 | |
| | 7799 | /* use this buffer as the token string to display */ |
| | 7800 | tok_txt = buf; |
| | 7801 | tok_len = strlen(tok_txt); |
| | 7802 | break; |
| | 7803 | |
| | 7804 | case TOKT_EOF: |
| | 7805 | /* show a special "<End Of File>" marker */ |
| | 7806 | tok_txt = "<End Of File>"; |
| | 7807 | tok_len = strlen(tok_txt); |
| | 7808 | break; |
| | 7809 | |
| | 7810 | default: |
| | 7811 | /* just show the current token text */ |
| | 7812 | tok_txt = tok->get_text(); |
| | 7813 | tok_len = tok->get_text_len(); |
| | 7814 | break; |
| | 7815 | } |
| | 7816 | |
| | 7817 | /* log the error */ |
| | 7818 | G_tcmain->log_error(get_last_desc(), get_last_linenum(), |
| | 7819 | sev, errnum, tok_len, tok_txt); |
| | 7820 | } |
| | 7821 | |
| | 7822 | /* |
| | 7823 | * Log a warning |
| | 7824 | */ |
| | 7825 | void CTcTokenizer::log_warning(int errnum, ...) |
| | 7826 | { |
| | 7827 | va_list marker; |
| | 7828 | |
| | 7829 | /* display the message */ |
| | 7830 | va_start(marker, errnum); |
| | 7831 | G_tcmain->v_log_error(G_tok->get_last_desc(), G_tok->get_last_linenum(), |
| | 7832 | TC_SEV_WARNING, errnum, marker); |
| | 7833 | va_end(marker); |
| | 7834 | } |
| | 7835 | |
| | 7836 | /* |
| | 7837 | * Log a warning with the current token's text as the parameter data, |
| | 7838 | * suitable for use with a "%.*s" display format entry |
| | 7839 | */ |
| | 7840 | void CTcTokenizer::log_warning_curtok(int errnum) |
| | 7841 | { |
| | 7842 | /* |
| | 7843 | * display the warning message, passing "%.*s" parameter data for |
| | 7844 | * the current token text: an integer giving the length of the token |
| | 7845 | * text, and a pointer to the token text |
| | 7846 | */ |
| | 7847 | log_error_or_warning_curtok(TC_SEV_WARNING, errnum); |
| | 7848 | } |
| | 7849 | |
| | 7850 | /* |
| | 7851 | * Log and throw an internal error |
| | 7852 | */ |
| | 7853 | void CTcTokenizer::throw_internal_error(int errnum, ...) |
| | 7854 | { |
| | 7855 | va_list marker; |
| | 7856 | |
| | 7857 | /* display the message */ |
| | 7858 | va_start(marker, errnum); |
| | 7859 | G_tcmain->v_log_error(G_tok->get_last_desc(), G_tok->get_last_linenum(), |
| | 7860 | TC_SEV_INTERNAL, errnum, marker); |
| | 7861 | va_end(marker); |
| | 7862 | |
| | 7863 | /* throw the generic internal error, since we've logged this */ |
| | 7864 | err_throw(TCERR_INTERNAL_ERROR); |
| | 7865 | } |
| | 7866 | |
| | 7867 | /* |
| | 7868 | * Log and throw a fatal error |
| | 7869 | */ |
| | 7870 | void CTcTokenizer::throw_fatal_error(int errnum, ...) |
| | 7871 | { |
| | 7872 | va_list marker; |
| | 7873 | |
| | 7874 | /* display the message */ |
| | 7875 | va_start(marker, errnum); |
| | 7876 | G_tcmain->v_log_error(G_tok->get_last_desc(), G_tok->get_last_linenum(), |
| | 7877 | TC_SEV_FATAL, errnum, marker); |
| | 7878 | va_end(marker); |
| | 7879 | |
| | 7880 | /* throw the generic fatal error, since we've logged this */ |
| | 7881 | err_throw(TCERR_FATAL_ERROR); |
| | 7882 | } |
| | 7883 | |
| | 7884 | /* |
| | 7885 | * display a string value |
| | 7886 | */ |
| | 7887 | void CTcTokenizer::msg_str(const char *str, size_t len) const |
| | 7888 | { |
| | 7889 | /* display the string through the host interface */ |
| | 7890 | G_hostifc->print_msg("%.*s", (int)len, str); |
| | 7891 | } |
| | 7892 | |
| | 7893 | /* |
| | 7894 | * display a numeric value |
| | 7895 | */ |
| | 7896 | void CTcTokenizer::msg_long(long val) const |
| | 7897 | { |
| | 7898 | /* display the number through the host interface */ |
| | 7899 | G_hostifc->print_msg("%ld", val); |
| | 7900 | } |
| | 7901 | |
| | 7902 | /* ------------------------------------------------------------------------ */ |
| | 7903 | /* |
| | 7904 | * Tokenizer Input Stream implementation |
| | 7905 | */ |
| | 7906 | |
| | 7907 | /* |
| | 7908 | * create a token input stream |
| | 7909 | */ |
| | 7910 | CTcTokStream::CTcTokStream(CTcTokFileDesc *desc, CTcSrcObject *src, |
| | 7911 | CTcTokStream *parent, int charset_error, |
| | 7912 | int init_if_level) |
| | 7913 | { |
| | 7914 | /* remember the underlying source file */ |
| | 7915 | src_ = src; |
| | 7916 | |
| | 7917 | /* remember the file descriptor */ |
| | 7918 | desc_ = desc; |
| | 7919 | |
| | 7920 | /* remember the containing stream */ |
| | 7921 | parent_ = parent; |
| | 7922 | |
| | 7923 | /* the next line to read is line number 1 */ |
| | 7924 | next_linenum_ = 1; |
| | 7925 | |
| | 7926 | /* remember if there was a #charset error */ |
| | 7927 | charset_error_ = charset_error; |
| | 7928 | |
| | 7929 | /* we're not in a comment yet */ |
| | 7930 | in_comment_ = FALSE; |
| | 7931 | |
| | 7932 | /* remember the starting #if level */ |
| | 7933 | init_if_level_ = init_if_level; |
| | 7934 | |
| | 7935 | #if 0 // #pragma C is not currently used |
| | 7936 | /* |
| | 7937 | * start out in parent's pragma C mode, or in non-C mode if we have |
| | 7938 | * no parent |
| | 7939 | */ |
| | 7940 | if (parent != 0) |
| | 7941 | pragma_c_ = parent->is_pragma_c(); |
| | 7942 | else |
| | 7943 | pragma_c_ = TRUE; |
| | 7944 | #endif |
| | 7945 | } |
| | 7946 | |
| | 7947 | /* |
| | 7948 | * delete a token input stream |
| | 7949 | */ |
| | 7950 | CTcTokStream::~CTcTokStream() |
| | 7951 | { |
| | 7952 | /* we own the underlying file, so delete it */ |
| | 7953 | if (src_ != 0) |
| | 7954 | delete src_; |
| | 7955 | } |
| | 7956 | |
| | 7957 | /* ------------------------------------------------------------------------ */ |
| | 7958 | /* |
| | 7959 | * File Descriptor |
| | 7960 | */ |
| | 7961 | |
| | 7962 | /* |
| | 7963 | * Get the length of a string with each instance of the given quote |
| | 7964 | * character escaped with a backslash. We'll also count the escapes we |
| | 7965 | * need for each backslash. |
| | 7966 | */ |
| | 7967 | static size_t get_quoted_len(const char *str, wchar_t qu) |
| | 7968 | { |
| | 7969 | utf8_ptr p; |
| | 7970 | size_t len; |
| | 7971 | |
| | 7972 | /* |
| | 7973 | * scan the string for instances of the quote mark; each one adds an |
| | 7974 | * extra byte to the length needed, since each one requires a |
| | 7975 | * backslash character to escape the quote mark |
| | 7976 | */ |
| | 7977 | for (p.set((char *)str), len = strlen(str) ; p.getch() != '\0' ; p.inc()) |
| | 7978 | { |
| | 7979 | wchar_t ch; |
| | 7980 | |
| | 7981 | /* |
| | 7982 | * check to see if this character is quotable - it is quotable if |
| | 7983 | * it's a backslash or it's the quote character we're escaping |
| | 7984 | */ |
| | 7985 | ch = p.getch(); |
| | 7986 | if (ch == qu || ch == '\\') |
| | 7987 | { |
| | 7988 | /* |
| | 7989 | * we need to escape this character, so add a byte for the |
| | 7990 | * backslash we'll need to insert |
| | 7991 | */ |
| | 7992 | ++len; |
| | 7993 | } |
| | 7994 | } |
| | 7995 | |
| | 7996 | /* return the length we calculated */ |
| | 7997 | return len; |
| | 7998 | } |
| | 7999 | |
| | 8000 | /* |
| | 8001 | * Build a quoted string. Fills in dst with the source string with each |
| | 8002 | * of the given quote marks and each backslash escaped with a backslash. |
| | 8003 | * Use get_quoted_len() to determine how much space to allocate for the |
| | 8004 | * destination buffer. |
| | 8005 | */ |
| | 8006 | static void build_quoted_str(char *dstbuf, const char *src, wchar_t qu) |
| | 8007 | { |
| | 8008 | utf8_ptr p; |
| | 8009 | utf8_ptr dst; |
| | 8010 | |
| | 8011 | /* scan the source string for escapable characters */ |
| | 8012 | for (p.set((char *)src), dst.set(dstbuf), dst.setch(qu) ; |
| | 8013 | p.getch() != '\0' ; p.inc()) |
| | 8014 | { |
| | 8015 | wchar_t ch; |
| | 8016 | |
| | 8017 | /* get this source character */ |
| | 8018 | ch = p.getch(); |
| | 8019 | |
| | 8020 | /* add a quote if we have a backslash or the quote character */ |
| | 8021 | if (ch == '\\' || ch == qu) |
| | 8022 | { |
| | 8023 | /* add a backslash to escape the character */ |
| | 8024 | dst.setch('\\'); |
| | 8025 | } |
| | 8026 | |
| | 8027 | /* add the character */ |
| | 8028 | dst.setch(ch); |
| | 8029 | } |
| | 8030 | |
| | 8031 | /* add the close quote and trailing null */ |
| | 8032 | dst.setch(qu); |
| | 8033 | dst.setch('\0'); |
| | 8034 | } |
| | 8035 | |
| | 8036 | /* |
| | 8037 | * create a file descriptor |
| | 8038 | */ |
| | 8039 | CTcTokFileDesc::CTcTokFileDesc(const char *fname, size_t fname_len, |
| | 8040 | int index, CTcTokFileDesc *orig_desc, |
| | 8041 | const char *orig_fname, size_t orig_fname_len) |
| | 8042 | { |
| | 8043 | const char *rootname; |
| | 8044 | |
| | 8045 | /* no source pages are allocated yet */ |
| | 8046 | src_pages_ = 0; |
| | 8047 | src_pages_alo_ = 0; |
| | 8048 | |
| | 8049 | /* remember the first instance of this filename in the list */ |
| | 8050 | orig_ = orig_desc; |
| | 8051 | |
| | 8052 | /* there's nothing else in our chain yet */ |
| | 8053 | next_ = 0; |
| | 8054 | |
| | 8055 | /* remember my index in the master list */ |
| | 8056 | index_ = index; |
| | 8057 | |
| | 8058 | /* if there's a filename, save a copy of the name */ |
| | 8059 | fname_ = lib_copy_str(fname, fname_len); |
| | 8060 | |
| | 8061 | /* if there's an original filename save it as well */ |
| | 8062 | orig_fname_ = lib_copy_str(orig_fname, orig_fname_len); |
| | 8063 | |
| | 8064 | /* |
| | 8065 | * get the root filename, since we need to build a quoted version of |
| | 8066 | * that as well as of the basic filename |
| | 8067 | */ |
| | 8068 | rootname = os_get_root_name(fname_); |
| | 8069 | |
| | 8070 | /* |
| | 8071 | * Allocate space for the quoted versions of the filename - make room |
| | 8072 | * for the filename plus the quotes (one on each end) and a null |
| | 8073 | * terminator byte. |
| | 8074 | */ |
| | 8075 | dquoted_fname_ = (char *)t3malloc(get_quoted_len(fname_, '"') + 3); |
| | 8076 | squoted_fname_ = (char *)t3malloc(get_quoted_len(fname_, '\'') + 3); |
| | 8077 | dquoted_rootname_ = (char *)t3malloc(get_quoted_len(rootname, '"') + 3); |
| | 8078 | squoted_rootname_ = (char *)t3malloc(get_quoted_len(rootname, '\'') + 3); |
| | 8079 | |
| | 8080 | /* build the quoted version of the name */ |
| | 8081 | build_quoted_str(dquoted_fname_, fname_, '"'); |
| | 8082 | build_quoted_str(squoted_fname_, fname_, '\''); |
| | 8083 | build_quoted_str(dquoted_rootname_, rootname, '"'); |
| | 8084 | build_quoted_str(squoted_rootname_, rootname, '\''); |
| | 8085 | } |
| | 8086 | |
| | 8087 | /* |
| | 8088 | * delete the descriptor |
| | 8089 | */ |
| | 8090 | CTcTokFileDesc::~CTcTokFileDesc() |
| | 8091 | { |
| | 8092 | /* delete the filename and original filename strings */ |
| | 8093 | lib_free_str(fname_); |
| | 8094 | lib_free_str(orig_fname_); |
| | 8095 | |
| | 8096 | /* delete the quotable filename strings */ |
| | 8097 | t3free(dquoted_fname_); |
| | 8098 | t3free(squoted_fname_); |
| | 8099 | t3free(dquoted_rootname_); |
| | 8100 | t3free(squoted_rootname_); |
| | 8101 | |
| | 8102 | /* delete each source page we've allocated */ |
| | 8103 | if (src_pages_ != 0) |
| | 8104 | { |
| | 8105 | size_t i; |
| | 8106 | |
| | 8107 | /* go through the index array and delete each allocated page */ |
| | 8108 | for (i = 0 ; i < src_pages_alo_ ; ++i) |
| | 8109 | { |
| | 8110 | /* if this page was allocated, delete it */ |
| | 8111 | if (src_pages_[i] != 0) |
| | 8112 | t3free(src_pages_[i]); |
| | 8113 | } |
| | 8114 | |
| | 8115 | /* delete the source page index array */ |
| | 8116 | t3free(src_pages_); |
| | 8117 | } |
| | 8118 | } |
| | 8119 | |
| | 8120 | /* |
| | 8121 | * Source page structure. Each page tracks a block of source lines. |
| | 8122 | */ |
| | 8123 | const size_t TCTOK_SRC_PAGE_CNT = 1024; |
| | 8124 | struct CTcTokSrcPage |
| | 8125 | { |
| | 8126 | /* |
| | 8127 | * Array of line entries on this page. Each entry is zero if it |
| | 8128 | * hasn't been assigned yet, and contains the absolute image file |
| | 8129 | * address of the generated code for the source line if it has been |
| | 8130 | * assigned. |
| | 8131 | */ |
| | 8132 | ulong ofs[TCTOK_SRC_PAGE_CNT]; |
| | 8133 | }; |
| | 8134 | |
| | 8135 | |
| | 8136 | /* |
| | 8137 | * Add a source line |
| | 8138 | */ |
| | 8139 | void CTcTokFileDesc::add_source_line(ulong linenum, ulong line_addr) |
| | 8140 | { |
| | 8141 | size_t page_idx; |
| | 8142 | size_t idx; |
| | 8143 | |
| | 8144 | /* get the index of the page containing this source line */ |
| | 8145 | page_idx = linenum / TCTOK_SRC_PAGE_CNT; |
| | 8146 | |
| | 8147 | /* get the index of the entry within the page */ |
| | 8148 | idx = linenum % TCTOK_SRC_PAGE_CNT; |
| | 8149 | |
| | 8150 | /* |
| | 8151 | * determine if our page index table is large enough, and expand it |
| | 8152 | * if not |
| | 8153 | */ |
| | 8154 | if (page_idx >= src_pages_alo_) |
| | 8155 | { |
| | 8156 | size_t siz; |
| | 8157 | size_t new_alo; |
| | 8158 | |
| | 8159 | /* allocate or expand the source pages array */ |
| | 8160 | new_alo = page_idx + 16; |
| | 8161 | siz = new_alo * sizeof(src_pages_[0]); |
| | 8162 | if (src_pages_ == 0) |
| | 8163 | src_pages_ = (CTcTokSrcPage **)t3malloc(siz); |
| | 8164 | else |
| | 8165 | src_pages_ = (CTcTokSrcPage **)t3realloc(src_pages_, siz); |
| | 8166 | |
| | 8167 | /* clear the new part */ |
| | 8168 | memset(src_pages_ + src_pages_alo_, 0, |
| | 8169 | (new_alo - src_pages_alo_) * sizeof(src_pages_[0])); |
| | 8170 | |
| | 8171 | /* remember the new allocation size */ |
| | 8172 | src_pages_alo_ = new_alo; |
| | 8173 | } |
| | 8174 | |
| | 8175 | /* if this page isn't allocated, do so now */ |
| | 8176 | if (src_pages_[page_idx] == 0) |
| | 8177 | { |
| | 8178 | /* allocate the new page */ |
| | 8179 | src_pages_[page_idx] = (CTcTokSrcPage *) |
| | 8180 | t3malloc(sizeof(CTcTokSrcPage)); |
| | 8181 | |
| | 8182 | /* clear it */ |
| | 8183 | memset(src_pages_[page_idx], 0, sizeof(CTcTokSrcPage)); |
| | 8184 | } |
| | 8185 | |
| | 8186 | /* |
| | 8187 | * if this source line entry has been previously set, don't change |
| | 8188 | * it; otherwise, store the new setting |
| | 8189 | */ |
| | 8190 | if (src_pages_[page_idx]->ofs[idx] == 0) |
| | 8191 | src_pages_[page_idx]->ofs[idx] = line_addr; |
| | 8192 | } |
| | 8193 | |
| | 8194 | /* |
| | 8195 | * Enumerate source lines |
| | 8196 | */ |
| | 8197 | void CTcTokFileDesc::enum_source_lines(void (*cbfunc)(void *, ulong, ulong), |
| | 8198 | void *cbctx) |
| | 8199 | { |
| | 8200 | size_t page_idx; |
| | 8201 | CTcTokSrcPage **pg; |
| | 8202 | |
| | 8203 | /* loop over all of the pages */ |
| | 8204 | for (page_idx = 0, pg = src_pages_ ; page_idx < src_pages_alo_ ; |
| | 8205 | ++page_idx, ++pg) |
| | 8206 | { |
| | 8207 | size_t i; |
| | 8208 | ulong linenum; |
| | 8209 | ulong *p; |
| | 8210 | |
| | 8211 | /* if this page is not populated, skip it */ |
| | 8212 | if (*pg == 0) |
| | 8213 | continue; |
| | 8214 | |
| | 8215 | /* calculate the starting line number for this page */ |
| | 8216 | linenum = page_idx * TCTOK_SRC_PAGE_CNT; |
| | 8217 | |
| | 8218 | /* loop over the entries on this page */ |
| | 8219 | for (i = 0, p = (*pg)->ofs ; i < TCTOK_SRC_PAGE_CNT ; |
| | 8220 | ++i, ++p, ++linenum) |
| | 8221 | { |
| | 8222 | /* if this entry has been set, call the callback */ |
| | 8223 | if (*p != 0) |
| | 8224 | (*cbfunc)(cbctx, linenum, *p); |
| | 8225 | } |
| | 8226 | } |
| | 8227 | } |
| | 8228 | |
| | 8229 | /* ------------------------------------------------------------------------ */ |
| | 8230 | /* |
| | 8231 | * #define symbol table hash entry |
| | 8232 | */ |
| | 8233 | |
| | 8234 | /* |
| | 8235 | * create an entry |
| | 8236 | */ |
| | 8237 | CTcHashEntryPpDefine::CTcHashEntryPpDefine(const textchar_t *str, size_t len, |
| | 8238 | int copy, int has_args, int argc, |
| | 8239 | int has_varargs, |
| | 8240 | const char **argv, |
| | 8241 | const size_t *argvlen, |
| | 8242 | const char *expansion, |
| | 8243 | size_t expan_len) |
| | 8244 | : CTcHashEntryPp(str, len, copy) |
| | 8245 | { |
| | 8246 | /* copy the argument list if necessary */ |
| | 8247 | has_args_ = has_args; |
| | 8248 | has_varargs_ = has_varargs; |
| | 8249 | argc_ = argc; |
| | 8250 | if (argc != 0) |
| | 8251 | { |
| | 8252 | int i; |
| | 8253 | |
| | 8254 | /* allocate the argument list */ |
| | 8255 | argv_ = (char **)t3malloc(argc * sizeof(*argv_)); |
| | 8256 | |
| | 8257 | /* allocate the parameters hash table */ |
| | 8258 | params_table_ = new CVmHashTable(16, new CVmHashFuncCS(), TRUE); |
| | 8259 | |
| | 8260 | /* allocate the entry list */ |
| | 8261 | arg_entry_ = (CTcHashEntryPpArg **) |
| | 8262 | t3malloc(argc * sizeof(arg_entry_[0])); |
| | 8263 | |
| | 8264 | /* copy the arguments */ |
| | 8265 | for (i = 0 ; i < argc ; ++i) |
| | 8266 | { |
| | 8267 | CTcHashEntryPpArg *entry; |
| | 8268 | |
| | 8269 | /* copy the argument name */ |
| | 8270 | argv_[i] = lib_copy_str(argv[i], argvlen[i]); |
| | 8271 | |
| | 8272 | /* |
| | 8273 | * Create the hash entries for this parameters. We'll use |
| | 8274 | * this entry to look up tokens in the expansion text for |
| | 8275 | * matches to the formal names when expanding the macro. |
| | 8276 | * |
| | 8277 | * Note that we'll refer directly to our local copy of the |
| | 8278 | * argument name, so we don't need to make another copy in |
| | 8279 | * the hash entry. |
| | 8280 | */ |
| | 8281 | entry = new CTcHashEntryPpArg(argv_[i], argvlen[i], FALSE, i); |
| | 8282 | params_table_->add(entry); |
| | 8283 | |
| | 8284 | /* add it to our by-index list */ |
| | 8285 | arg_entry_[i] = entry; |
| | 8286 | } |
| | 8287 | } |
| | 8288 | else |
| | 8289 | { |
| | 8290 | /* no arguments */ |
| | 8291 | argv_ = 0; |
| | 8292 | params_table_ = 0; |
| | 8293 | arg_entry_ = 0; |
| | 8294 | } |
| | 8295 | |
| | 8296 | /* save the expansion */ |
| | 8297 | expan_ = lib_copy_str(expansion, expan_len); |
| | 8298 | expan_len_ = expan_len; |
| | 8299 | } |
| | 8300 | |
| | 8301 | /* |
| | 8302 | * delete |
| | 8303 | */ |
| | 8304 | CTcHashEntryPpDefine::~CTcHashEntryPpDefine() |
| | 8305 | { |
| | 8306 | int i; |
| | 8307 | |
| | 8308 | /* delete the argument list */ |
| | 8309 | if (argv_ != 0) |
| | 8310 | { |
| | 8311 | /* delete each argument string */ |
| | 8312 | for (i = 0 ; i < argc_ ; ++i) |
| | 8313 | lib_free_str(argv_[i]); |
| | 8314 | |
| | 8315 | /* delete the argument vector */ |
| | 8316 | t3free(argv_); |
| | 8317 | |
| | 8318 | /* delete the argument entry list */ |
| | 8319 | t3free(arg_entry_); |
| | 8320 | |
| | 8321 | /* delete the hash table */ |
| | 8322 | delete params_table_; |
| | 8323 | } |
| | 8324 | |
| | 8325 | /* delete the expansion */ |
| | 8326 | lib_free_str(expan_); |
| | 8327 | } |
| | 8328 | |
| | 8329 | /* |
| | 8330 | * __LINE__ static buffer: storage for the class-static 20-char array CTcHashEntryPpLINE::buf_ |
| | 8331 | */ |
| | 8332 | char CTcHashEntryPpLINE::buf_[20]; |
| | 8333 | |
| | 8334 | |
| | 8335 | /* ------------------------------------------------------------------------ */ |
| | 8336 | /* |
| | 8337 | * Load macro definitions from a file. |
| | 8338 | */ |
| | 8339 | int CTcTokenizer::load_macros_from_file(CVmStream *fp, |
| | 8340 | CTcTokLoadMacErr *err_handler) |
| | 8341 | { |
| | 8342 | long cnt; |
| | 8343 | long i; |
| | 8344 | size_t curarg; |
| | 8345 | char *argv[TOK_MAX_MACRO_ARGS]; |
| | 8346 | size_t argvlen[TOK_MAX_MACRO_ARGS]; |
| | 8347 | size_t maxarg; |
| | 8348 | int result; |
| | 8349 | char *expan; |
| | 8350 | size_t expmaxlen; |
| | 8351 | |
| | 8352 | /* we haven't allocated any argument buffers yet */ |
| | 8353 | maxarg = 0; |
| | 8354 | |
| | 8355 | /* allocate an initial expansion buffer */ |
| | 8356 | expmaxlen = 1024; |
| | 8357 | expan = (char *)t3malloc(expmaxlen); |
| | 8358 | |
| | 8359 | /* presume success */ |
| | 8360 | result = 0; |
| | 8361 | |
| | 8362 | /* read the number of macros */ |
| | 8363 | cnt = fp->read_uint4(); |
| | 8364 | |
| | 8365 | /* read each macro */ |
| | 8366 | for (i = 0 ; i < cnt ; ++i) |
| | 8367 | { |
| | 8368 | char namebuf[TOK_SYM_MAX_LEN]; |
| | 8369 | size_t namelen; |
| | 8370 | int flags; |
| | 8371 | size_t argc; |
| | 8372 | size_t explen; |
| | 8373 | CTcHashEntryPp *entry; |
| | 8374 | int has_args; |
| | 8375 | int has_varargs; |
| | 8376 | |
| | 8377 | /* read the name's length */ |
| | 8378 | namelen = fp->read_uint2(); |
| | 8379 | if (namelen > sizeof(namebuf)) |
| | 8380 | { |
| | 8381 | /* log an error through the handler */ |
| | 8382 | err_handler->log_error(1); |
| | 8383 | |
| | 8384 | /* give up - we can't read any more of the file */ |
| | 8385 | result = 1; |
| | 8386 | goto done; |
| | 8387 | } |
| | 8388 | |
| | 8389 | /* read the name */ |
| | 8390 | fp->read_bytes(namebuf, namelen); |
| | 8391 | |
| | 8392 | /* read and decode the flags */ |
| | 8393 | flags = fp->read_uint2(); |
| | 8394 | has_args = ((flags & 1) != 0); |
| | 8395 | has_varargs = ((flags & 2) != 0); |
| | 8396 | |
| | 8397 | /* read the number of arguments, and read each argument */ |
| | 8398 | argc = fp->read_uint2(); |
| | 8399 | for (curarg = 0 ; curarg < argc ; ++curarg) |
| | 8400 | { |
| | 8401 | /* read the length, and make sure it's valid */ |
| | 8402 | argvlen[curarg] = fp->read_uint2(); |
| | 8403 | if (argvlen[curarg] > TOK_SYM_MAX_LEN) |
| | 8404 | { |
| | 8405 | /* log an error */ |
| | 8406 | err_handler->log_error(2); |
| | 8407 | |
| | 8408 | /* give up - we can't read any more of the file */ |
| | 8409 | result = 2; |
| | 8410 | goto done; |
| | 8411 | } |
| | 8412 | |
| | 8413 | /* |
| | 8414 | * if we haven't allocated a buffer for this argument slot yet, |
| | 8415 | * allocate it now; allocate the buffer at the maximum symbol |
| | 8416 | * size, so we can reuse the same buffer for an argument of |
| | 8417 | * other macros we read later |
| | 8418 | */ |
| | 8419 | while (curarg >= maxarg) |
| | 8420 | argv[maxarg++] = (char *)t3malloc(TOK_SYM_MAX_LEN); |
| | 8421 | |
| | 8422 | /* read the argument text */ |
| | 8423 | fp->read_bytes(argv[curarg], argvlen[curarg]); |
| | 8424 | } |
| | 8425 | |
| | 8426 | /* read the expansion size */ |
| | 8427 | explen = (size_t)fp->read_uint4(); |
| | 8428 | |
| | 8429 | /* expand the expansion buffer if necessary */ |
| | 8430 | if (explen > expmaxlen) |
| | 8431 | { |
| | 8432 | /* |
| | 8433 | * overshoot a bit, so that we won't have to reallocate again |
| | 8434 | * if we find a slightly larger expansion for a future macro |
| | 8435 | */ |
| | 8436 | expmaxlen = explen + 512; |
| | 8437 | |
| | 8438 | /* allocate the new buffer */ |
| | 8439 | expan = (char *)t3realloc(expan, expmaxlen); |
| | 8440 | } |
| | 8441 | |
| | 8442 | /* read the expansion */ |
| | 8443 | fp->read_bytes(expan, explen); |
| | 8444 | |
| | 8445 | /* |
| | 8446 | * Before we create the entry, check to see if there's an existing |
| | 8447 | * entry with the same name. |
| | 8448 | */ |
| | 8449 | entry = find_define(namebuf, namelen); |
| | 8450 | if (entry != 0) |
| | 8451 | { |
| | 8452 | /* |
| | 8453 | * We have another entry. If the entry is exactly the same, |
| | 8454 | * then we can simply skip the current entry, because we simply |
| | 8455 | * want to keep one copy of each macro that's defined |
| | 8456 | * identically in mutiple compilation macros. If the entry is |
| | 8457 | * different from the new one, delete both - a macro which |
| | 8458 | * appears in two or more compilation units with different |
| | 8459 | * meanings is NOT a global macro, and thus we can't include it |
| | 8460 | * in the debugging records. |
| | 8461 | */ |
| | 8462 | if (entry->is_pseudo() |
| | 8463 | || entry->has_args() != has_args |
| | 8464 | || entry->has_varargs() != has_varargs |
| | 8465 | || entry->get_argc() != (int)argc |
| | 8466 | || entry->get_expan_len() != explen |
| | 8467 | || memcmp(entry->get_expansion(), expan, explen) != 0) |
| | 8468 | { |
| | 8469 | /* |
| | 8470 | * The existing entry is different from the new entry, so |
| | 8471 | * the macro has different meanings in different |
| | 8472 | * compilation units, hence we cannot keep *either* |
| | 8473 | * definition in the debug records. Delete the existing |
| | 8474 | * macro, and do not create the new macro. If the existing |
| | 8475 | * macro is a pseudo-macro, keep the old one (since it's |
| | 8476 | * provided by the compiler itself), but still discard the |
| | 8477 | * new one. |
| | 8478 | */ |
| | 8479 | if (!entry->is_pseudo()) |
| | 8480 | undefine(namebuf, namelen); |
| | 8481 | } |
| | 8482 | else |
| | 8483 | { |
| | 8484 | /* |
| | 8485 | * The new entry is identical to the old one, so keep it. |
| | 8486 | * We only need one copy of the entry, though, so simply |
| | 8487 | * keep the old one - there's no need to create a new entry |
| | 8488 | * for the object file data. |
| | 8489 | */ |
| | 8490 | } |
| | 8491 | } |
| | 8492 | else |
| | 8493 | { |
| | 8494 | /* |
| | 8495 | * There's no existing macro with the same name, so create a |
| | 8496 | * new entry based on the object file data. |
| | 8497 | */ |
| | 8498 | entry = new CTcHashEntryPpDefine(namebuf, namelen, TRUE, |
| | 8499 | has_args, argc, has_varargs, |
| | 8500 | (const char **)argv, argvlen, |
| | 8501 | expan, explen); |
| | 8502 | |
| | 8503 | /* add it to the preprocessor's macro symbol table */ |
| | 8504 | add_define(entry); |
| | 8505 | } |
| | 8506 | } |
| | 8507 | |
| | 8508 | done: |
| | 8509 | /* free the argument buffers we allocated */ |
| | 8510 | for (curarg = 0 ; curarg < maxarg ; ++curarg) |
| | 8511 | t3free(argv[curarg]); |
| | 8512 | |
| | 8513 | /* free the expansion buffer */ |
| | 8514 | t3free(expan); |
| | 8515 | |
| | 8516 | /* success */ |
| | 8517 | return result; |
| | 8518 | } |
| | 8519 | |
| | 8520 | /* ------------------------------------------------------------------------ */ |
| | 8521 | /* |
| | 8522 | * Callback context handed to write_macros_cb() while enumerating #define symbols for output to a file |
| | 8523 | */ |
| | 8524 | struct write_macro_ctx_t |
| | 8525 | { |
| | 8526 | /* object file we're writing to (write_macros_cb() writes each macro record here) */ |
| | 8527 | CVmFile *fp; |
| | 8528 | |
| | 8529 | /* number of symbols written so far (write_macros_cb() increments this for each macro it writes) */ |
| | 8530 | unsigned long cnt; |
| | 8531 | }; |
| | 8532 | |
| | 8533 | /* |
| | 8534 | * Enumeration callback for writing the #define symbols to a file |
| | 8535 | */ |
| | 8536 | static void write_macros_cb(void *ctx0, CTcHashEntryPp *entry) |
| | 8537 | { |
| | 8538 | write_macro_ctx_t *ctx = (write_macro_ctx_t *)ctx0; |
| | 8539 | int flags; |
| | 8540 | int i; |
| | 8541 | CVmFile *fp = ctx->fp; |
| | 8542 | |
| | 8543 | /* |
| | 8544 | * if this is a pseudo-macro (such as __LINE__ or __FILE__), ignore it |
| | 8545 | * - these macros do not have permanent global definitions, so they're |
| | 8546 | * not usable in the debugger |
| | 8547 | */ |
| | 8548 | if (entry->is_pseudo()) |
| | 8549 | return; |
| | 8550 | |
| | 8551 | /* |
| | 8552 | * If the macro was ever redefined or undefined, ignore it - the |
| | 8553 | * debugger can only use truly global macros, which are macros that |
| | 8554 | * have stable meanings throughout the compilation units where they |
| | 8555 | * appear (and which do not have different meanings in different |
| | 8556 | * compilation units, but that's not our concern at the moment). The |
| | 8557 | * preprocessor keeps an "undef" table of everything undefined |
| | 8558 | * (explicitly, or implicitly via redefinition), so look up this macro |
| | 8559 | * in the undef table, and ignore the macro if it we find it. |
| | 8560 | */ |
| | 8561 | if (G_tok->find_undef(entry->getstr(), entry->getlen()) != 0) |
| | 8562 | return; |
| | 8563 | |
| | 8564 | /* count this macro */ |
| | 8565 | ctx->cnt++; |
| | 8566 | |
| | 8567 | /* write the macro's name */ |
| | 8568 | fp->write_int2(entry->getlen()); |
| | 8569 | fp->write_bytes(entry->getstr(), entry->getlen()); |
| | 8570 | |
| | 8571 | /* write the flag bits */ |
| | 8572 | flags = 0; |
| | 8573 | if (entry->has_args()) flags |= 1; |
| | 8574 | if (entry->has_varargs()) flags |= 2; |
| | 8575 | fp->write_int2(flags); |
| | 8576 | |
| | 8577 | /* write the number of arguments, and write each argument */ |
| | 8578 | fp->write_int2(entry->get_argc()); |
| | 8579 | for (i = 0 ; i < entry->get_argc() ; ++i) |
| | 8580 | { |
| | 8581 | CTcHashEntryPpArg *arg; |
| | 8582 | |
| | 8583 | /* get the argument */ |
| | 8584 | arg = entry->get_arg_entry(i); |
| | 8585 | |
| | 8586 | /* write the parameter name */ |
| | 8587 | fp->write_int2(arg->getlen()); |
| | 8588 | fp->write_bytes(arg->getstr(), arg->getlen()); |
| | 8589 | } |
| | 8590 | |
| | 8591 | /* write the expansion */ |
| | 8592 | fp->write_int4(entry->get_expan_len()); |
| | 8593 | fp->write_bytes(entry->get_expansion(), entry->get_expan_len()); |
| | 8594 | } |
| | 8595 | |
| | 8596 | /* |
| | 8597 | * Write all #define symbols to a file, for debugging purposes. Writes |
| | 8598 | * only symbols that have never been undefined or redefined, since the |
| | 8599 | * debugger can only make use of global symbols (i.e., symbols with |
| | 8600 | * consistent meanings through all compilation units in which they |
| | 8601 | * appear). |
| | 8602 | */ |
| | 8603 | void CTcTokenizer::write_macros_to_file_for_debug(CVmFile *fp) |
| | 8604 | { |
| | 8605 | long pos; |
| | 8606 | long endpos; |
| | 8607 | write_macro_ctx_t ctx; |
| | 8608 | |
| | 8609 | /* write a placeholder for the symbol count */ |
| | 8610 | pos = fp->get_pos(); |
| | 8611 | fp->write_int4(0); |
| | 8612 | |
| | 8613 | /* write the symbols */ |
| | 8614 | ctx.fp = fp; |
| | 8615 | ctx.cnt = 0; |
| | 8616 | enum_defines(&write_macros_cb, &ctx); |
| | 8617 | |
| | 8618 | /* go back and fix up the symbol count */ |
| | 8619 | endpos = fp->get_pos(); |
| | 8620 | fp->set_pos(pos); |
| | 8621 | fp->write_int4(ctx.cnt); |
| | 8622 | |
| | 8623 | /* seek back to where we left off */ |
| | 8624 | fp->set_pos(endpos); |
| | 8625 | } |