| | 1 | /* $Header: d:/cvsroot/tads/tads3/tctok.h,v 1.5 1999/07/11 00:46:59 MJRoberts Exp $ */ |
| | 2 | |
| | 3 | /* |
| | 4 | * Copyright (c) 1999, 2002 Michael J. Roberts. All Rights Reserved. |
| | 5 | * |
| | 6 | * Please see the accompanying license file, LICENSE.TXT, for information |
| | 7 | * on using and copying this software. |
| | 8 | */ |
| | 9 | /* |
| | 10 | Name |
| | 11 | tctok.h - TADS3 compiler tokenizer and preprocessor |
| | 12 | Function |
| | 13 | |
| | 14 | Notes |
| | 15 | The tokenizer is layered with the preprocessor, so that the preprocessor |
| | 16 | can deal with include files, macro expansion, and preprocessor directives. |
| | 17 | Modified |
| | 18 | 04/12/99 MJRoberts - Creation |
| | 19 | */ |
| | 20 | |
| | 21 | #ifndef TCTOK_H |
| | 22 | #define TCTOK_H |
| | 23 | |
| | 24 | #include <stdlib.h> |
| | 25 | #include <string.h> |
| | 26 | #include <assert.h> |
| | 27 | |
| | 28 | #include "os.h" |
| | 29 | #include "t3std.h" |
| | 30 | #include "utf8.h" |
| | 31 | #include "vmhash.h" |
| | 32 | #include "vmerr.h" |
| | 33 | #include "tcerr.h" |
| | 34 | #include "tcerrnum.h" |
| | 35 | |
| | 36 | |
| | 37 | /* ------------------------------------------------------------------------ */ |
| | 38 | /* |
| | 39 | * Constants |
| | 40 | */ |
| | 41 | |
/* longest permitted symbol name, counted in characters */
const size_t TOK_SYM_MAX_LEN = 80;

/*
 *   Number of bytes needed to hold the longest permitted symbol: up to
 *   three UTF-8 bytes for each character, plus one more byte for the
 *   null terminator.
 */
const size_t TOK_SYM_MAX_BUFFER = TOK_SYM_MAX_LEN * 3 + 1;

/* deepest permitted nesting of #if blocks */
const size_t TOK_MAX_IF_NESTING = 100;

/* largest number of parameters a single macro can have */
const int TOK_MAX_MACRO_ARGS = 128;
| | 57 | |
/*
 *   Special token flag characters - these are characters that can't
 *   occur in an input file (we guarantee this by converting any
 *   occurrences of these characters to a space on reading input).  We
 *   use these to flag certain special properties of tokens in the input
 *   buffer.
 *
 *   We use ASCII characters in the control range (0x01 (^A) through 0x1A
 *   (^Z), excluding 0x09 (tab), 0x0A (LF), 0x0D (CR), and 0x0C (Page
 *   Feed)); a well-formed source file would never use any of these
 *   characters in input.  Even if it does, we won't get confused, since
 *   we'll always translate these to a space if we find them in input; but
 *   choosing characters that *should* never occur in valid input will
 *   ensure that we never alter the meaning of valid source by this
 *   translation.
 */
| | 74 | |
/*
 *   Macro formal parameter flag.  In the stored form of a #define
 *   expansion, this byte marks each place where a formal parameter is
 *   mentioned, so that the actual can be substituted there when the
 *   macro is expanded.
 */
const char TOK_MACRO_FORMAL_FLAG = '\x01';

/*
 *   Token-fully-expanded flag.  Once a token is known to have been fully
 *   expanded during a macro expansion, this byte is inserted in front of
 *   it; on later re-scans, a token preceded by this flag is not
 *   considered for expansion again.
 */
const char TOK_FULLY_EXPANDED_FLAG = '\x02';

/*
 *   Macro substitution end marker.  Immediately after each macro
 *   expansion we insert a pseudo-token made up of this flag followed by
 *   a pointer to the symbol table entry of the symbol that was expanded.
 *   Before expanding a macro, we look for such markers in the buffer
 *   after the macro; a marker matching the symbol about to be expanded
 *   means that the symbol was already fully expanded on an earlier scan
 *   and must not be expanded again.
 */
const char TOK_MACRO_EXP_END = '\x03';

/*
 *   End-of-line flag: a local end-of-file marker for preprocessor lines.
 *   Preprocessor lines must be considered in isolation, so the tokenizer
 *   must not go on to read another line when it reaches the end of one.
 *   On encountering this flag, the tokenizer simply returns end-of-file
 *   until the caller explicitly reads a new source line.
 */
const char TOK_END_PP_LINE = '\x04';

/*
 *   "#foreach" marker flag.  The text of a macro's expansion area is
 *   left intact, except that each #foreach token in it is replaced with
 *   this marker character.
 */
const char TOK_MACRO_FOREACH_FLAG = '\x05';

/*
 *   "#argcount" marker flag: marks the presence of a #argcount token in
 *   a macro's expansion.
 */
const char TOK_MACRO_ARGCOUNT_FLAG = '\x06';

/*
 *   "#ifempty" and "#ifnempty" marker flags
 */
const char TOK_MACRO_IFEMPTY_FLAG = '\x07';
const char TOK_MACRO_IFNEMPTY_FLAG = '\x08';
| | 133 | |
| | 134 | |
| | 135 | /* ------------------------------------------------------------------------ */ |
/*
 *   #if state: where processing currently stands with respect to a
 *   #if / #elif / #else block
 */
enum tok_if_t
{
    /* not in a #if block at all */
    TOKIF_NONE,

    /* processing a true #if branch */
    TOKIF_IF_YES,

    /* processing a false #if branch */
    TOKIF_IF_NO,

    /* done with true #if/#elif; skip #elif's and #else */
    TOKIF_IF_DONE,

    /* processing a true #else branch */
    TOKIF_ELSE_YES,

    /* processing a false #else branch */
    TOKIF_ELSE_NO
};
| | 148 | |
| | 149 | /* |
| | 150 | * #if stack entry |
| | 151 | */ |
| | 152 | struct tok_if_info_t |
| | 153 | { |
| | 154 | /* state */ |
| | 155 | tok_if_t state; |
| | 156 | |
| | 157 | /* file descriptor and line number of starting #if */ |
| | 158 | class CTcTokFileDesc *desc; |
| | 159 | long linenum; |
| | 160 | }; |
| | 161 | |
| | 162 | /* ------------------------------------------------------------------------ */ |
/*
 *   Token Types.  The enumerator order is significant only in that the
 *   numeric values follow from it; new types must not be inserted in a
 *   way that callers relying on the values would notice.
 *   NOTE(review): whether any caller depends on the numeric values is not
 *   visible here - confirm before reordering.
 */
enum tc_toktyp_t
{
    /* ---- special and placeholder tokens ---- */
    TOKT_INVALID,                                          /* invalid token */
    TOKT_NULLTOK,          /* null token - caller should read another token */
    TOKT_EOF,                                                /* end of file */
    TOKT_MACRO_FORMAL,          /* formal parameter replacement placeholder */
    TOKT_MACRO_FOREACH,               /* macro varargs #foreach placeholder */
    TOKT_MACRO_ARGCOUNT,             /* macro varargs #argcount placeholder */
    TOKT_MACRO_IFEMPTY,                       /* #ifempty macro placeholder */
    TOKT_MACRO_IFNEMPTY,                     /* #ifnempty macro placeholder */

    /* ---- symbols and literals ---- */
    TOKT_SYM,                                              /* symbolic name */
    TOKT_INT,                                                    /* integer */
    TOKT_SSTR,                                      /* single-quoted string */
    TOKT_DSTR,                                      /* double-quoted string */
    TOKT_DSTR_START,          /* start of a dstring with embedding - "...<< */
    TOKT_DSTR_MID,        /* middle of a dstring with embedding - >>...<< */
    TOKT_DSTR_END,              /* end of a dstring with embedding - >>..." */

    /* ---- punctuation and operators ---- */
    TOKT_LPAR,                                            /* left paren '(' */
    TOKT_RPAR,                                           /* right paren ')' */
    TOKT_COMMA,                                                /* comma ',' */
    TOKT_DOT,                                                 /* period '.' */
    TOKT_LBRACE,                                          /* left brace '{' */
    TOKT_RBRACE,                                         /* right brace '}' */
    TOKT_LBRACK,                                 /* left square bracket '[' */
    TOKT_RBRACK,                                /* right square bracket ']' */
    TOKT_EQ,                                             /* equals sign '=' */
    TOKT_EQEQ,                                   /* double-equals sign '==' */
    TOKT_ASI,                      /* colon-equals assignment operator ':=' */
    TOKT_PLUS,                                             /* plus sign '+' */
    TOKT_MINUS,                                           /* minus sign '-' */
    TOKT_TIMES,                                 /* multiplication symbol '*' */
    TOKT_DIV,                                        /* division symbol '/' */
    TOKT_MOD,                                                 /* modulo '%' */
    TOKT_GT,                                       /* greater-than sign '>' */
    TOKT_LT,                                          /* less-than sign '<' */
    TOKT_GE,                                  /* greater-or-equal sign '>=' */
    TOKT_LE,                                     /* less-or-equal sign '<=' */
    TOKT_NE,                                /* not-equals sign '!=' or '<>' */
    TOKT_ARROW,                                        /* arrow symbol '->' */
    TOKT_COLON,                                                /* colon ':' */
    TOKT_SEM,                                              /* semicolon ';' */
    TOKT_AND,                                            /* bitwise AND '&' */
    TOKT_ANDAND,                                        /* logical AND '&&' */
    TOKT_OR,                                              /* bitwise OR '|' */
    TOKT_OROR,                                           /* logical OR '||' */
    TOKT_XOR,                                            /* bitwise XOR '^' */
    TOKT_SHL,                                            /* shift left '<<' */
    TOKT_SHR,                                           /* shift right '>>' */
    TOKT_INC,                                             /* increment '++' */
    TOKT_DEC,                                             /* decrement '--' */
    TOKT_PLUSEQ,                                        /* plus-equals '+=' */
    TOKT_MINEQ,                                        /* minus-equals '-=' */
    TOKT_TIMESEQ,                                      /* times-equals '*=' */
    TOKT_DIVEQ,                                       /* divide-equals '/=' */
    TOKT_MODEQ,                                          /* mod-equals '%=' */
    TOKT_ANDEQ,                                          /* and-equals '&=' */
    TOKT_OREQ,                                            /* or-equals '|=' */
    TOKT_XOREQ,                                          /* xor-equals '^=' */
    TOKT_SHLEQ,                            /* shift-left-and-assign '<<=' */
    TOKT_SHREQ,                           /* shift-right-and-assign '>>=' */
    TOKT_NOT,                                            /* logical not '!' */
    TOKT_BNOT,                                           /* bitwise not '~' */
    TOKT_POUND,                                                /* pound '#' */
    TOKT_POUNDPOUND,                                   /* double-pound '##' */
    TOKT_POUNDAT,                                          /* pound-at '#@' */
    TOKT_ELLIPSIS,                                         /* ellipsis '...' */
    TOKT_QUESTION,                                     /* question mark '?' */
    TOKT_COLONCOLON,                                   /* double-colon '::' */
    TOKT_FLOAT,                                    /* floating-point number */
    TOKT_AT,                                                     /* at-sign */

    /* ---- keywords ---- */
    TOKT_SELF,
    TOKT_INHERITED,
    TOKT_ARGCOUNT,
    TOKT_IF,
    TOKT_ELSE,
    TOKT_FOR,
    TOKT_WHILE,
    TOKT_DO,
    TOKT_SWITCH,
    TOKT_CASE,
    TOKT_DEFAULT,
    TOKT_GOTO,
    TOKT_BREAK,
    TOKT_CONTINUE,
    TOKT_FUNCTION,
    TOKT_RETURN,
    TOKT_LOCAL,
    TOKT_OBJECT,
    TOKT_NIL,
    TOKT_TRUE,
    TOKT_PASS,
    TOKT_EXTERNAL,
    TOKT_EXTERN,
    TOKT_FORMATSTRING,
    TOKT_CLASS,
    TOKT_REPLACE,
    TOKT_MODIFY,
    TOKT_NEW,
    TOKT_DELETE,
    TOKT_THROW,
    TOKT_TRY,
    TOKT_CATCH,
    TOKT_FINALLY,
    TOKT_INTRINSIC,
    TOKT_DICTIONARY,
    TOKT_GRAMMAR,
    TOKT_ENUM,
    TOKT_TEMPLATE,
    TOKT_STATIC,
    TOKT_FOREACH,
    TOKT_EXPORT,
    TOKT_DELEGATED,
    TOKT_TARGETPROP,
    TOKT_PROPERTYSET,
    TOKT_TARGETOBJ,
    TOKT_DEFININGOBJ,
    TOKT_TRANSIENT,
    TOKT_REPLACED,
    TOKT_PROPERTY

    /*
     *   Type names - formerly reserved but later withdrawn, so they have
     *   no token types of their own:
     *
     *   TOKT_VOID, TOKT_INTKW, TOKT_STRING, TOKT_LIST, TOKT_BOOLEAN,
     *   TOKT_ANY
     */
};
| | 297 | |
| | 298 | /* ------------------------------------------------------------------------ */ |
/*
 *   Source Block.  As we read the source file, we need to keep quoted
 *   strings and symbol names around for later reference, in case they're
 *   needed after reading more tokens and flushing the line buffer.  We
 *   copy the needed text into source blocks, which are kept in memory
 *   throughout the compilation, so that these strings can be referenced
 *   at any time.
 */

/* size of a source block's text buffer, in bytes */
const size_t TCTOK_SRC_BLOCK_SIZE = 50000;

/*
 *   Source block class.  Blocks form a singly-linked list; each block
 *   owns the block that follows it, so deleting a block also deletes
 *   every block after it in the list.
 */
class CTcTokSrcBlock
{
public:
    CTcTokSrcBlock()
    {
        /* no next block yet */
        nxt_ = 0;
    }

    ~CTcTokSrcBlock()
    {
        /*
         *   Delete the rest of the list.  Walk the chain in a loop rather
         *   than letting each block's destructor delete its successor:
         *   the recursive form uses one stack frame per block, so its
         *   stack depth grows with the length of the list.
         */
        CTcTokSrcBlock *cur = nxt_;
        nxt_ = 0;
        while (cur != 0)
        {
            /* remember the successor, then detach it from 'cur' */
            CTcTokSrcBlock *following = cur->nxt_;
            cur->nxt_ = 0;

            /* 'cur' now has no successor, so this deletes only 'cur' */
            delete cur;

            /* move on to the next block */
            cur = following;
        }
    }

    /* get/set the next block in the list */
    CTcTokSrcBlock *get_next() const { return nxt_; }
    void set_next(CTcTokSrcBlock *blk) { nxt_ = blk; }

    /* get a pointer to the block's buffer (TCTOK_SRC_BLOCK_SIZE bytes) */
    char *get_buf() { return buf_; }

private:
    /* the next block in the list, or null if this is the last one */
    CTcTokSrcBlock *nxt_;

    /* the block's text storage */
    char buf_[TCTOK_SRC_BLOCK_SIZE];
};
| | 342 | |
| | 343 | |
| | 344 | /* ------------------------------------------------------------------------ */ |
| | 345 | /* |
| | 346 | * String Buffer. We use these buffers for reading input lines and |
| | 347 | * expanding macros. |
| | 348 | */ |
| | 349 | class CTcTokString |
| | 350 | { |
| | 351 | public: |
| | 352 | CTcTokString() |
| | 353 | { |
| | 354 | /* no buffer yet */ |
| | 355 | buf_ = 0; |
| | 356 | buf_len_ = 0; |
| | 357 | buf_size_ = 0; |
| | 358 | } |
| | 359 | |
| | 360 | virtual ~CTcTokString() |
| | 361 | { |
| | 362 | /* delete our buffer */ |
| | 363 | if (buf_ != 0) |
| | 364 | t3free(buf_); |
| | 365 | } |
| | 366 | |
| | 367 | /* ensure that a given amount of space if available */ |
| | 368 | virtual void ensure_space(size_t siz) |
| | 369 | { |
| | 370 | /* make sure there's room for the requested size plus a null byte */ |
| | 371 | if (buf_size_ < siz + 1) |
| | 372 | { |
| | 373 | /* increase to the next 4k increment */ |
| | 374 | buf_size_ = (siz + 4095 + 1) & ~4095; |
| | 375 | |
| | 376 | /* allocate or re-allocate the buffer */ |
| | 377 | if (buf_ == 0) |
| | 378 | buf_ = (char *)t3malloc(buf_size_); |
| | 379 | else |
| | 380 | buf_ = (char *)t3realloc(buf_, buf_size_); |
| | 381 | |
| | 382 | /* throw an error if that failed */ |
| | 383 | if (buf_ == 0) |
| | 384 | err_throw(TCERR_NO_STRBUF_MEM); |
| | 385 | } |
| | 386 | } |
| | 387 | |
| | 388 | /* expand the buffer */ |
| | 389 | void expand() |
| | 390 | { |
| | 391 | /* expand to the next 4k increment */ |
| | 392 | ensure_space(buf_size_ + 4096); |
| | 393 | } |
| | 394 | |
| | 395 | /* get the text and the length of the text */ |
| | 396 | const char *get_text() const { return buf_; } |
| | 397 | size_t get_text_len() const { return buf_len_; } |
| | 398 | |
| | 399 | /* get the end of the text */ |
| | 400 | const char *get_text_end() const { return buf_ + buf_len_; } |
| | 401 | |
| | 402 | /* append text to the buffer */ |
| | 403 | virtual void append(const char *p) { append(p, strlen(p)); } |
| | 404 | virtual void append(const char *p, size_t len) |
| | 405 | { |
| | 406 | /* make sure we have space available */ |
| | 407 | ensure_space(buf_len_ + len); |
| | 408 | |
| | 409 | /* copy the text onto the end of our buffer */ |
| | 410 | memcpy(buf_ + buf_len_, p, len); |
| | 411 | |
| | 412 | /* add it to the length of the text */ |
| | 413 | buf_len_ += len; |
| | 414 | |
| | 415 | /* null-terminte it */ |
| | 416 | buf_[buf_len_] = '\0'; |
| | 417 | } |
| | 418 | |
| | 419 | /* prepend text */ |
| | 420 | virtual void prepend(const char *p) { prepend(p, strlen(p)); } |
| | 421 | virtual void prepend(const char *p, size_t len) |
| | 422 | { |
| | 423 | /* make sure we have enough space */ |
| | 424 | ensure_space(buf_len_ + len); |
| | 425 | |
| | 426 | /* |
| | 427 | * move the existing text (including the null terminator) up in the |
| | 428 | * buffer to make room for the prepended text |
| | 429 | */ |
| | 430 | memmove(buf_ + len, buf_, buf_len_ + 1); |
| | 431 | |
| | 432 | /* copy the new text to the start of the buffer */ |
| | 433 | memcpy(buf_, p, len); |
| | 434 | |
| | 435 | /* count the new size */ |
| | 436 | buf_len_ += len; |
| | 437 | } |
| | 438 | |
| | 439 | /* |
| | 440 | * Append a string to the buffer, enclosing the text in single or |
| | 441 | * double quote (as given by 'qu', which must be either '"' or '\'') |
| | 442 | * and backslash-escaping any occurrences of the same quote character |
| | 443 | * found within the string. |
| | 444 | */ |
| | 445 | void append_qu(char qu, const char *p) { append_qu(qu, p, strlen(p)); } |
| | 446 | void append_qu(char qu, const char *p, size_t len) |
| | 447 | { |
| | 448 | const char *start; |
| | 449 | |
| | 450 | /* append the open quote */ |
| | 451 | append(&qu, 1); |
| | 452 | |
| | 453 | /* scan for quotes we'll need to escape */ |
| | 454 | while (len != 0) |
| | 455 | { |
| | 456 | size_t rem; |
| | 457 | |
| | 458 | /* skip to the next quote */ |
| | 459 | for (start = p, rem = len ; rem != 0 && *p != qu ; ++p, --rem) ; |
| | 460 | |
| | 461 | /* insert the chunk up to the quote */ |
| | 462 | if (p != start) |
| | 463 | append(start, p - start); |
| | 464 | |
| | 465 | /* if we did find a quote, append it with a backslash escape */ |
| | 466 | if (rem != 0) |
| | 467 | { |
| | 468 | /* append the backslash and the quote */ |
| | 469 | append("\\", 1); |
| | 470 | append(&qu, 1); |
| | 471 | |
| | 472 | /* skip the quote in the source */ |
| | 473 | ++p; |
| | 474 | --rem; |
| | 475 | } |
| | 476 | |
| | 477 | /* we now only have 'rem' left to consider */ |
| | 478 | len = rem; |
| | 479 | } |
| | 480 | |
| | 481 | /* finally, append the closing quote */ |
| | 482 | append(&qu, 1); |
| | 483 | } |
| | 484 | |
| | 485 | /* insert text into the buffer at the given offset */ |
| | 486 | virtual void insert(int ofs, const char *p, size_t len) |
| | 487 | { |
| | 488 | /* check to see if there's anything after the insertion point */ |
| | 489 | if ((size_t)ofs >= buf_len_) |
| | 490 | { |
| | 491 | /* |
| | 492 | * there's nothing after the insertion point, so this is simply |
| | 493 | * equivalent to 'append' - go do the append, and we're done |
| | 494 | */ |
| | 495 | append(p, len); |
| | 496 | return; |
| | 497 | } |
| | 498 | |
| | 499 | /* ensure there's space for the added text */ |
| | 500 | ensure_space(buf_len_ + len); |
| | 501 | |
| | 502 | /* |
| | 503 | * Move the existing text after the insertion point just far enough |
| | 504 | * to make room for the new text. Include the null terminator. |
| | 505 | */ |
| | 506 | memmove(buf_ + ofs + len, buf_ + ofs, buf_len_ - ofs + 1); |
| | 507 | |
| | 508 | /* copy the new text in at the given offset */ |
| | 509 | memcpy(buf_ + ofs, p, len); |
| | 510 | |
| | 511 | /* include the new text in our length */ |
| | 512 | buf_len_ += len; |
| | 513 | } |
| | 514 | |
| | 515 | /* copy text into the buffer, replacing existing text */ |
| | 516 | virtual void copy(const char *p, size_t len) |
| | 517 | { |
| | 518 | /* ensure we have enough space */ |
| | 519 | ensure_space(len); |
| | 520 | |
| | 521 | /* copy the text */ |
| | 522 | memcpy(buf_, p, len); |
| | 523 | |
| | 524 | /* set our length */ |
| | 525 | buf_len_ = len; |
| | 526 | |
| | 527 | /* null-terminate it */ |
| | 528 | buf_[buf_len_] = '\0'; |
| | 529 | } |
| | 530 | |
| | 531 | /* clear any existing text */ |
| | 532 | virtual void clear_text() |
| | 533 | { |
| | 534 | /* zero the length */ |
| | 535 | buf_len_ = 0; |
| | 536 | |
| | 537 | /* put a null terminator at the start of the buffer if possible */ |
| | 538 | if (buf_size_ > 0) |
| | 539 | buf_[0] = '\0'; |
| | 540 | } |
| | 541 | |
| | 542 | /* get the buffer, for copying text directly into it */ |
| | 543 | virtual char *get_buf() const { return buf_; } |
| | 544 | size_t get_buf_size() const { return buf_size_; } |
| | 545 | |
| | 546 | /* |
| | 547 | * Set the text length - use this after copying directly into the |
| | 548 | * buffer to set the length, excluding the null terminator. We'll |
| | 549 | * add a null terminator at the given length. |
| | 550 | */ |
| | 551 | virtual void set_text_len(size_t len) |
| | 552 | { |
| | 553 | /* set the new length */ |
| | 554 | buf_len_ = len; |
| | 555 | |
| | 556 | /* add a null terminator after the new length */ |
| | 557 | if (len < buf_size_) |
| | 558 | buf_[len] = '\0'; |
| | 559 | } |
| | 560 | |
| | 561 | protected: |
| | 562 | /* buffer */ |
| | 563 | char *buf_; |
| | 564 | |
| | 565 | /* size of the buffer */ |
| | 566 | size_t buf_size_; |
| | 567 | |
| | 568 | /* length of the text in the buffer (excluding trailing null) */ |
| | 569 | size_t buf_len_; |
| | 570 | }; |
| | 571 | |
| | 572 | |
| | 573 | /* |
| | 574 | * String buffer subclass for a non-allocated string that merely |
| | 575 | * references another buffer. This can be used anywhere a CTcString is |
| | 576 | * required, but does not require any allocation. |
| | 577 | * |
| | 578 | * These objects can only be used in 'const' contexts: the underlying |
| | 579 | * buffer cannot be changed or expanded, since we do not own the |
| | 580 | * underlying buffer. |
| | 581 | */ |
| | 582 | class CTcTokStringRef: public CTcTokString |
| | 583 | { |
| | 584 | public: |
| | 585 | CTcTokStringRef() |
| | 586 | { |
| | 587 | /* we have no referenced buffer yet */ |
| | 588 | buf_ = 0; |
| | 589 | buf_size_ = 0; |
| | 590 | buf_len_ = 0; |
| | 591 | } |
| | 592 | |
| | 593 | ~CTcTokStringRef() |
| | 594 | { |
| | 595 | /* we don't own the underlying buffer, so simply forget about it */ |
| | 596 | buf_ = 0; |
| | 597 | } |
| | 598 | |
| | 599 | /* we can't make any changes to the underlying buffer */ |
| | 600 | void ensure_space(size_t) { } |
| | 601 | void append(const char *) { assert(FALSE); } |
| | 602 | void append(const char *, size_t) { assert(FALSE); } |
| | 603 | void prepend(const char *) { assert(FALSE); } |
| | 604 | void prepend(const char *, size_t) { assert(FALSE); } |
| | 605 | void insert(int, const char *, size_t) { assert(FALSE); } |
| | 606 | void copy(const char *, size_t) { assert(FALSE); } |
| | 607 | void clear_text() { assert(FALSE); } |
| | 608 | char *get_buf() const { assert(FALSE); return 0; } |
| | 609 | void set_text_len(size_t) { assert(FALSE); } |
| | 610 | |
| | 611 | /* set my underlying buffer */ |
| | 612 | void set_buffer(const char *buf, size_t len) |
| | 613 | { |
| | 614 | buf_ = (char *)buf; |
| | 615 | buf_size_ = len + 1; |
| | 616 | buf_len_ = len; |
| | 617 | } |
| | 618 | }; |
| | 619 | |
| | 620 | /* ------------------------------------------------------------------------ */ |
| | 621 | /* |
| | 622 | * Token |
| | 623 | */ |
| | 624 | class CTcToken |
| | 625 | { |
| | 626 | public: |
| | 627 | /* get/set the token type */ |
| | 628 | tc_toktyp_t gettyp() const { return typ_; } |
| | 629 | void settyp(tc_toktyp_t typ) { typ_ = typ; } |
| | 630 | |
| | 631 | /* get/set the fully-expanded flag */ |
| | 632 | int get_fully_expanded() const { return fully_expanded_; } |
| | 633 | void set_fully_expanded(int flag) { fully_expanded_ = flag; } |
| | 634 | |
| | 635 | /* get/set the text pointer */ |
| | 636 | const char *get_text() const { return text_; } |
| | 637 | size_t get_text_len() const { return text_len_; } |
| | 638 | void set_text(const char *txt, size_t len) |
| | 639 | { |
| | 640 | text_ = txt; |
| | 641 | text_len_ = len; |
| | 642 | } |
| | 643 | |
| | 644 | /* get/set the integer value */ |
| | 645 | long get_int_val() const { return int_val_; } |
| | 646 | void set_int_val(long val) { typ_ = TOKT_INT; int_val_ = val; } |
| | 647 | |
| | 648 | /* |
| | 649 | * compare the text to the given string - returns true if the text |
| | 650 | * matches, false if not |
| | 651 | */ |
| | 652 | int text_matches(const char *txt, size_t len) const |
| | 653 | { |
| | 654 | return (len == text_len_ |
| | 655 | && memcmp(txt, text_, len) == 0); |
| | 656 | } |
| | 657 | |
| | 658 | private: |
| | 659 | /* token type */ |
| | 660 | tc_toktyp_t typ_; |
| | 661 | |
| | 662 | /* |
| | 663 | * Pointer to the token's text. This is a pointer into the |
| | 664 | * tokenizer's symbol table or into the token list itself, so this |
| | 665 | * pointer is valid as long as the tokenizer and its token list are |
| | 666 | * valid. |
| | 667 | */ |
| | 668 | const char *text_; |
| | 669 | size_t text_len_; |
| | 670 | |
| | 671 | /* integer value - valid when the token type is TOKT_INT */ |
| | 672 | long int_val_; |
| | 673 | |
| | 674 | /* |
| | 675 | * flag: the token has been fully expanded, and should not be |
| | 676 | * expanded further on any subsequent rescan for macros |
| | 677 | */ |
| | 678 | uint fully_expanded_ : 1; |
| | 679 | }; |
| | 680 | |
| | 681 | |
| | 682 | /* ------------------------------------------------------------------------ */ |
| | 683 | /* |
| | 684 | * Macro Expansion Resource object. This object is a collection of |
| | 685 | * resources that are needed for a macro expansion. To avoid frequent |
| | 686 | * allocating and freeing of these resources, we keep a pool of these |
| | 687 | * objects around so that we can re-use them as needed. We'll |
| | 688 | * dynamically expand the pool as necessary, so this doesn't impose any |
| | 689 | * pre-set limits; it simply avoids lots of memory allocation activity. |
| | 690 | */ |
| | 691 | class CTcMacroRsc |
| | 692 | { |
| | 693 | public: |
| | 694 | CTcMacroRsc() |
| | 695 | { |
| | 696 | /* we're not in any lists yet */ |
| | 697 | next_avail_ = 0; |
| | 698 | next_ = 0; |
| | 699 | } |
| | 700 | |
| | 701 | /* buffer for expansion of the whole line */ |
| | 702 | CTcTokString line_exp_; |
| | 703 | |
| | 704 | /* buffer for expansion of current macro on line */ |
| | 705 | CTcTokString macro_exp_; |
| | 706 | |
| | 707 | /* buffer for expansion of an actual parameter value */ |
| | 708 | CTcTokString actual_exp_buf_; |
| | 709 | |
| | 710 | /* next resource object in the "available" list */ |
| | 711 | CTcMacroRsc *next_avail_; |
| | 712 | |
| | 713 | /* next resource object in the master list */ |
| | 714 | CTcMacroRsc *next_; |
| | 715 | }; |
| | 716 | |
| | 717 | |
| | 718 | /* ------------------------------------------------------------------------ */ |
| | 719 | /* |
| | 720 | * Abstract token source interface. This is used to allow external code |
| | 721 | * to inject their own substreams into the main token stream. |
| | 722 | */ |
| | 723 | class CTcTokenSource |
| | 724 | { |
| | 725 | public: |
| | 726 | /* |
| | 727 | * Get the next token from the source. Returns null if there are no |
| | 728 | * more tokens. |
| | 729 | */ |
| | 730 | virtual const CTcToken *get_next_token() = 0; |
| | 731 | |
| | 732 | /* set the enclosing external token source and current token */ |
| | 733 | void set_enclosing_source(CTcTokenSource *src, const CTcToken *tok) |
| | 734 | { |
| | 735 | /* remember the enclosing source */ |
| | 736 | enclosing_src_ = src; |
| | 737 | |
| | 738 | /* remember the current token */ |
| | 739 | enclosing_curtok_ = *tok; |
| | 740 | } |
| | 741 | |
| | 742 | /* get the enclosing external token source */ |
| | 743 | CTcTokenSource *get_enclosing_source() const |
| | 744 | { return enclosing_src_; } |
| | 745 | |
| | 746 | /* get the token that was current when this source was inserted */ |
| | 747 | const CTcToken *get_enclosing_curtok() const |
| | 748 | { return &enclosing_curtok_; } |
| | 749 | |
| | 750 | protected: |
| | 751 | /* the enclosing external token source */ |
| | 752 | CTcTokenSource *enclosing_src_; |
| | 753 | |
| | 754 | /* |
| | 755 | * the current token in effect enclosing this source - this is the |
| | 756 | * token that comes immediately after the source's tokens, because a |
| | 757 | * source is inserted before the current token |
| | 758 | */ |
| | 759 | CTcToken enclosing_curtok_; |
| | 760 | }; |
| | 761 | |
| | 762 | |
| | 763 | /* ------------------------------------------------------------------------ */ |
| | 764 | /* |
| | 765 | * Tokenizer. This object reads a file and constructs a representation |
| | 766 | * of the file as a token list in memory. The tokenizer interprets |
| | 767 | * preprocessor directives and expands macros. |
| | 768 | */ |
| | 769 | class CTcTokenizer |
| | 770 | { |
| | 771 | public: |
| | 772 | /* |
| | 773 | * Create the tokenizer and start reading from the given file. The |
| | 774 | * default character set is generally specified by the user (on the |
| | 775 | * compiler command line, for example), or obtained from the |
| | 776 | * operating system. |
| | 777 | */ |
| | 778 | CTcTokenizer(class CResLoader *res_loader, const char *default_charset); |
| | 779 | |
| | 780 | /* destroy the tokenizer */ |
| | 781 | ~CTcTokenizer(); |
| | 782 | |
| | 783 | /* |
| | 784 | * Reset the tokenizer. Deletes the current source object and all |
| | 785 | * saved token text. This can be used after compilation of a unit |
| | 786 | * is completed and the intermediate parser state can be completely |
| | 787 | * discarded. |
| | 788 | */ |
| | 789 | void reset(); |
| | 790 | |
| | 791 | /* |
| | 792 | * Set the source file. 'src_filename' is the fully-resolved local |
| | 793 | * filename of the source file; 'orig_name' is the original name as |
| | 794 | * given on the command line, in the makefile, or wherever it came |
| | 795 | * from. We keep track of the original name so that we can pass |
| | 796 | * information to the debugger indicating the name as it was originally |
| | 797 | * given; this is more useful than the resolved filename, because we |
| | 798 | * might want to run the debugger on another machine with a different |
| | 799 | * local directory structure. |
| | 800 | */ |
| | 801 | int set_source(const char *src_filename, const char *orig_name); |
| | 802 | |
| | 803 | /* set the source to a memory buffer */ |
| | 804 | void set_source_buf(const char *buf); |
| | 805 | |
| | 806 | /* |
| | 807 | * Add a #include directory to the include path. We search the |
| | 808 | * include path in the order in which they were defined. |
| | 809 | */ |
| | 810 | void add_inc_path(const char *path); |
| | 811 | |
| | 812 | /* |
| | 813 | * Set preprocess-only mode. In this mode, we'll retain |
| | 814 | * preprocessor directives that will be needed if the preprocessed |
| | 815 | * result is itself compiled; for example, we'll retain #line, |
| | 816 | * #pragma C, #error, and #pragma message directives. |
| | 817 | */ |
| | 818 | void set_mode_pp_only(int flag) { pp_only_mode_ = flag; } |
| | 819 | |
| | 820 | /* |
| | 821 | * Set list-includes mode. In this mode, we'll simply scan source |
| | 822 | * files and write to the standard output a list of the names of all |
| | 823 | * of the #include files. |
| | 824 | */ |
| | 825 | void set_list_includes_mode(int flag) { list_includes_mode_ = flag; } |
| | 826 | |
| | 827 | /* |
| | 828 | * Get/set the test-report mode. In this mode, we'll expand __FILE__ |
| | 829 | * macros with the root name only. |
| | 830 | */ |
| | 831 | int get_test_report_mode() const { return test_report_mode_; } |
| | 832 | void set_test_report_mode(int flag) { test_report_mode_ = flag; } |
| | 833 | |
| | 834 | /* enable or disable preprocessing directives */ |
| | 835 | void enable_pp(int enable) { allow_pp_ = enable; } |
| | 836 | |
| | 837 | /* get the type of the current token */ |
| | 838 | tc_toktyp_t cur() const { return curtok_.gettyp(); } |
| | 839 | |
| | 840 | /* get the next token, reading a new line of source if necessary */ |
| | 841 | tc_toktyp_t next(); |
| | 842 | |
| | 843 | /* |
| | 844 | * Un-get the current token and back up to the previous token. The |
| | 845 | * maximum un-get depth is one token - after un-getting one token, |
| | 846 | * another token must not be un-gotten until after reading another |
| | 847 | * token. |
| | 848 | * |
| | 849 | * Tokens un-got with this routine are accessible only to next(), |
| | 850 | * not to any of the lower-level token readers. |
| | 851 | */ |
| | 852 | void unget(); |
| | 853 | |
| | 854 | /* get the current token */ |
| | 855 | const class CTcToken *getcur() const { return &curtok_; } |
| | 856 | |
| | 857 | /* |
| | 858 | * Copy the current token. This makes a copy of the token's text in |
| | 859 | * tokenizer source memory, to ensure that the reference to the text |
| | 860 | * buffer the caller is keeping will remain valid forever. |
| | 861 | */ |
| | 862 | const class CTcToken *copycur(); |
| | 863 | |
| | 864 | /* make a safely storable copy of a given token */ |
| | 865 | void copytok(class CTcToken *dst, const class CTcToken *src); |
| | 866 | |
| | 867 | /* check to see if the current token matches the given text */ |
| | 868 | int cur_tok_matches(const char *txt, size_t len); |
| | 869 | |
| | 870 | /* |
| | 871 | * Set an external token source. We'll read tokens from this source |
| | 872 | * until it is exhausted, at which point we'll revert to the enclosing |
| | 873 | * source. |
| | 874 | * |
| | 875 | * The new source is inserted before the current token, so the current |
| | 876 | * token will become current once again when this source is exhausted. |
| | 877 | * We'll automatically advance to the next token, which (unless we |
| | 878 | * have an ungotten token stashed) will go to the first token in the |
| | 879 | * new source. |
| | 880 | */ |
| | 881 | void set_external_source(CTcTokenSource *src) |
| | 882 | { |
| | 883 | /* |
| | 884 | * store the old source in the new source, so we can restore the |
| | 885 | * old source when we have exhausted the new source |
| | 886 | */ |
| | 887 | src->set_enclosing_source(ext_src_, &curtok_); |
| | 888 | |
| | 889 | /* set the new external source */ |
| | 890 | ext_src_ = src; |
| | 891 | |
| | 892 | /* skip to the next token */ |
| | 893 | next(); |
| | 894 | } |
| | 895 | |
| | 896 | /* clear all external sources, returning to the real token stream */ |
| | 897 | void clear_external_sources(); |
| | 898 | |
| | 899 | /* |
| | 900 | * assume that we should have found a '>>' sequence after an embedded |
| | 901 | * expression in a string - used by parsers to resynchronize after |
| | 902 | * an apparent syntax error |
| | 903 | */ |
| | 904 | void assume_missing_dstr_cont(); |
| | 905 | |
| | 906 | /* define a macro */ |
| | 907 | void add_define(const char *sym, size_t len, const char *expansion, |
| | 908 | size_t expan_len); |
| | 909 | |
| | 910 | void add_define(const char *sym, const char *expansion, size_t expan_len) |
| | 911 | { add_define(sym, strlen(sym), expansion, expan_len); } |
| | 912 | |
| | 913 | void add_define(const char *sym, const char *expansion) |
| | 914 | { add_define(sym, strlen(sym), expansion, strlen(expansion)); } |
| | 915 | |
| | 916 | /* add a macro, given the symbol entry */ |
| | 917 | void add_define(class CTcHashEntryPp *entry); |
| | 918 | |
| | 919 | /* undefine a previously defined macro */ |
| | 920 | void undefine(const char *sym, size_t len); |
| | 921 | void undefine(const char *sym) { undefine(sym, strlen(sym)); } |
| | 922 | |
| | 923 | /* find a #define symbol */ |
| | 924 | class CTcHashEntryPp *find_define(const char *sym, size_t len) const; |
| | 925 | |
| | 926 | /* find an #undef symbol */ |
| | 927 | class CTcHashEntryPp *find_undef(const char *sym, size_t len) const; |
| | 928 | |
| | 929 | /* enumerate all of the #define symbols through a callback */ |
| | 930 | void enum_defines(void (*func)(void *ctx, class CTcHashEntryPp *entry), |
| | 931 | void *ctx); |
| | 932 | |
| | 933 | /* read the next line and handle preprocessor directives */ |
| | 934 | int read_line_pp(); |
| | 935 | |
| | 936 | /* get the file descriptor and line number of the last line read */ |
| | 937 | class CTcTokFileDesc *get_last_desc() const { return last_desc_; } |
| | 938 | long get_last_linenum() const { return last_linenum_; } |
| | 939 | void get_last_pos(class CTcTokFileDesc **desc, long *linenum) const |
| | 940 | { |
| | 941 | *desc = last_desc_; |
| | 942 | *linenum = last_linenum_; |
| | 943 | } |
| | 944 | |
| | 945 | /* |
| | 946 | * set the current file descriptor and line number -- this can be |
| | 947 | * used to force the line position to a previously-saved value |
| | 948 | * (during code generation, for example) for error-reporting and |
| | 949 | * debug-record purposes |
| | 950 | */ |
| | 951 | void set_line_info(class CTcTokFileDesc *desc, long linenum) |
| | 952 | { |
| | 953 | last_desc_ = desc; |
| | 954 | last_linenum_ = linenum; |
| | 955 | } |
| | 956 | |
| | 957 | /* |
| | 958 | * Parse a preprocessor constant expression. We always parse out of |
| | 959 | * the macro expansion buffer (expbuf_), but the caller must set p_ |
| | 960 | * to point to the starting point on the expansion line prior to |
| | 961 | * calling this routine. |
| | 962 | * |
| | 963 | * If 'read_first' is true, we'll read a token into curtok_ before |
| | 964 | * parsing; otherwise, we'll assume the caller has already primed |
| | 965 | * the pump by reading the first token. |
| | 966 | * |
| | 967 | * If 'last_on_line' is true, we'll flag an error if anything is |
| | 968 | * left on the line after we finish parsing the expression. |
| | 969 | * |
| | 970 | * If 'add_line_ending' is true, we'll add an end-of-line marker to |
| | 971 | * the expansion buffer, so that the tokenizer won't attempt to read |
| | 972 | * past the end of the line. Since a preprocessor expression must |
| | 973 | * be contained entirely on a single logical line, we must never try |
| | 974 | * to read past the end of the current line when parsing a |
| | 975 | * preprocessor expression. |
| | 976 | */ |
| | 977 | int pp_parse_expr(class CTcConstVal *result, |
| | 978 | int read_first, int last_on_line, int add_line_ending); |
| | 979 | |
| | 980 | /* log an error, optionally with parameters */ |
| | 981 | static void log_error(int errnum, ...); |
| | 982 | |
| | 983 | /* |
| | 984 | * log an error with the current token text as the parameter, |
| | 985 | * suitable for a "%.*s" format list entry (hence we'll provide two |
| | 986 | * parameters: an integer with the length of the token text, and a |
| | 987 | * pointer to the token text string) |
| | 988 | */ |
| | 989 | void log_error_curtok(int errnum); |
| | 990 | |
| | 991 | /* log a warning, optionally with parameters */ |
| | 992 | static void log_warning(int errnum, ...); |
| | 993 | |
| | 994 | /* log a warning with the current token as the parameter */ |
| | 995 | void log_warning_curtok(int errnum); |
| | 996 | |
| | 997 | /* log a warning or error for the current token */ |
| | 998 | void log_error_or_warning_curtok(tc_severity_t sev, int errnum); |
| | 999 | |
| | 1000 | /* log a warning or error for a given token */ |
| | 1001 | void log_error_or_warning_with_tok(tc_severity_t sev, int errnum, |
| | 1002 | const CTcToken *tok); |
| | 1003 | |
| | 1004 | /* |
| | 1005 | * log then throw a fatal error (this is different from an internal |
| | 1006 | * error in that it indicates an unrecoverable error in the input; |
| | 1007 | * an internal error indicates that something is wrong with the |
| | 1008 | * compiler itself) |
| | 1009 | */ |
| | 1010 | static void throw_fatal_error(int errnum, ...); |
| | 1011 | |
| | 1012 | /* |
| | 1013 | * log then throw an internal error (internal errors are always |
| | 1014 | * fatal: these indicate that something has gone wrong in the |
| | 1015 | * compiler, and are equivalent to an assert failure) |
| | 1016 | */ |
| | 1017 | static void throw_internal_error(int errnum, ...); |
| | 1018 | |
| | 1019 | /* display a string/number value */ |
| | 1020 | void msg_str(const char *str, size_t len) const; |
| | 1021 | void msg_long(long val) const; |
| | 1022 | |
| | 1023 | /* get the current line */ |
| | 1024 | const char *get_cur_line() const { return linebuf_.get_text(); } |
| | 1025 | size_t get_cur_line_len() const { return linebuf_.get_text_len(); } |
| | 1026 | |
| | 1027 | /* get the #define hash table */ |
| | 1028 | class CVmHashTable *get_defines_table() const { return defines_; } |
| | 1029 | |
| | 1030 | /* |
| | 1031 | * look up a token as a keyword; returns true and fills in 'kw' with |
| | 1032 | * the keyword token ID if the token is in fact a keyword, or |
| | 1033 | * returns false if it's not a keyword |
| | 1034 | */ |
| | 1035 | int look_up_keyword(const CTcToken *tok, tc_toktyp_t *kw); |
| | 1036 | |
| | 1037 | /* |
| | 1038 | * Get the next token on the line, filling in the token object. |
| | 1039 | * Advances the pointer to the character immediately following the |
| | 1040 | * token. |
| | 1041 | * |
| | 1042 | * If the token is a string, and the string contains backslash |
| | 1043 | * sequences, we'll modify the source string by translating each |
| | 1044 | * backslash sequence; for example, a "\n" sequence is changed into an |
| | 1045 | * ASCII 10. |
| | 1046 | * |
| | 1047 | * 'expanding' indicates whether or not we're in the initial macro |
| | 1048 | * expansion pass. If this is true, we'll suppress error messages |
| | 1049 | * during this pass, as we'll encounter the same tokens again when we |
| | 1050 | * parse the expanded form of the line. |
| | 1051 | */ |
| | 1052 | static tc_toktyp_t next_on_line(utf8_ptr *p, CTcToken *tok, |
| | 1053 | int *in_embedding, int expanding); |
| | 1054 | |
| | 1055 | /* |
| | 1056 | * Get the text of an operator token. Returns a pointer to a |
| | 1057 | * constant, static, null-terminated string, suitable for use in |
| | 1058 | * error messages. |
| | 1059 | */ |
| | 1060 | static const char *get_op_text(tc_toktyp_t op); |
| | 1061 | |
| | 1062 | /* |
| | 1063 | * Store text in the source list. Text stored here is available |
| | 1064 | * throughout compilation. This routine automatically reserves the |
| | 1065 | * space needed, so do not call 'reserve' or 'commit' separately. |
| | 1066 | */ |
| | 1067 | const char *store_source(const char *txt, size_t len); |
| | 1068 | |
| | 1069 | /* reserve space for text in the source list */ |
| | 1070 | void reserve_source(size_t len); |
| | 1071 | |
| | 1072 | /* |
| | 1073 | * Store a piece of text into pre-reserved space in the source list. |
| | 1074 | * This can be used to build up a string from several pieces. You must |
| | 1075 | * call 'reserve' first to allocate the space, and you must explicitly |
| | 1076 | * add a null terminator at the end of the string. Do not call |
| | 1077 | * 'commit'; this automatically commits the space as each substring is |
| | 1078 | * added. |
| | 1079 | */ |
| | 1080 | const char *store_source_partial(const char *txt, size_t len); |
| | 1081 | |
| | 1082 | /* |
| | 1083 | * Get the index of the next source file descriptor that will be |
| | 1084 | * created. The linker can use this information to fix up |
| | 1085 | * references to file descriptors in an object file when loading |
| | 1086 | * multiple object files. |
| | 1087 | */ |
| | 1088 | int get_next_filedesc_index() const { return next_filedesc_id_; } |
| | 1089 | |
| | 1090 | /* get the number of source file descriptors in the master list */ |
| | 1091 | int get_filedesc_count() const { return next_filedesc_id_; } |
| | 1092 | |
| | 1093 | /* get the file descriptor at the given (0-based) index */ |
| | 1094 | class CTcTokFileDesc *get_filedesc(size_t idx) const |
| | 1095 | { |
| | 1096 | /* return the array entry at the index, if the index is valid */ |
| | 1097 | return (idx < desc_list_cnt_ ? desc_list_[idx] : 0); |
| | 1098 | } |
| | 1099 | |
| | 1100 | /* get the head of the master source file descriptor list */ |
| | 1101 | class CTcTokFileDesc *get_first_filedesc() const { return desc_head_; } |
| | 1102 | |
| | 1103 | /* |
| | 1104 | * Create a new file descriptor and add it to the master list. This |
| | 1105 | * creates the new descriptor unconditionally, even if a descriptor |
| | 1106 | * for the same source file already exists. |
| | 1107 | */ |
| | 1108 | class CTcTokFileDesc *create_file_desc(const char *fname, size_t len) |
| | 1109 | { return get_file_desc(fname, len, TRUE, fname, len); } |
| | 1110 | |
| | 1111 | /* |
| | 1112 | * Set the string capture file. Once this is set, we'll write the |
| | 1113 | * contents of each string token that we encounter to this file, |
| | 1114 | * with a newline after each token. |
| | 1115 | */ |
| | 1116 | void set_string_capture(osfildef *fp); |
| | 1117 | |
| | 1118 | /* write macros to a file, for debugger use */ |
| | 1119 | void write_macros_to_file_for_debug(class CVmFile *fp); |
| | 1120 | |
| | 1121 | /* |
| | 1122 | * Load macros from a file. If any errors occur, we'll flag them |
| | 1123 | * through the error handler object and return a non-zero value. |
| | 1124 | * Returns zero on success. |
| | 1125 | */ |
| | 1126 | int load_macros_from_file(class CVmStream *fp, |
| | 1127 | class CTcTokLoadMacErr *err_handler); |
| | 1128 | |
| | 1129 | /* receive notification that the compiler is done with all parsing */ |
| | 1130 | void parsing_done() |
| | 1131 | { |
| | 1132 | /* forget any input file position */ |
| | 1133 | set_line_info(0, 0); |
| | 1134 | } |
| | 1135 | |
| | 1136 | /* |
| | 1137 | * Stuff text into the tokenizer source stream. The new text is |
| | 1138 | * inserted at the current read pointer, so that the next token we |
| | 1139 | * fetch will come from the start of the inserted text. If 'expand' is |
| | 1140 | * true, we'll expand macros in the text; if not, we'll insert the text |
| | 1141 | * exactly as is with no macro expansion. |
| | 1142 | */ |
| | 1143 | void stuff_text(const char *txt, size_t len, int expand); |
| | 1144 | |
| | 1145 | private: |
| | 1146 | /* skip whitespace and token markers */ |
| | 1147 | static void skip_ws_and_markers(utf8_ptr *p); |
| | 1148 | |
| | 1149 | /* |
| | 1150 | * get the next token on the line; if we go past the end of the |
| | 1151 | * string buffer, we'll return EOF |
| | 1152 | */ |
| | 1153 | static tc_toktyp_t next_on_line(const CTcTokString *srcbuf, utf8_ptr *p, |
| | 1154 | CTcToken *tok, int *in_embedding, |
| | 1155 | int expanding); |
| | 1156 | |
| | 1157 | /* |
| | 1158 | * get the next token on the current line, updating the internal |
| | 1159 | * character position pointer to point just past the token, and filling |
| | 1160 | * in the internal current token object with the token data |
| | 1161 | */ |
| | 1162 | tc_toktyp_t next_on_line() |
| | 1163 | { return next_on_line(&p_, &curtok_, 0, FALSE); } |
| | 1164 | |
| | 1165 | /* get the next token on the line, with string translation */ |
| | 1166 | tc_toktyp_t next_on_line_xlat(int *in_embedding) |
| | 1167 | { return next_on_line_xlat(&p_, &curtok_, in_embedding); } |
| | 1168 | |
| | 1169 | /* |
| | 1170 | * get the next token, translating strings and storing string and |
| | 1171 | * symbol text in the source block list |
| | 1172 | */ |
| | 1173 | tc_toktyp_t next_on_line_xlat_keep(); |
| | 1174 | |
| | 1175 | /* |
| | 1176 | * get the next token on the line, translating strings to internal |
| | 1177 | * format |
| | 1178 | */ |
| | 1179 | tc_toktyp_t next_on_line_xlat(utf8_ptr *p, CTcToken *tok, |
| | 1180 | int *in_embedding); |
| | 1181 | |
| | 1182 | /* |
| | 1183 | * translate a string to internal format by converting escape |
| | 1184 | * sequences; overwrites the original buffer |
| | 1185 | */ |
| | 1186 | tc_toktyp_t xlat_string(utf8_ptr *p, CTcToken *tok, |
| | 1187 | int *in_embedding); |
| | 1188 | |
| | 1189 | /* |
| | 1190 | * translate a string into a given buffer; if 'force_embed_end' is |
| | 1191 | * true, we'll act as though we're continuing the string after the |
| | 1192 | * '>>' after an embedded expression, no matter what the actual |
| | 1193 | * input looks like |
| | 1194 | */ |
| | 1195 | tc_toktyp_t xlat_string_to(char *dst, utf8_ptr *p, CTcToken *tok, |
| | 1196 | int *in_embedding, int force_embed_end); |
| | 1197 | |
| | 1198 | /* |
| | 1199 | * Translate a string, saving the translated version in the source |
| | 1200 | * block list. If 'force_end_embed' is true, we'll act as though we |
| | 1201 | * were looking at '>>' (or, more precisely, we'll act as though |
| | 1202 | * '>>' immediately preceded the current input), regardless of what |
| | 1203 | * the actual input looks like. |
| | 1204 | */ |
| | 1205 | tc_toktyp_t xlat_string_to_src(int *in_embedding, int force_end_embed); |
| | 1206 | |
| | 1207 | /* initialize the source block list */ |
| | 1208 | void init_src_block_list(); |
| | 1209 | |
| | 1210 | /* delete current source file, including all including parents */ |
| | 1211 | void delete_source(); |
| | 1212 | |
| | 1213 | /* |
| | 1214 | * Read the next line; processes comments, but does not expand macros |
| | 1215 | * or parse preprocessor directives. This always reads into linebuf_; |
| | 1216 | * the return value is the offset within linebuf_ of the new text. A |
| | 1217 | * return value of -1 indicates that we're at end of file. |
| | 1218 | */ |
| | 1219 | int read_line(int append); |
| | 1220 | |
| | 1221 | /* |
| | 1222 | * Set the source read pointer to the start of a new line, given the |
| | 1223 | * CTcTokString object containing the buffer, and the offset within |
| | 1224 | * that buffer. |
| | 1225 | */ |
| | 1226 | void start_new_line(CTcTokString *str, int ofs) |
| | 1227 | { |
| | 1228 | /* remember the buffer we're reading out of */ |
| | 1229 | curbuf_ = str; |
| | 1230 | |
| | 1231 | /* set the read pointer to the start of the new line's text */ |
| | 1232 | p_.set((char *)str->get_text() + ofs); |
| | 1233 | } |
| | 1234 | |
| | 1235 | /* unsplice text from the current line and make it the next line */ |
| | 1236 | void unsplice_line(const char *new_line_start); |
| | 1237 | |
| | 1238 | /* |
| | 1239 | * Commit space in the source list - this is used when text is directly |
| | 1240 | * stored after reserving space. The size reserved may be greater than |
| | 1241 | * the size committed, because it is sometimes more efficient to make a |
| | 1242 | * guess that may overestimate the amount we actually end up needing. |
| | 1243 | */ |
| | 1244 | void commit_source(size_t len); |
| | 1245 | |
| | 1246 | /* parse a string */ |
| | 1247 | static tc_toktyp_t tokenize_string(utf8_ptr *p, CTcToken *tok, |
| | 1248 | int *in_embedding); |
| | 1249 | |
| | 1250 | /* process comments */ |
| | 1251 | void process_comments(size_t start_ofs); |
| | 1252 | |
| | 1253 | /* splice lines for a string that runs across multiple lines */ |
| | 1254 | void splice_string(); |
| | 1255 | |
| | 1256 | /* expand macros in the current line */ |
| | 1257 | int expand_macros_curline(int read_more, int allow_defined, |
| | 1258 | int append_to_expbuf); |
| | 1259 | |
| | 1260 | /* |
| | 1261 | * Expand the macros in the given text, filling in the given |
| | 1262 | * CTcTokString with the results. The expansion will clear out any |
| | 1263 | * existing text in the result buffer. Returns zero on success, or |
| | 1264 | * non-zero on error. |
| | 1265 | */ |
| | 1266 | int expand_macros(class CTcTokString *dest, const char *str, size_t len) |
| | 1267 | { |
| | 1268 | CTcTokStringRef srcbuf; |
| | 1269 | |
| | 1270 | /* set up a CTcTokString for the source */ |
| | 1271 | srcbuf.set_buffer(str, len); |
| | 1272 | |
| | 1273 | /* go expand macros */ |
| | 1274 | return expand_macros(&srcbuf, 0, dest, FALSE, FALSE, FALSE); |
| | 1275 | } |
| | 1276 | |
| | 1277 | /* expand all of the macros in the given text */ |
| | 1278 | int expand_macros(class CTcTokString *srcbuf, utf8_ptr *src, |
| | 1279 | class CTcTokString *expbuf, int read_more, |
| | 1280 | int allow_defined, int append); |
| | 1281 | |
| | 1282 | /* expand the macro at the current token on the current line */ |
| | 1283 | int expand_macro(class CTcMacroRsc *res, class CTcTokString *expbuf, |
| | 1284 | const class CTcTokString *srcbuf, utf8_ptr *src, |
| | 1285 | size_t macro_srcbuf_ofs, CTcHashEntryPp *entry, |
| | 1286 | int read_more, int allow_defined, int *expanded); |
| | 1287 | |
| | 1288 | /* |
| | 1289 | * Remove our special expansion flags from an expanded macro buffer. |
| | 1290 | * This can be called after all expansion has been completed to clean |
| | 1291 | * up the buffer for human consumption. |
| | 1292 | */ |
| | 1293 | void remove_expansion_flags(CTcTokString *buf); |
| | 1294 | |
| | 1295 | /* scan for a prior expansion of a macro within the current context */ |
| | 1296 | static int scan_for_prior_expansion(utf8_ptr src, const char *src_end, |
| | 1297 | const class CTcHashEntryPp *entry); |
| | 1298 | |
| | 1299 | /* remove end-of-macro-expansion flags from a buffer */ |
| | 1300 | static void remove_end_markers(class CTcTokString *buf); |
| | 1301 | |
| | 1302 | /* change a buffer to use individual token full-expansion markers */ |
| | 1303 | void mark_full_exp_tokens(CTcTokString *dstbuf, |
| | 1304 | const class CTcTokString *srcbuf, |
| | 1305 | int append) const; |
| | 1306 | |
| | 1307 | /* allocate a macro expansion resource */ |
| | 1308 | class CTcMacroRsc *alloc_macro_rsc(); |
| | 1309 | |
| | 1310 | /* release a macro expansion resource */ |
| | 1311 | void release_macro_rsc(class CTcMacroRsc *rsc); |
| | 1312 | |
| | 1313 | /* |
| | 1314 | * Parse the actual parameters to a macro. Fills in argofs[] and |
| | 1315 | * arglen[] with the offsets (from srcbuf->get_buf()) and lengths, |
| | 1316 | * respectively, of each actual parameter's text. |
| | 1317 | */ |
| | 1318 | int parse_macro_actuals(const class CTcTokString *srcbuf, utf8_ptr *src, |
| | 1319 | const CTcHashEntryPp *macro_entry, |
| | 1320 | size_t argofs[TOK_MAX_MACRO_ARGS], |
| | 1321 | size_t arglen[TOK_MAX_MACRO_ARGS], |
| | 1322 | int read_more, int *found_actuals); |
| | 1323 | |
| | 1324 | /* splice the next line for reading more macro actuals */ |
| | 1325 | tc_toktyp_t actual_splice_next_line(const CTcTokString *srcbuf, |
| | 1326 | utf8_ptr *src, CTcToken *tok); |
| | 1327 | |
| | 1328 | /* substitute the actual parameters in a macro's expansion */ |
| | 1329 | int substitute_macro_actuals(class CTcMacroRsc *rsc, |
| | 1330 | class CTcTokString *subexp, |
| | 1331 | CTcHashEntryPp *macro_entry, |
| | 1332 | const class CTcTokString *srcbuf, |
| | 1333 | const size_t *argofs, const size_t *arglen, |
| | 1334 | int allow_defined); |
| | 1335 | |
| | 1336 | /* stringize a macro actual parameter into an expansion buffer */ |
| | 1337 | void stringize_macro_actual(class CTcTokString *expbuf, |
| | 1338 | const char *actual_val, size_t actual_len, |
| | 1339 | char quote_char, int add_open_quote, |
| | 1340 | int add_close_quote); |
| | 1341 | |
| | 1342 | /* skip a delimited macro expansion area (#foreach, #ifempty, etc) */ |
| | 1343 | void skip_delimited_group(utf8_ptr *p, int parts_to_skip); |
| | 1344 | |
| | 1345 | /* expand a defined() preprocessor operator */ |
| | 1346 | int expand_defined(class CTcTokString *subexp, |
| | 1347 | const class CTcTokString *srcbuf, utf8_ptr *src); |
| | 1348 | |
| | 1349 | /* add a file to the list of files to be included only once */ |
| | 1350 | void add_include_once(const char *fname); |
| | 1351 | |
| | 1352 | /* find a file in the list of files to be included only once */ |
| | 1353 | int find_include_once(const char *fname); |
| | 1354 | |
| | 1355 | /* process a #pragma directive */ |
| | 1356 | void pp_pragma(); |
| | 1357 | |
| | 1358 | /* process a #charset directive */ |
| | 1359 | void pp_charset(); |
| | 1360 | |
| | 1361 | /* process a #include directive */ |
| | 1362 | void pp_include(); |
| | 1363 | |
| | 1364 | /* process a #define directive */ |
| | 1365 | void pp_define(); |
| | 1366 | |
| | 1367 | /* process a #if directive */ |
| | 1368 | void pp_if(); |
| | 1369 | |
| | 1370 | /* process a #ifdef directive */ |
| | 1371 | void pp_ifdef(); |
| | 1372 | |
| | 1373 | /* process a #ifndef directive */ |
| | 1374 | void pp_ifndef(); |
| | 1375 | |
| | 1376 | /* process a #ifdef or #ifndef */ |
| | 1377 | void pp_ifdef_or_ifndef(int sense); |
| | 1378 | |
| | 1379 | /* process a #else directive */ |
| | 1380 | void pp_else(); |
| | 1381 | |
| | 1382 | /* process a #elif directive */ |
| | 1383 | void pp_elif(); |
| | 1384 | |
| | 1385 | /* process a #endif directive */ |
| | 1386 | void pp_endif(); |
| | 1387 | |
| | 1388 | /* process a #error directive */ |
| | 1389 | void pp_error(); |
| | 1390 | |
| | 1391 | /* process a #undef directive */ |
| | 1392 | void pp_undef(); |
| | 1393 | |
| | 1394 | /* process a #line directive */ |
| | 1395 | void pp_line(); |
| | 1396 | |
| | 1397 | /* get a lone identifier for a preprocessor directive */ |
| | 1398 | int pp_get_lone_ident(char *buf, size_t bufl); |
| | 1399 | |
| | 1400 | /* process a #pragma C directive */ |
| | 1401 | // void pragma_c(); - not currently used |
| | 1402 | |
| | 1403 | /* process a #pragma once directive */ |
| | 1404 | void pragma_once(); |
| | 1405 | |
| | 1406 | /* process a #pragma all_once directive */ |
| | 1407 | void pragma_all_once(); |
| | 1408 | |
| | 1409 | /* process a #pragma message directive */ |
| | 1410 | void pragma_message(); |
| | 1411 | |
| | 1412 | /* process a #pragma newline_spacing(on/off) directive */ |
| | 1413 | void pragma_newline_spacing(); |
| | 1414 | |
| | 1415 | /* process a #pragma sourceTextGroup directive */ |
| | 1416 | void pragma_source_text_group(); |
| | 1417 | |
| | 1418 | /* |
| | 1419 | * Determine if we're in a false #if branch. If we're inside a #if |
| | 1420 | * block, and the state is either IF_NO, IF_DONE, or ELSE_NO, or |
| | 1421 | * we're inside a #if nested within any negative branch, we're in a |
| | 1422 | * not-taken branch of a #if block. |
| | 1423 | */ |
| | 1424 | int in_false_if() const |
| | 1425 | { |
| | 1426 | return (if_sp_ != 0 |
| | 1427 | && (if_false_level_ != 0 |
| | 1428 | || if_stack_[if_sp_ - 1].state == TOKIF_IF_NO |
| | 1429 | || if_stack_[if_sp_ - 1].state == TOKIF_IF_DONE |
| | 1430 | || if_stack_[if_sp_ - 1].state == TOKIF_ELSE_NO)); |
| | 1431 | } |
| | 1432 | |
| | 1433 | /* push a new #if level with the given state */ |
| | 1434 | void push_if(tok_if_t state); |
| | 1435 | |
| | 1436 | /* get the current #if state */ |
| | 1437 | tok_if_t get_if_state() const |
| | 1438 | { |
| | 1439 | if (if_sp_ == 0) |
| | 1440 | return TOKIF_NONE; |
| | 1441 | else |
| | 1442 | return if_stack_[if_sp_ - 1].state; |
| | 1443 | } |
| | 1444 | |
| | 1445 | /* switch the current #if level to the given state */ |
| | 1446 | void change_if_state(tok_if_t state) |
| | 1447 | { |
| | 1448 | if (if_sp_ != 0) |
| | 1449 | if_stack_[if_sp_ - 1].state = state; |
| | 1450 | } |
| | 1451 | |
| | 1452 | /* pop the current #if level */ |
| | 1453 | void pop_if(); |
| | 1454 | |
| | 1455 | /* |
| | 1456 | * Find or create a descriptor for the given filename. 'fname' is |
| | 1457 | * the full file system path specifying the file. 'orig_fname' is |
| | 1458 | * the filename as originally specified by the user, if different; |
| | 1459 | * in the case of #include files, this indicates the name that was |
| | 1460 | * specified in the directive itself, whereas 'fname' is the actual |
| | 1461 | * filename that resulted from searching the include path for the |
| | 1462 | * given name. |
| | 1463 | */ |
| | 1464 | class CTcTokFileDesc *get_file_desc(const char *fname, size_t fname_len, |
| | 1465 | int always_create, |
| | 1466 | const char *orig_fname, |
| | 1467 | size_t orig_fname_len); |
| | 1468 | |
| | 1469 | /* clear the line buffer */ |
| | 1470 | void clear_linebuf(); |
| | 1471 | |
| | 1472 | /* flag: ALL_ONCE mode - we include each file only once */ |
| | 1473 | int all_once_ : 1; |
| | 1474 | |
| | 1475 | /* flag: warn on ignoring a redundant #include file */ |
| | 1476 | int warn_on_ignore_incl_ : 1; |
| | 1477 | |
| | 1478 | /* |
| | 1479 | * Flag: in preprocess-only mode. In this mode, we'll leave certain |
| | 1480 | * preprocessor directives intact in the source, since they'll be |
| | 1481 | * needed in a subsequent compilation of the preprocessed source. |
| | 1482 | * For example, we'll leave #line directives, #pragma C, #error, and |
| | 1483 | * #pragma message directives in the preprocessed result. |
| | 1484 | */ |
| | 1485 | int pp_only_mode_ : 1; |
| | 1486 | |
| | 1487 | /* |
| | 1488 | * Flag: in test reporting mode. In this mode, we'll expand __FILE__ |
| | 1489 | * macros with the root name only. |
| | 1490 | */ |
| | 1491 | int test_report_mode_ : 1; |
| | 1492 | |
| | 1493 | /* |
| | 1494 | * Flag: in preprocess-for-includes mode. In this mode, we'll do |
| | 1495 | * nothing except run the preprocessor and generate a list of the |
| | 1496 | * header files that are included, along with header files they |
| | 1497 | * include, and so on. |
| | 1498 | */ |
| | 1499 | int list_includes_mode_ : 1; |
| | 1500 | |
| | 1501 | /* |
| | 1502 | * Flag: treat newlines in strings as whitespace. When this is true, |
| | 1503 | * whenever we find a newline character in a string, we'll convert the |
| | 1504 | * newline and all leading whitespace on the next line to a single |
| | 1505 | * space character. When this is false, we'll entirely strip out each |
| | 1506 | * newline in a string and all whitespace that immediately follows; |
| | 1507 | * this mode is desirable for some languages, such as Chinese, where |
| | 1508 | * whitespace is not conventionally used as a token separator in |
| | 1509 | * ordinary text. |
| | 1510 | */ |
| | 1511 | int string_newline_spacing_ : 1; |
| | 1512 | |
| | 1513 | /* |
| | 1514 | * flag: we're parsing a preprocessor constant expression (for a |
| | 1515 | * #if, for example; this doesn't apply to simple macro expansion) |
| | 1516 | */ |
| | 1517 | int in_pp_expr_ : 1; |
| | 1518 | |
| | 1519 | /* resource loader */ |
| | 1520 | class CResLoader *res_loader_; |
| | 1521 | |
| | 1522 | /* |
| | 1523 | * name of our default character set - this is generally specified |
| | 1524 | * by the user (on the compiler command line, for example), or |
| | 1525 | * obtained from the operating system |
| | 1526 | */ |
| | 1527 | char *default_charset_; |
| | 1528 | |
| | 1529 | /* input (to unicode) character mapper for the default character set */ |
| | 1530 | class CCharmapToUni *default_mapper_; |
| | 1531 | |
| | 1532 | /* head of list of previously-included files */ |
| | 1533 | struct tctok_incfile_t *prev_includes_; |
| | 1534 | |
| | 1535 | /* head and tail of include path list */ |
| | 1536 | struct tctok_incpath_t *incpath_head_; |
| | 1537 | struct tctok_incpath_t *incpath_tail_; |
| | 1538 | |
| | 1539 | /* file descriptor and line number of last line read */ |
| | 1540 | class CTcTokFileDesc *last_desc_; |
| | 1541 | long last_linenum_; |
| | 1542 | |
| | 1543 | /* file descriptor and line number of last line appended */ |
| | 1544 | class CTcTokFileDesc *appended_desc_; |
| | 1545 | long appended_linenum_; |
| | 1546 | |
| | 1547 | /* current input stream */ |
| | 1548 | class CTcTokStream *str_; |
| | 1549 | |
| | 1550 | /* master list of file descriptors */ |
| | 1551 | class CTcTokFileDesc *desc_head_; |
| | 1552 | class CTcTokFileDesc *desc_tail_; |
| | 1553 | |
| | 1554 | /* |
| | 1555 | * array of file descriptors (we keep the list in both an array and |
| | 1556 | * a linked list, since we need both sequential and indexed access; |
| | 1557 | * this isn't a lot of trouble since we never need to remove an |
| | 1558 | * entry from the list) |
| | 1559 | */ |
| | 1560 | class CTcTokFileDesc **desc_list_; |
| | 1561 | |
| | 1562 | /* number of entries in desc_list_ */ |
| | 1563 | size_t desc_list_cnt_; |
| | 1564 | |
| | 1565 | /* number of slots allocated in desc_list_ array */ |
| | 1566 | size_t desc_list_alo_; |
| | 1567 | |
| | 1568 | /* next file descriptor ID to be assigned */ |
| | 1569 | int next_filedesc_id_; |
| | 1570 | |
| | 1571 | /* pointer to current position in current line */ |
| | 1572 | utf8_ptr p_; |
| | 1573 | |
| | 1574 | /* |
| | 1575 | * The CTcTokString object containing the current line. This is the |
| | 1576 | * buffer object we're currently reading from, and will be either |
| | 1577 | * linebuf_ or expbuf_. p_ always points into this buffer. |
| | 1578 | */ |
| | 1579 | CTcTokString *curbuf_; |
| | 1580 | |
| | 1581 | /* raw file input buffer */ |
| | 1582 | CTcTokString linebuf_; |
| | 1583 | |
| | 1584 | /* |
| | 1585 | * unsplice buffer - we'll put any unspliced text into this buffer, |
| | 1586 | * then read it back at the next read_line() |
| | 1587 | */ |
| | 1588 | CTcTokString unsplicebuf_; |
| | 1589 | |
| | 1590 | /* macro expansion buffer */ |
| | 1591 | CTcTokString expbuf_; |
| | 1592 | |
| | 1593 | /* |
| | 1594 | * Flag: in a string. If this is '\0', we're not in a string; |
| | 1595 | * otherwise, this is the quote character that ends the string. |
| | 1596 | */ |
| | 1597 | wchar_t in_quote_; |
| | 1598 | |
| | 1599 | /* flag: in an embedded expression during line processing */ |
| | 1600 | uint comment_in_embedding_ : 1; |
| | 1601 | |
| | 1602 | /* flag: macro processing token stream is in an embedded expression */ |
| | 1603 | int macro_in_embedding_; |
| | 1604 | |
| | 1605 | /* flag: main token stream is in an embedded expression */ |
| | 1606 | int main_in_embedding_; |
| | 1607 | |
| | 1608 | /* |
| | 1609 | * #if state stack. if_sp_ is the index of the next nesting slot; |
| | 1610 | * if if_sp_ is zero, it means that we're not in a #if at all. |
| | 1611 | * |
| | 1612 | * Separately, the if_false_level_ is the level of #if's contained |
| | 1613 | * within a false #if branch. This is separate because, once we're |
| | 1614 | * in a false #if branch, everything within it is false. |
| | 1615 | */ |
| | 1616 | int if_sp_; |
| | 1617 | tok_if_info_t if_stack_[TOK_MAX_IF_NESTING]; |
| | 1618 | int if_false_level_; |
| | 1619 | |
| | 1620 | /* source block list head */ |
| | 1621 | CTcTokSrcBlock *src_head_; |
| | 1622 | |
| | 1623 | /* current (and last) source block */ |
| | 1624 | CTcTokSrcBlock *src_cur_; |
| | 1625 | |
| | 1626 | /* pointer to next available byte in the current source block */ |
| | 1627 | char *src_ptr_; |
| | 1628 | |
| | 1629 | /* number of bytes remaining in the current source block */ |
| | 1630 | size_t src_rem_; |
| | 1631 | |
| | 1632 | /* current token */ |
| | 1633 | CTcToken curtok_; |
| | 1634 | |
| | 1635 | /* previous token (for unget) */ |
| | 1636 | CTcToken prvtok_; |
| | 1637 | |
| | 1638 | /* |
| | 1639 | * next token, if a token has been un-gotten, and a flag indicating |
| | 1640 | * that this is indeed the case. |
| | 1641 | */ |
| | 1642 | CTcToken nxttok_; |
| | 1643 | unsigned int nxttok_valid_ : 1; |
| | 1644 | |
| | 1645 | /* the external token source, if any */ |
| | 1646 | CTcTokenSource *ext_src_; |
| | 1647 | |
| | 1648 | /* symbol table for #define symbols */ |
| | 1649 | class CVmHashTable *defines_; |
| | 1650 | |
| | 1651 | /* |
| | 1652 | * symbol table for symbols explicitly undefined; we keep track of |
| | 1653 | * these so that we can exclude anything ever undefined from the debug |
| | 1654 | * macro records, since only static global macros can be handled in the |
| | 1655 | * debug records |
| | 1656 | */ |
| | 1657 | class CVmHashTable *undefs_; |
| | 1658 | |
| | 1659 | /* symbol table for TADS keywords */ |
| | 1660 | class CVmHashTable *kw_; |
| | 1661 | |
| | 1662 | /* head of macro resource pool list */ |
| | 1663 | class CTcMacroRsc *macro_res_head_; |
| | 1664 | |
| | 1665 | /* head of list of available macro resources */ |
| | 1666 | class CTcMacroRsc *macro_res_avail_; |
| | 1667 | |
| | 1668 | /* |
| | 1669 | * string capture file - if this is non-null, we'll capture all of |
| | 1670 | * the strings we read to this file, one string per line |
| | 1671 | */ |
| | 1672 | osfildef *string_fp_; |
| | 1673 | |
| | 1674 | /* character mapper for writing to the string capture file */ |
| | 1675 | class CCharmapToLocal *string_fp_map_; |
| | 1676 | |
| | 1677 | /* true -> allow preprocessor directives */ |
| | 1678 | unsigned int allow_pp_; |
| | 1679 | }; |
| | 1680 | |
| | 1681 | /* ------------------------------------------------------------------------ */ |
/*
 *   Macro loader error sink.  This is a pure callback interface: a caller
 *   of load_macros_from_file() in CTcTokenizer supplies an object that
 *   implements it, and errors that occur while loading macros are reported
 *   through log_error().
 */
class CTcTokLoadMacErr
{
public:
    /*
     *   Virtual destructor, so that an implementation can be destroyed
     *   correctly through a pointer to this interface.
     */
    virtual ~CTcTokLoadMacErr() { }

    /*
     *   Flag an error.  'err' is one of the following codes:
     *
     *   1 - a macro name symbol in the file is too long (it exceeds the
     *   maximum symbol length for the preprocessor)
     *
     *   2 - a formal parameter name is too long
     */
    virtual void log_error(int err) = 0;
};
| | 1700 | |
| | 1701 | /* ------------------------------------------------------------------------ */ |
| | 1702 | /* |
| | 1703 | * Tokenizer File Descriptor. Each unique source file has a separate |
| | 1704 | * file descriptor, which keeps track of the file's name. |
| | 1705 | */ |
| | 1706 | class CTcTokFileDesc |
| | 1707 | { |
| | 1708 | public: |
| | 1709 | /* create a file descriptor */ |
| | 1710 | CTcTokFileDesc(const char *fname, size_t fname_len, int index, |
| | 1711 | CTcTokFileDesc *orig_desc, |
| | 1712 | const char *orig_fname, size_t orig_fname_len); |
| | 1713 | |
| | 1714 | /* delete the descriptor */ |
| | 1715 | ~CTcTokFileDesc(); |
| | 1716 | |
| | 1717 | /* get the filename */ |
| | 1718 | const char *get_fname() const { return fname_; } |
| | 1719 | |
| | 1720 | /* get the original filename string */ |
| | 1721 | const char *get_orig_fname() const { return orig_fname_; } |
| | 1722 | |
| | 1723 | /* |
| | 1724 | * get the filename as a double-quoted string (backslashes and |
| | 1725 | * double-quotes will be escaped with backslashes) |
| | 1726 | */ |
| | 1727 | const char *get_dquoted_fname() const { return dquoted_fname_; } |
| | 1728 | |
| | 1729 | /* |
| | 1730 | * get the root filename (i.e., with no path prefix) as a |
| | 1731 | * double-quoted string |
| | 1732 | */ |
| | 1733 | const char *get_dquoted_rootname() const { return dquoted_rootname_; } |
| | 1734 | |
| | 1735 | /* get the filename as a single-quoted string */ |
| | 1736 | const char *get_squoted_fname() const { return squoted_fname_; } |
| | 1737 | |
| | 1738 | /* get the root filename as a single-quoted string */ |
| | 1739 | const char *get_squoted_rootname() const { return squoted_rootname_; } |
| | 1740 | |
| | 1741 | /* get/set the next file descriptor in the descriptor chain */ |
| | 1742 | CTcTokFileDesc *get_next() const { return next_; } |
| | 1743 | void set_next(CTcTokFileDesc *nxt) { next_ = nxt; } |
| | 1744 | |
| | 1745 | /* get my index in the master list */ |
| | 1746 | int get_index() const { return index_; } |
| | 1747 | |
| | 1748 | /* get the original descriptor for this file in the list */ |
| | 1749 | CTcTokFileDesc *get_orig() const { return orig_; } |
| | 1750 | |
| | 1751 | /* |
| | 1752 | * get the list index of the original entry (returns my own list |
| | 1753 | * index if I am the original entry) |
| | 1754 | */ |
| | 1755 | int get_orig_index() const |
| | 1756 | { return orig_ == 0 ? index_ : orig_->get_index(); } |
| | 1757 | |
| | 1758 | /* |
| | 1759 | * Add a source line position to our list. We keep an index of the |
| | 1760 | * byte-code address for each executable source line, so that |
| | 1761 | * debuggers can find the compiled code corresponding to a source |
| | 1762 | * location. The image builder gives us this information during the |
| | 1763 | * linking process. The address is the absolute location in the |
| | 1764 | * image file of the executable code for the given source line (the |
| | 1765 | * first line in the file is numbered 1). |
| | 1766 | */ |
| | 1767 | void add_source_line(ulong linenum, ulong line_addr); |
| | 1768 | |
| | 1769 | /* |
| | 1770 | * Enumerate the source lines, calling the callback for each one. |
| | 1771 | * We will only enumerate source lines which actually have an |
| | 1772 | * associated code location - source lines that generated no |
| | 1773 | * executable code are skipped. We'll enumerate the lines in |
| | 1774 | * ascending order of line number, and each line number will appear |
| | 1775 | * only once. |
| | 1776 | */ |
| | 1777 | void enum_source_lines(void (*cbfunc)(void *ctx, ulong linenum, |
| | 1778 | ulong byte_code_addr), |
| | 1779 | void *cbctx); |
| | 1780 | |
| | 1781 | private: |
| | 1782 | /* index in the master list */ |
| | 1783 | int index_; |
| | 1784 | |
| | 1785 | /* filename string - this is the actual file system filename */ |
| | 1786 | char *fname_; |
| | 1787 | |
| | 1788 | /* |
| | 1789 | * original filename string, if different from fname_ - this is the |
| | 1790 | * filename as specified by the user, before it was adjusted with |
| | 1791 | * include paths or other extra location information |
| | 1792 | */ |
| | 1793 | char *orig_fname_; |
| | 1794 | |
| | 1795 | /* double-quoted version of the filename */ |
| | 1796 | char *dquoted_fname_; |
| | 1797 | |
| | 1798 | /* single-quoted version of the filename */ |
| | 1799 | char *squoted_fname_; |
| | 1800 | |
| | 1801 | /* single-quoted version of the root filename */ |
| | 1802 | char *squoted_rootname_; |
| | 1803 | |
| | 1804 | /* double-quoted version of the root filename */ |
| | 1805 | char *dquoted_rootname_; |
| | 1806 | |
| | 1807 | /* next descriptor in the master descriptor list */ |
| | 1808 | CTcTokFileDesc *next_; |
| | 1809 | |
| | 1810 | /* |
| | 1811 | * The original file descriptor with the same filename. If we |
| | 1812 | * create multiple descriptors for the same filename (because, for |
| | 1813 | * example, the same header is included in several different object |
| | 1814 | * files), we'll keep track of the original descriptor for the file |
| | 1815 | * in all of the copies. |
| | 1816 | */ |
| | 1817 | CTcTokFileDesc *orig_; |
| | 1818 | |
| | 1819 | /* source line pages */ |
| | 1820 | struct CTcTokSrcPage **src_pages_; |
| | 1821 | |
| | 1822 | /* number of source line page slots allocated */ |
| | 1823 | size_t src_pages_alo_; |
| | 1824 | }; |
| | 1825 | |
| | 1826 | |
| | 1827 | /* ------------------------------------------------------------------------ */ |
| | 1828 | /* |
| | 1829 | * Tokenizer Input Stream |
| | 1830 | */ |
| | 1831 | class CTcTokStream |
| | 1832 | { |
| | 1833 | public: |
| | 1834 | /* create a token stream */ |
| | 1835 | CTcTokStream(class CTcTokFileDesc *desc, class CTcSrcObject *src, |
| | 1836 | CTcTokStream *parent, int charset_error, |
| | 1837 | int init_if_level); |
| | 1838 | |
| | 1839 | /* delete the stream */ |
| | 1840 | ~CTcTokStream(); |
| | 1841 | |
| | 1842 | /* get/set the associated file descriptor */ |
| | 1843 | class CTcTokFileDesc *get_desc() const { return desc_; } |
| | 1844 | void set_desc(class CTcTokFileDesc *desc) { desc_ = desc; } |
| | 1845 | |
| | 1846 | /* get the underlying source file */ |
| | 1847 | class CTcSrcObject *get_src() const { return src_; } |
| | 1848 | |
| | 1849 | /* get the line number of the next line to be read */ |
| | 1850 | long get_next_linenum() const { return next_linenum_; } |
| | 1851 | |
| | 1852 | /* set the next line number */ |
| | 1853 | void set_next_linenum(long l) { next_linenum_ = l; } |
| | 1854 | |
| | 1855 | /* get the enclosing stream */ |
| | 1856 | CTcTokStream *get_parent() const { return parent_; } |
| | 1857 | |
| | 1858 | /* count having read a line */ |
| | 1859 | void count_line() { ++next_linenum_; } |
| | 1860 | |
| | 1861 | /* was there a #charset error when opening the file? */ |
| | 1862 | int get_charset_error() const { return charset_error_; } |
| | 1863 | |
| | 1864 | /* get/set the in-comment status */ |
| | 1865 | int is_in_comment() const { return in_comment_; } |
| | 1866 | void set_in_comment(int f) { in_comment_ = f; } |
| | 1867 | |
| | 1868 | /* get/set the pragma C mode */ |
| | 1869 | // int is_pragma_c() const { return pragma_c_; } |
| | 1870 | // void set_pragma_c(int f) { pragma_c_ = f; } |
| | 1871 | |
| | 1872 | /* get/set if nesting level at the start of the file */ |
| | 1873 | int get_init_if_level() const { return init_if_level_; } |
| | 1874 | void set_init_if_level(int level) { init_if_level_ = level; } |
| | 1875 | |
| | 1876 | /* get/set the newline spacing mode */ |
| | 1877 | int get_newline_spacing() const { return newline_spacing_; } |
| | 1878 | void set_newline_spacing(int f) { newline_spacing_ = f; } |
| | 1879 | |
| | 1880 | private: |
| | 1881 | /* file descriptor associated with this file */ |
| | 1882 | class CTcTokFileDesc *desc_; |
| | 1883 | |
| | 1884 | /* the underlying source reader */ |
| | 1885 | class CTcSrcObject *src_; |
| | 1886 | |
| | 1887 | /* |
| | 1888 | * the enclosing stream - this is the stream that #include'd the |
| | 1889 | * current stream |
| | 1890 | */ |
| | 1891 | CTcTokStream *parent_; |
| | 1892 | |
| | 1893 | /* line number of next line to be read */ |
| | 1894 | ulong next_linenum_; |
| | 1895 | |
| | 1896 | /* #if nesting level at the start of the file */ |
| | 1897 | int init_if_level_; |
| | 1898 | |
| | 1899 | /* flag: we were unable to load the map in the #charset directive */ |
| | 1900 | uint charset_error_ : 1; |
| | 1901 | |
| | 1902 | /* the stream is in a multi-line comment */ |
| | 1903 | uint in_comment_ : 1; |
| | 1904 | |
| | 1905 | /* newline_spacing mode when the stream was stacked */ |
| | 1906 | uint newline_spacing_ : 1; |
| | 1907 | |
| | 1908 | /* flag: we're in #pragma C+ mode */ |
| | 1909 | // uint pragma_c_ : 1; - #pragma C is not currently used |
| | 1910 | }; |
| | 1911 | |
| | 1912 | /* ------------------------------------------------------------------------ */ |
| | 1913 | /* |
| | 1914 | * Keyword Hash Table Entry |
| | 1915 | */ |
| | 1916 | class CTcHashEntryKw: public CVmHashEntryCS |
| | 1917 | { |
| | 1918 | public: |
| | 1919 | CTcHashEntryKw(const textchar_t *str, tc_toktyp_t tokid) |
| | 1920 | : CVmHashEntryCS(str, strlen(str), FALSE) |
| | 1921 | { |
| | 1922 | /* save the token ID for the keyword */ |
| | 1923 | tokid_ = tokid; |
| | 1924 | } |
| | 1925 | |
| | 1926 | /* get the token ID */ |
| | 1927 | tc_toktyp_t get_tok_id() const { return tokid_; } |
| | 1928 | |
| | 1929 | private: |
| | 1930 | /* our token ID */ |
| | 1931 | tc_toktyp_t tokid_; |
| | 1932 | }; |
| | 1933 | |
| | 1934 | /* ------------------------------------------------------------------------ */ |
| | 1935 | /* |
| | 1936 | * basic #define symbol table entry |
| | 1937 | */ |
| | 1938 | class CTcHashEntryPp: public CVmHashEntryCS |
| | 1939 | { |
| | 1940 | public: |
| | 1941 | CTcHashEntryPp(const textchar_t *str, size_t len, int copy) |
| | 1942 | : CVmHashEntryCS(str, len, copy) |
| | 1943 | { |
| | 1944 | /* by default, we have no arguments */ |
| | 1945 | has_args_ = FALSE; |
| | 1946 | has_varargs_ = FALSE; |
| | 1947 | argc_ = 0; |
| | 1948 | argv_ = 0; |
| | 1949 | params_table_ = 0; |
| | 1950 | } |
| | 1951 | |
| | 1952 | /* get the expansion text */ |
| | 1953 | virtual const char *get_expansion() const = 0; |
| | 1954 | virtual size_t get_expan_len() const = 0; |
| | 1955 | |
| | 1956 | /* certain special macros (__LINE__, __FILE__) aren't undef'able */ |
| | 1957 | virtual int is_undefable() const { return TRUE; } |
| | 1958 | |
| | 1959 | /* |
| | 1960 | * most macros are real symbols, created by #define's, but some are |
| | 1961 | * special pseudo-macros, like __LINE__ and __FILE__, that the |
| | 1962 | * preprocessor provides |
| | 1963 | */ |
| | 1964 | virtual int is_pseudo() const { return FALSE; } |
| | 1965 | |
| | 1966 | /* does the macro have an argument list? */ |
| | 1967 | int has_args() const { return has_args_; } |
| | 1968 | |
| | 1969 | /* get the number of arguments */ |
| | 1970 | int get_argc() const { return argc_; } |
| | 1971 | |
| | 1972 | /* do we have a variable number of arguments? */ |
| | 1973 | int has_varargs() const { return has_varargs_; } |
| | 1974 | |
| | 1975 | /* |
| | 1976 | * get the minimum number of allowed arguments - if we have varargs, |
| | 1977 | * this is one less than the number of formals listed, since the last |
| | 1978 | * formal can correspond to any number of actuals, including zero |
| | 1979 | */ |
| | 1980 | int get_min_argc() const { return has_varargs_ ? argc_ - 1 : argc_; } |
| | 1981 | |
| | 1982 | /* get the name of an argument by position (0 = first argument) */ |
| | 1983 | const char *get_arg_name(int idx) const { return argv_[idx]; } |
| | 1984 | |
| | 1985 | /* get the parameter hash table entry for the parameter */ |
| | 1986 | class CTcHashEntryPpArg *get_arg_entry(int idx) const |
| | 1987 | { return arg_entry_[idx]; } |
| | 1988 | |
| | 1989 | /* get the parameters hash table */ |
| | 1990 | const CVmHashTable *get_params_table() const { return params_table_; } |
| | 1991 | |
| | 1992 | protected: |
| | 1993 | /* argument list */ |
| | 1994 | char **argv_; |
| | 1995 | |
| | 1996 | /* list of parameter hash entries */ |
| | 1997 | class CTcHashEntryPpArg **arg_entry_; |
| | 1998 | |
| | 1999 | /* parameter hash table */ |
| | 2000 | CVmHashTable *params_table_; |
| | 2001 | |
| | 2002 | /* argument count */ |
| | 2003 | int argc_; |
| | 2004 | |
| | 2005 | /* flag: the macro has a parameter list */ |
| | 2006 | uint has_args_ : 1; |
| | 2007 | |
| | 2008 | /* |
| | 2009 | * flag: the parameter list takes a variable number of arguments; if |
| | 2010 | * this is set, then argc_ is one greater than the minimum number of |
| | 2011 | * arguments required, and the last formal receives the varying part |
| | 2012 | * of the actual parameter list, which can contain zero or more |
| | 2013 | * actuals |
| | 2014 | */ |
| | 2015 | uint has_varargs_ : 1; |
| | 2016 | }; |
| | 2017 | |
| | 2018 | /* |
| | 2019 | * #define symbol hash table entry |
| | 2020 | */ |
| | 2021 | class CTcHashEntryPpDefine: public CTcHashEntryPp |
| | 2022 | { |
| | 2023 | public: |
| | 2024 | /* |
| | 2025 | * Create the hash entry. argc is the number of arguments to the |
| | 2026 | * macro, and argv is an array of pointers to null-terminated |
| | 2027 | * strings with the argument names, in the order defined in the |
| | 2028 | * macro. |
| | 2029 | * |
| | 2030 | * If has_args is false, the macro does not take a parameter list at |
| | 2031 | * all. Note that it is possible for has_args to be true and argc |
| | 2032 | * to be zero, because a macro can be defined to take an argument |
| | 2033 | * list with no arguments (i.e., empty parens). A macro with an |
| | 2034 | * empty argument list is distinct from a macro with no argument |
| | 2035 | * list: in the former case, the empty parens are required, and are |
| | 2036 | * removed from the input stream and replaced with the macro's |
| | 2037 | * expansion. |
| | 2038 | * |
| | 2039 | * We'll make a copy of the argument list vector, strings, and |
| | 2040 | * expansion text, so the caller is free to forget all of that after |
| | 2041 | * creating the entry instance. |
| | 2042 | */ |
| | 2043 | CTcHashEntryPpDefine(const textchar_t *str, size_t len, int copy, |
| | 2044 | int has_args, int argc, int has_varargs, |
| | 2045 | const char **argv, const size_t *argvlen, |
| | 2046 | const char *expansion, size_t expan_len); |
| | 2047 | |
| | 2048 | ~CTcHashEntryPpDefine(); |
| | 2049 | |
| | 2050 | /* get the expansion text and its length */ |
| | 2051 | const char *get_expansion() const { return expan_; } |
| | 2052 | size_t get_expan_len() const { return expan_len_; } |
| | 2053 | |
| | 2054 | private: |
| | 2055 | /* expansion */ |
| | 2056 | char *expan_; |
| | 2057 | size_t expan_len_; |
| | 2058 | }; |
| | 2059 | |
| | 2060 | |
| | 2061 | /* |
| | 2062 | * Hash table entry for __FILE__ and __LINE__ |
| | 2063 | */ |
| | 2064 | class CTcHashEntryPpSpecial: public CTcHashEntryPp |
| | 2065 | { |
| | 2066 | public: |
| | 2067 | CTcHashEntryPpSpecial(CTcTokenizer *tok, const char *str) |
| | 2068 | : CTcHashEntryPp(str, strlen(str), FALSE) |
| | 2069 | { |
| | 2070 | /* remember my tokenizer */ |
| | 2071 | tok_ = tok; |
| | 2072 | } |
| | 2073 | |
| | 2074 | /* these special macros are not undef'able */ |
| | 2075 | virtual int is_undefable() const { return FALSE; } |
| | 2076 | |
| | 2077 | /* special macros are pseudo-macros provided by the preprocessor */ |
| | 2078 | virtual int is_pseudo() const { return TRUE; } |
| | 2079 | |
| | 2080 | protected: |
| | 2081 | /* my tokenizer */ |
| | 2082 | CTcTokenizer *tok_; |
| | 2083 | }; |
| | 2084 | |
| | 2085 | class CTcHashEntryPpFILE: public CTcHashEntryPpSpecial |
| | 2086 | { |
| | 2087 | public: |
| | 2088 | CTcHashEntryPpFILE(CTcTokenizer *tok) |
| | 2089 | : CTcHashEntryPpSpecial(tok, "__FILE__") { } |
| | 2090 | |
| | 2091 | /* our expansion is the current filename, in single quotes */ |
| | 2092 | const char *get_expansion() const { return get_base_text(); } |
| | 2093 | size_t get_expan_len() const { return strlen(get_base_text()); } |
| | 2094 | |
| | 2095 | private: |
| | 2096 | /* get our expansion base text */ |
| | 2097 | const char *get_base_text() const |
| | 2098 | { |
| | 2099 | /* |
| | 2100 | * if we're in test-report mode, use the root name only; |
| | 2101 | * otherwise, use the full name with path |
| | 2102 | */ |
| | 2103 | if (tok_->get_test_report_mode()) |
| | 2104 | return tok_->get_last_desc()->get_squoted_rootname(); |
| | 2105 | else |
| | 2106 | return tok_->get_last_desc()->get_squoted_fname(); |
| | 2107 | } |
| | 2108 | }; |
| | 2109 | |
| | 2110 | class CTcHashEntryPpLINE: public CTcHashEntryPpSpecial |
| | 2111 | { |
| | 2112 | public: |
| | 2113 | CTcHashEntryPpLINE(CTcTokenizer *tok) |
| | 2114 | : CTcHashEntryPpSpecial(tok, "__LINE__") { } |
| | 2115 | |
| | 2116 | /* our expansion is the line number as a decimal string */ |
| | 2117 | const char *get_expansion() const |
| | 2118 | { gen_expansion(tok_); return buf_; } |
| | 2119 | size_t get_expan_len() const |
| | 2120 | { gen_expansion(tok_); return strlen(buf_); } |
| | 2121 | |
| | 2122 | private: |
| | 2123 | /* generate the expansion text into our internal buffer */ |
| | 2124 | static void gen_expansion(CTcTokenizer *tok) |
| | 2125 | { sprintf(buf_, "%ld", tok->get_last_linenum()); } |
| | 2126 | |
| | 2127 | /* internal buffer */ |
| | 2128 | static char buf_[20]; |
| | 2129 | }; |
| | 2130 | |
| | 2131 | |
| | 2132 | /* |
| | 2133 | * Hash entry for preprocessor arguments |
| | 2134 | */ |
| | 2135 | class CTcHashEntryPpArg: public CVmHashEntryCS |
| | 2136 | { |
| | 2137 | public: |
| | 2138 | CTcHashEntryPpArg(const char *str, size_t len, int copy, int argnum) |
| | 2139 | : CVmHashEntryCS(str, len, copy) |
| | 2140 | { |
| | 2141 | /* remember the argument number */ |
| | 2142 | argnum_ = argnum; |
| | 2143 | } |
| | 2144 | |
| | 2145 | /* get my argument number */ |
| | 2146 | int get_argnum() const { return argnum_; } |
| | 2147 | |
| | 2148 | private: |
| | 2149 | /* argument number */ |
| | 2150 | int argnum_; |
| | 2151 | }; |
| | 2152 | |
| | 2153 | |
| | 2154 | /* ------------------------------------------------------------------------ */ |
/*
 *   Previously-included file list entry.  Every time a file is included,
 *   an entry is added to a list of files; the list is consulted later to
 *   ensure that the same file isn't included again.
 */
struct tctok_incfile_t
{
    /* link to the next entry in the list of previously-included files */
    struct tctok_incfile_t *nxt;

    /*
     *   name of this file - declared with a single element; memory is
     *   allocated to hold the full name
     */
    char fname[1];
};
| | 2168 | |
| | 2169 | /* ------------------------------------------------------------------------ */ |
/*
 *   Include path list entry.  Each structure describes one include path;
 *   the paths are kept as a list of these structures.
 */
struct tctok_incpath_t
{
    /* link to the next entry in the list */
    struct tctok_incpath_t *nxt;

    /* the path text - declared with a single element */
    char path[1];
};
| | 2182 | |
| | 2183 | #endif /* TCTOK_H */ |
| | 2184 | |