cfad47cfa3/t3compiler/tads3/tctok.h

4b825dc642cb6eb9a060e54bf8d69288fbee4904cfad47cfa334b206c65f22086bcc5d63e6f70944
1
/* $Header: d:/cvsroot/tads/tads3/tctok.h,v 1.5 1999/07/11 00:46:59 MJRoberts Exp $ */
2
3
/* 
4
 *   Copyright (c) 1999, 2002 Michael J. Roberts.  All Rights Reserved.
5
 *   
6
 *   Please see the accompanying license file, LICENSE.TXT, for information
7
 *   on using and copying this software.  
8
 */
9
/*
10
Name
11
  tctok.h - TADS3 compiler tokenizer and preprocessor
12
Function
13
  
14
Notes
15
  The tokenizer is layered with the preprocessor, so that the preprocessor
16
  can deal with include files, macro expansion, and preprocessor directives.
17
Modified
18
  04/12/99 MJRoberts  - Creation
19
*/
20
21
#ifndef TCTOK_H
22
#define TCTOK_H
23
24
#include <stdlib.h>
25
#include <string.h>
26
#include <assert.h>
27
28
#include "os.h"
29
#include "t3std.h"
30
#include "utf8.h"
31
#include "vmhash.h"
32
#include "vmerr.h"
33
#include "tcerr.h"
34
#include "tcerrnum.h"
35
36
37
/* ------------------------------------------------------------------------ */
38
/*
39
 *   Constants 
40
 */
41
42
/* maximum length of a symbol name, in characters */
const size_t TOK_SYM_MAX_LEN = 80;

/*
 *   Maximum buffer required to hold a symbol, in bytes.  Each UTF-8
 *   character may take up three bytes, plus we need a null terminator
 *   byte.
 */
const size_t TOK_SYM_MAX_BUFFER = (3*TOK_SYM_MAX_LEN + 1);

/* maximum #if nesting level */
const size_t TOK_MAX_IF_NESTING = 100;

/* maximum number of parameters per macro */
const int TOK_MAX_MACRO_ARGS = 128;

/*
 *   Special token flag characters - these are characters that can't
 *   occur in an input file (we guarantee this by converting any
 *   occurrences of these characters to a space on reading input).  We use
 *   these to flag certain special properties of tokens in the input
 *   buffer.
 *
 *   We use ASCII characters in the control range (0x01 (^A) through 0x1A
 *   (^Z), excluding 0x09 (tab), 0x0A (LF), 0x0D (CR), and 0x0C (Page
 *   Feed)); a well-formed source file would never use any of these
 *   characters in input.  Even if it does, we won't get confused, since
 *   we'll always translate these to a space if we find them in input; but
 *   choosing characters that *should* never occur in valid input will
 *   ensure that we never alter the meaning of valid source by this
 *   translation.
 */

/*
 *   Macro parameter flag - we use this in the internal storage of a
 *   #define expansion to flag where the formal parameters are mentioned,
 *   so that we can substitute the actuals when expanding the macro.
 */
const char TOK_MACRO_FORMAL_FLAG = 0x01;

/*
 *   Token fully expanded flag.  Whenever we detect that a particular
 *   token has been fully expanded in the course of a particular macro
 *   expansion, we'll insert this byte before the token; on subsequent
 *   re-scans, whenever we see this flag, we'll realize that the token
 *   needs no further consideration of expansion.
 */
const char TOK_FULLY_EXPANDED_FLAG = 0x02;

/*
 *   Macro substitution end marker.  Each time we expand a macro, we'll
 *   insert immediately after the macro expansion a special pseudo-token,
 *   consisting of this flag followed by a pointer to the symbol table
 *   entry for the symbol expanded.  As we expand macros, we'll check to
 *   see if any of these special flags appear in the buffer after the
 *   macro about to be expanded.  If we find such a flag matching the
 *   symbol about to be expanded, we'll know the symbol has already been
 *   fully expanded on a previous scan and thus must not be expanded
 *   again.
 */
const char TOK_MACRO_EXP_END = 0x03;

/*
 *   End-of-line flag.  This serves as a local end-of-file marker for
 *   preprocessor lines.  Because preprocessor lines must be considered in
 *   isolation, we need some way when parsing one to tell the tokenizer
 *   not to try to read another line when it reaches the end of the
 *   current line.  This flag serves this purpose: when the tokenizer
 *   encounters one of these flags, it will simply return end-of-file
 *   until the caller explicitly reads a new source line.
 */
const char TOK_END_PP_LINE = 0x04;

/*
 *   "#foreach" marker flag.  This marks the presence of a #foreach token in
 *   a macro's expansion.  We leave the text of the expansion area intact,
 *   but we replace the #foreach token with this marker character.
 */
const char TOK_MACRO_FOREACH_FLAG = 0x05;

/*
 *   "#argcount" marker flag.  This marks the presence of a #argcount token
 *   in a macro's expansion.
 */
const char TOK_MACRO_ARGCOUNT_FLAG = 0x06;

/*
 *   "#ifempty" and "#ifnempty" marker flags
 */
const char TOK_MACRO_IFEMPTY_FLAG = 0x07;
const char TOK_MACRO_IFNEMPTY_FLAG = 0x08;
133
134
135
/* ------------------------------------------------------------------------ */
136
/*
137
 *   #if state 
138
 */
139
/*
 *   State of one level of #if nesting.  The enumerator order (and hence
 *   the numeric values, starting at 0) is unchanged from the original
 *   declaration.
 */
enum tok_if_t
{
    /* not in a #if block at all */
    TOKIF_NONE,

    /* processing a true #if branch */
    TOKIF_IF_YES,

    /* processing a false #if branch */
    TOKIF_IF_NO,

    /* done with true #if/#elif; skip #elif's and #else */
    TOKIF_IF_DONE,

    /* processing a true #else branch */
    TOKIF_ELSE_YES,

    /* processing a false #else branch */
    TOKIF_ELSE_NO
};
148
149
/*
150
 *   #if stack entry 
151
 */
152
struct tok_if_info_t
153
{
154
    /* state */
155
    tok_if_t state;
156
157
    /* file descriptor and line number of starting #if */
158
    class CTcTokFileDesc *desc;
159
    long linenum;
160
};
161
162
/* ------------------------------------------------------------------------ */
163
/*
164
 *   Token Types 
165
 */
166
167
/*
 *   Token type codes.  Enumerators are implicitly numbered from 0 in
 *   declaration order; the order is unchanged from the original
 *   declaration, so the numeric values are the same.
 */
enum tc_toktyp_t
{
    /* special and placeholder tokens */
    TOKT_INVALID,           /* invalid token */
    TOKT_NULLTOK,           /* null token - caller should read another token */
    TOKT_EOF,               /* end of file */
    TOKT_MACRO_FORMAL,      /* formal parameter replacement placeholder */
    TOKT_MACRO_FOREACH,     /* macro varargs #foreach placeholder */
    TOKT_MACRO_ARGCOUNT,    /* macro varargs #argcount placeholder */
    TOKT_MACRO_IFEMPTY,     /* #ifempty macro placeholder */
    TOKT_MACRO_IFNEMPTY,    /* #ifnempty macro placeholder */

    /* names, numbers, and strings */
    TOKT_SYM,               /* symbolic name */
    TOKT_INT,               /* integer */
    TOKT_SSTR,              /* single-quoted string */
    TOKT_DSTR,              /* double-quoted string */
    TOKT_DSTR_START,        /* start of a dstring with embedding - "...<< */
    TOKT_DSTR_MID,          /* middle of a dstring with embedding - >>...<< */
    TOKT_DSTR_END,          /* end of a dstring with embedding - >>..." */

    /* punctuation and operators */
    TOKT_LPAR,              /* left paren '(' */
    TOKT_RPAR,              /* right paren ')' */
    TOKT_COMMA,             /* comma ',' */
    TOKT_DOT,               /* period '.' */
    TOKT_LBRACE,            /* left brace '{' */
    TOKT_RBRACE,            /* right brace '}' */
    TOKT_LBRACK,            /* left square bracket '[' */
    TOKT_RBRACK,            /* right square bracket ']' */
    TOKT_EQ,                /* equals sign '=' */
    TOKT_EQEQ,              /* double-equals sign '==' */
    TOKT_ASI,               /* colon-equals assignment operator ':=' */
    TOKT_PLUS,              /* plus sign '+' */
    TOKT_MINUS,             /* minus sign '-' */
    TOKT_TIMES,             /* multiplication symbol '*' */
    TOKT_DIV,               /* division symbol '/' */
    TOKT_MOD,               /* modulo '%' */
    TOKT_GT,                /* greater-than sign '>' */
    TOKT_LT,                /* less-than sign '<' */
    TOKT_GE,                /* greater-or-equal sign '>=' */
    TOKT_LE,                /* less-or-equal sign '<=' */
    TOKT_NE,                /* not-equals sign '!=' or '<>' */
    TOKT_ARROW,             /* arrow symbol '->' */
    TOKT_COLON,             /* colon ':' */
    TOKT_SEM,               /* semicolon ';' */
    TOKT_AND,               /* bitwise AND '&' */
    TOKT_ANDAND,            /* logical AND '&&' */
    TOKT_OR,                /* bitwise OR '|' */
    TOKT_OROR,              /* logical OR '||' */
    TOKT_XOR,               /* bitwise XOR '^' */
    TOKT_SHL,               /* shift left '<<' */
    TOKT_SHR,               /* shift right '>>' */
    TOKT_INC,               /* increment '++' */
    TOKT_DEC,               /* decrement '--' */
    TOKT_PLUSEQ,            /* plus-equals '+=' */
    TOKT_MINEQ,             /* minus-equals '-=' */
    TOKT_TIMESEQ,           /* times-equals '*=' */
    TOKT_DIVEQ,             /* divide-equals '/=' */
    TOKT_MODEQ,             /* mod-equals '%=' */
    TOKT_ANDEQ,             /* and-equals '&=' */
    TOKT_OREQ,              /* or-equals '|=' */
    TOKT_XOREQ,             /* xor-equals '^=' */
    TOKT_SHLEQ,             /* shift-left-and-assign '<<=' */
    TOKT_SHREQ,             /* shift-right-and-assign '>>=' */
    TOKT_NOT,               /* logical not '!' */
    TOKT_BNOT,              /* bitwise not '~' */
    TOKT_POUND,             /* pound '#' */
    TOKT_POUNDPOUND,        /* double-pound '##' */
    TOKT_POUNDAT,           /* pound-at '#@' */
    TOKT_ELLIPSIS,          /* ellipsis '...' */
    TOKT_QUESTION,          /* question mark '?' */
    TOKT_COLONCOLON,        /* double-colon '::' */
    TOKT_FLOAT,             /* floating-point number */
    TOKT_AT,                /* at-sign */

    /* keywords */
    TOKT_SELF,
    TOKT_INHERITED,
    TOKT_ARGCOUNT,
    TOKT_IF,
    TOKT_ELSE,
    TOKT_FOR,
    TOKT_WHILE,
    TOKT_DO,
    TOKT_SWITCH,
    TOKT_CASE,
    TOKT_DEFAULT,
    TOKT_GOTO,
    TOKT_BREAK,
    TOKT_CONTINUE,
    TOKT_FUNCTION,
    TOKT_RETURN,
    TOKT_LOCAL,
    TOKT_OBJECT,
    TOKT_NIL,
    TOKT_TRUE,
    TOKT_PASS,
    TOKT_EXTERNAL,
    TOKT_EXTERN,
    TOKT_FORMATSTRING,
    TOKT_CLASS,
    TOKT_REPLACE,
    TOKT_MODIFY,
    TOKT_NEW,
    TOKT_DELETE,
    TOKT_THROW,
    TOKT_TRY,
    TOKT_CATCH,
    TOKT_FINALLY,
    TOKT_INTRINSIC,
    TOKT_DICTIONARY,
    TOKT_GRAMMAR,
    TOKT_ENUM,
    TOKT_TEMPLATE,
    TOKT_STATIC,
    TOKT_FOREACH,
    TOKT_EXPORT,
    TOKT_DELEGATED,
    TOKT_TARGETPROP,
    TOKT_PROPERTYSET,
    TOKT_TARGETOBJ,
    TOKT_DEFININGOBJ,
    TOKT_TRANSIENT,
    TOKT_REPLACED,
    TOKT_PROPERTY

    /*
     *   Type names - formerly reserved but later withdrawn.  These
     *   enumerators are intentionally NOT defined:
     *
     *     TOKT_VOID, TOKT_INTKW, TOKT_STRING, TOKT_LIST, TOKT_BOOLEAN,
     *     TOKT_ANY
     */
};
297
298
/* ------------------------------------------------------------------------ */
299
/*
300
 *   Source Block.  As we read the source file, we need to keep quoted
301
 *   strings and symbol names around for later reference, in case they're
302
 *   needed after reading more tokens and flushing the line buffer.  We'll
303
 *   copy needed text into our source blocks, which we keep in memory
304
 *   throughout the compilation, so that we can be certain we can
305
 *   reference these strings at any time.  
306
 */
307
308
/* size of a source block's text buffer, in bytes */
const size_t TCTOK_SRC_BLOCK_SIZE = 50000;

/*
 *   Source block: one fixed-size text buffer plus a link to the next block
 *   in a singly-linked list.  A block owns the block it links to: deleting
 *   a block deletes every block reachable through its 'next' links.
 *
 *   NOTE(review): no copy constructor or assignment operator is declared,
 *   so copying a block would leave two objects owning the same successor;
 *   instances are presumably never copied - confirm against callers.
 */
class CTcTokSrcBlock
{
public:
    /* create a block that is not yet linked to a successor */
    CTcTokSrcBlock()
        : nxt_(0)
    {
    }

    /*
     *   Delete this block and all of the blocks chained after it.  The
     *   chain is unlinked and freed in a loop, so the call-stack depth does
     *   not grow with the length of the list.
     */
    ~CTcTokSrcBlock()
    {
        /* detach the rest of the chain from this block */
        CTcTokSrcBlock *cur = nxt_;
        nxt_ = 0;

        /* free each remaining block in turn */
        while (cur != 0)
        {
            /* remember the successor, then detach it before deleting */
            CTcTokSrcBlock *following = cur->nxt_;
            cur->nxt_ = 0;

            /* 'cur' has no successor now, so this frees just this block */
            delete cur;
            cur = following;
        }
    }

    /* get the next block in the list (null if this is the last block) */
    CTcTokSrcBlock *get_next() const { return nxt_; }

    /*
     *   Set the next block in the list.  The block takes ownership of
     *   'blk': it will be deleted when this block is deleted.  Any
     *   previously linked block is simply forgotten, not deleted.
     */
    void set_next(CTcTokSrcBlock *blk) { nxt_ = blk; }

    /* get a pointer to the block's TCTOK_SRC_BLOCK_SIZE-byte buffer */
    char *get_buf() { return buf_; }

private:
    /* the next block in the list */
    CTcTokSrcBlock *nxt_;

    /* bytes of the list entry */
    char buf_[TCTOK_SRC_BLOCK_SIZE];
};
342
343
344
/* ------------------------------------------------------------------------ */
345
/*
346
 *   String Buffer.  We use these buffers for reading input lines and
347
 *   expanding macros.  
348
 */
349
class CTcTokString
350
{
351
public:
352
    CTcTokString()
353
    {
354
        /* no buffer yet */
355
        buf_ = 0;
356
        buf_len_ = 0;
357
        buf_size_ = 0;
358
    }
359
360
    virtual ~CTcTokString()
361
    {
362
        /* delete our buffer */
363
        if (buf_ != 0)
364
            t3free(buf_);
365
    }
366
367
    /* ensure that a given amount of space if available */
368
    virtual void ensure_space(size_t siz)
369
    {
370
        /* make sure there's room for the requested size plus a null byte */
371
        if (buf_size_ < siz + 1)
372
        {
373
            /* increase to the next 4k increment */
374
            buf_size_ = (siz + 4095 + 1) & ~4095;
375
            
376
            /* allocate or re-allocate the buffer */
377
            if (buf_ == 0)
378
                buf_ = (char *)t3malloc(buf_size_);
379
            else
380
                buf_ = (char *)t3realloc(buf_, buf_size_);
381
382
            /* throw an error if that failed */
383
            if (buf_ == 0)
384
                err_throw(TCERR_NO_STRBUF_MEM);
385
        }
386
    }
387
388
    /* expand the buffer */
389
    void expand()
390
    {
391
        /* expand to the next 4k increment */
392
        ensure_space(buf_size_ + 4096);
393
    }
394
395
    /* get the text and the length of the text */
396
    const char *get_text() const { return buf_; }
397
    size_t get_text_len() const { return buf_len_; }
398
399
    /* get the end of the text */
400
    const char *get_text_end() const { return buf_ + buf_len_; }
401
402
    /* append text to the buffer */
403
    virtual void append(const char *p) { append(p, strlen(p)); }
404
    virtual void append(const char *p, size_t len)
405
    {
406
        /* make sure we have space available */
407
        ensure_space(buf_len_ + len);
408
409
        /* copy the text onto the end of our buffer */
410
        memcpy(buf_ + buf_len_, p, len);
411
412
        /* add it to the length of the text */
413
        buf_len_ += len;
414
415
        /* null-terminte it */
416
        buf_[buf_len_] = '\0';
417
    }
418
419
    /* prepend text */
420
    virtual void prepend(const char *p) { prepend(p, strlen(p)); }
421
    virtual void prepend(const char *p, size_t len)
422
    {
423
        /* make sure we have enough space */
424
        ensure_space(buf_len_ + len);
425
426
        /* 
427
         *   move the existing text (including the null terminator) up in the
428
         *   buffer to make room for the prepended text 
429
         */
430
        memmove(buf_ + len, buf_, buf_len_ + 1);
431
432
        /* copy the new text to the start of the buffer */
433
        memcpy(buf_, p, len);
434
435
        /* count the new size */
436
        buf_len_ += len;
437
    }
438
439
    /* 
440
     *   Append a string to the buffer, enclosing the text in single or
441
     *   double quote (as given by 'qu', which must be either '"' or '\'')
442
     *   and backslash-escaping any occurrences of the same quote character
443
     *   found within the string.  
444
     */
445
    void append_qu(char qu, const char *p) { append_qu(qu, p, strlen(p)); }
446
    void append_qu(char qu, const char *p, size_t len)
447
    {
448
        const char *start;
449
        
450
        /* append the open quote */
451
        append(&qu, 1);
452
453
        /* scan for quotes we'll need to escape */
454
        while (len != 0)
455
        {
456
            size_t rem;
457
            
458
            /* skip to the next quote */
459
            for (start = p, rem = len ; rem != 0 && *p != qu ; ++p, --rem) ;
460
461
            /* insert the chunk up to the quote */
462
            if (p != start)
463
                append(start, p - start);
464
465
            /* if we did find a quote, append it with a backslash escape */
466
            if (rem != 0)
467
            {
468
                /* append the backslash and the quote */
469
                append("\\", 1);
470
                append(&qu, 1);
471
472
                /* skip the quote in the source */
473
                ++p;
474
                --rem;
475
            }
476
477
            /* we now only have 'rem' left to consider */
478
            len = rem;
479
        }
480
481
        /* finally, append the closing quote */
482
        append(&qu, 1);
483
    }
484
485
    /* insert text into the buffer at the given offset */
486
    virtual void insert(int ofs, const char *p, size_t len)
487
    {
488
        /* check to see if there's anything after the insertion point */
489
        if ((size_t)ofs >= buf_len_)
490
        {
491
            /* 
492
             *   there's nothing after the insertion point, so this is simply
493
             *   equivalent to 'append' - go do the append, and we're done 
494
             */
495
            append(p, len);
496
            return;
497
        }
498
499
        /* ensure there's space for the added text */
500
        ensure_space(buf_len_ + len);
501
502
        /* 
503
         *   Move the existing text after the insertion point just far enough
504
         *   to make room for the new text.  Include the null terminator.  
505
         */
506
        memmove(buf_ + ofs + len, buf_ + ofs, buf_len_ - ofs + 1);
507
508
        /* copy the new text in at the given offset */
509
        memcpy(buf_ + ofs, p, len);
510
511
        /* include the new text in our length */
512
        buf_len_ += len;
513
    }
514
515
    /* copy text into the buffer, replacing existing text */
516
    virtual void copy(const char *p, size_t len)
517
    {
518
        /* ensure we have enough space */
519
        ensure_space(len);
520
521
        /* copy the text */
522
        memcpy(buf_, p, len);
523
524
        /* set our length */
525
        buf_len_ = len;
526
527
        /* null-terminate it */
528
        buf_[buf_len_] = '\0';
529
    }
530
531
    /* clear any existing text */
532
    virtual void clear_text()
533
    {
534
        /* zero the length */
535
        buf_len_ = 0;
536
537
        /* put a null terminator at the start of the buffer if possible */
538
        if (buf_size_ > 0)
539
            buf_[0] = '\0';
540
    }
541
542
    /* get the buffer, for copying text directly into it */
543
    virtual char *get_buf() const { return buf_; }
544
    size_t get_buf_size() const { return buf_size_; }
545
546
    /* 
547
     *   Set the text length - use this after copying directly into the
548
     *   buffer to set the length, excluding the null terminator.  We'll
549
     *   add a null terminator at the given length.  
550
     */
551
    virtual void set_text_len(size_t len)
552
    {
553
        /* set the new length */
554
        buf_len_ = len;
555
556
        /* add a null terminator after the new length */
557
        if (len < buf_size_)
558
            buf_[len] = '\0';
559
    }
560
561
protected:
562
    /* buffer */
563
    char *buf_;
564
565
    /* size of the buffer */
566
    size_t buf_size_;
567
568
    /* length of the text in the buffer (excluding trailing null) */
569
    size_t buf_len_;
570
};
571
572
573
/*
574
 *   String buffer subclass for a non-allocated string that merely
575
 *   references another buffer.  This can be used anywhere a CTcTokString is
576
 *   required, but does not require any allocation.
577
 *   
578
 *   These objects can only be used in 'const' contexts: the underlying
579
 *   buffer cannot be changed or expanded, since we do not own the
580
 *   underlying buffer.  
581
 */
582
class CTcTokStringRef: public CTcTokString
583
{
584
public:
585
    CTcTokStringRef()
586
    {
587
        /* we have no referenced buffer yet */
588
        buf_ = 0;
589
        buf_size_ = 0;
590
        buf_len_ = 0;
591
    }
592
593
    ~CTcTokStringRef()
594
    {
595
        /* we don't own the underlying buffer, so simply forget about it */
596
        buf_ = 0;
597
    }
598
599
    /* we can't make any changes to the underlying buffer */
600
    void ensure_space(size_t) { }
601
    void append(const char *) { assert(FALSE); }
602
    void append(const char *, size_t) { assert(FALSE); }
603
    void prepend(const char *) { assert(FALSE); }
604
    void prepend(const char *, size_t) { assert(FALSE); }
605
    void insert(int, const char *, size_t) { assert(FALSE); }
606
    void copy(const char *, size_t) { assert(FALSE); }
607
    void clear_text() { assert(FALSE); }
608
    char *get_buf() const { assert(FALSE); return 0; }
609
    void set_text_len(size_t) { assert(FALSE); }
610
611
    /* set my underlying buffer */
612
    void set_buffer(const char *buf, size_t len)
613
    {
614
        buf_ = (char *)buf;
615
        buf_size_ = len + 1;
616
        buf_len_ = len;
617
    }
618
};
619
620
/* ------------------------------------------------------------------------ */
621
/*
622
 *   Token 
623
 */
624
class CTcToken
625
{
626
public:
627
    /* get/set the token type */
628
    tc_toktyp_t gettyp() const { return typ_; }
629
    void settyp(tc_toktyp_t typ) { typ_ = typ; }
630
631
    /* get/set the fully-expanded flag */
632
    int get_fully_expanded() const { return fully_expanded_; }
633
    void set_fully_expanded(int flag) { fully_expanded_ = flag; }
634
    
635
    /* get/set the text pointer */
636
    const char *get_text() const { return text_; }
637
    size_t get_text_len() const { return text_len_; }
638
    void set_text(const char *txt, size_t len)
639
    {
640
        text_ = txt;
641
        text_len_ = len;
642
    }
643
644
    /* get/set the integer value */
645
    long get_int_val() const { return int_val_; }
646
    void set_int_val(long val) { typ_ = TOKT_INT; int_val_ = val; }
647
648
    /* 
649
     *   compare the text to the given string - returns true if the text
650
     *   matches, false if not 
651
     */
652
    int text_matches(const char *txt, size_t len) const
653
    {
654
        return (len == text_len_
655
                && memcmp(txt, text_, len) == 0);
656
    }
657
658
private:
659
    /* token type */
660
    tc_toktyp_t typ_;
661
    
662
    /* 
663
     *   Pointer to the token's text.  This is a pointer into the
664
     *   tokenizer's symbol table or into the token list itself, so this
665
     *   pointer is valid as long as the tokenizer and its token list are
666
     *   valid.
667
     */
668
    const char *text_;
669
    size_t text_len_;
670
671
    /* integer value - valid when the token type is TOKT_INT */
672
    long int_val_;
673
674
    /* 
675
     *   flag: the token has been fully expanded, and should not be
676
     *   expanded further on any subsequent rescan for macros 
677
     */
678
    uint fully_expanded_ : 1;
679
};
680
681
682
/* ------------------------------------------------------------------------ */
683
/*
684
 *   Macro Expansion Resource object.  This object is a collection of
685
 *   resources that are needed for a macro expansion.  To avoid frequent
686
 *   allocating and freeing of these resources, we keep a pool of these
687
 *   objects around so that we can re-use them as needed.  We'll
688
 *   dynamically expand the pool as necessary, so this doesn't impose any
689
 *   pre-set limits; it simply avoids lots of memory allocation activity. 
690
 */
691
class CTcMacroRsc
692
{
693
public:
694
    CTcMacroRsc()
695
    {
696
        /* we're not in any lists yet */
697
        next_avail_ = 0;
698
        next_ = 0;
699
    }
700
    
701
    /* buffer for expansion of the whole line */
702
    CTcTokString line_exp_;
703
704
    /* buffer for expansion of current macro on line */
705
    CTcTokString macro_exp_;
706
707
    /* buffer for expansion of an actual parameter value */
708
    CTcTokString actual_exp_buf_;
709
710
    /* next resource object in the "available" list */
711
    CTcMacroRsc *next_avail_;
712
713
    /* next resource object in the master list */
714
    CTcMacroRsc *next_;
715
};
716
717
718
/* ------------------------------------------------------------------------ */
719
/*
720
 *   Abstract token source interface.  This is used to allow external code
721
 *   to inject their own substreams into the main token stream.  
722
 */
723
class CTcTokenSource
724
{
725
public:
726
    /* 
727
     *   Get the next token from the source.  Returns null if there are no
728
     *   more tokens.  
729
     */
730
    virtual const CTcToken *get_next_token() = 0;
731
732
    /* set the enclosing external token source and current token */
733
    void set_enclosing_source(CTcTokenSource *src, const CTcToken *tok)
734
    {
735
        /* remember the enclosing source */
736
        enclosing_src_ = src;
737
738
        /* remember the current token */
739
        enclosing_curtok_ = *tok;
740
    }
741
742
    /* get the enclosing external token source */
743
    CTcTokenSource *get_enclosing_source() const
744
        { return enclosing_src_; }
745
746
    /* get the token that was current when this source was inserted */
747
    const CTcToken *get_enclosing_curtok() const
748
        { return &enclosing_curtok_; }
749
750
protected:
751
    /* the enclosing external token source */
752
    CTcTokenSource *enclosing_src_;
753
754
    /* 
755
     *   the current token in effect enclosing this source - this is the
756
     *   token that comes immediately after the source's tokens, because a
757
     *   source is inserted before the current token 
758
     */
759
    CTcToken enclosing_curtok_;
760
};
761
762
763
/* ------------------------------------------------------------------------ */
764
/*
765
 *   Tokenizer.  This object reads a file and constructs a representation
766
 *   of the file as a token list in memory.  The tokenizer interprets
767
 *   preprocessor directives and expands macros.  
768
 */
769
class CTcTokenizer
770
{
771
public:
772
    /*
773
     *   Create the tokenizer and start reading from the given file.  The
774
     *   default character set is generally specified by the user (on the
775
     *   compiler command line, for example), or obtained from the
776
     *   operating system.
777
     */
778
    CTcTokenizer(class CResLoader *res_loader, const char *default_charset);
779
780
    /* destroy the tokenizer */
781
    ~CTcTokenizer();
782
783
    /*
784
     *   Reset the tokenizer.  Deletes the current source object and all
785
     *   saved token text.  This can be used after compilation of a unit
786
     *   is completed and the intermediate parser state can be completely
787
     *   discarded. 
788
     */
789
    void reset();
790
791
    /* 
792
     *   Set the source file.  'src_filename' is the fully-resolved local
793
     *   filename of the source file; 'orig_name' is the original name as
794
     *   given on the command line, in the makefile, or wherever it came
795
     *   from.  We keep track of the original name so that we can pass
796
     *   information to the debugger indicating the name as it was originally
797
     *   given; this is more useful than the resolved filename, because we
798
     *   might want to run the debugger on another machine with a different
799
     *   local directory structure.  
800
     */
801
    int set_source(const char *src_filename, const char *orig_name);
802
803
    /* set the source to a memory buffer */
804
    void set_source_buf(const char *buf);
805
806
    /* 
807
     *   Add a #include directory to the include path.  We search the
808
     *   include path directories in the order in which they were added.
809
     */
810
    void add_inc_path(const char *path);
811
812
    /*
813
     *   Set preprocess-only mode.  In this mode, we'll retain
814
     *   preprocessor directives that will be needed if the preprocessed
815
     *   result is itself compiled; for example, we'll retain #line,
816
     *   #pragma C, #error, and #pragma message directives. 
817
     */
818
    void set_mode_pp_only(int flag)
    {
        /* 
         *   The flag is stored in a one-bit field, so normalize it to a
         *   strict boolean first; assigning the raw int would keep only
         *   the low-order bit, turning any even non-zero value into
         *   "false". 
         */
        pp_only_mode_ = (flag != 0);
    }
819
820
    /*
821
     *   Set list-includes mode.  In this mode, we'll simply scan source
822
     *   files and write to the standard output a list of the names of all
823
     *   of the #include files.  
824
     */
825
    void set_list_includes_mode(int flag)
    {
        /* 
         *   The flag is stored in a one-bit field, so normalize it to a
         *   strict boolean first; assigning the raw int would keep only
         *   the low-order bit, turning any even non-zero value into
         *   "false". 
         */
        list_includes_mode_ = (flag != 0);
    }
826
827
    /* 
828
     *   Get/set the test-report mode.  In this mode, we'll expand __FILE__
829
     *   macros with the root name only.  
830
     */
831
    int get_test_report_mode() const { return test_report_mode_; }
832
    void set_test_report_mode(int flag) { test_report_mode_ = flag; }
833
834
    /* enable or disable preprocessing directives */
835
    void enable_pp(int enable)
    {
        /* note the new setting for whether directives are processed */
        allow_pp_ = enable;
    }
836
837
    /* get the type of the current token */
838
    tc_toktyp_t cur() const
    {
        /* the type is recorded in the current token object */
        return getcur()->gettyp();
    }
839
840
    /* get the next token, reading a new line of source if necessary */
841
    tc_toktyp_t next();
842
843
    /* 
844
     *   Un-get the current token and back up to the previous token.  The
845
     *   maximum un-get depth is one token - after un-getting one token,
846
     *   another token must not be un-gotten until after reading another
847
     *   token.
848
     *   
849
     *   Tokens un-got with this routine are accessible only to next(),
850
     *   not to any of the lower-level token readers.  
851
     */
852
    void unget();
853
854
    /* get the current token */
855
    const class CTcToken *getcur() const
    {
        /* hand back a read-only pointer to our internal token object */
        return &curtok_;
    }
856
857
    /* 
858
     *   Copy the current token.  This makes a copy of the token's text in
859
     *   tokenizer source memory, to ensure that the reference to the text
860
     *   buffer the caller is keeping will remain valid forever. 
861
     */
862
    const class CTcToken *copycur();
863
864
    /* make a safely storable copy of a given token */
865
    void copytok(class CTcToken *dst, const class CTcToken *src);
866
867
    /* check to see if the current token matches the given text */
868
    int cur_tok_matches(const char *txt, size_t len);
869
870
    /*
871
     *   Set an external token source.  We'll read tokens from this source
872
     *   until it is exhausted, at which point we'll revert to the enclosing
873
     *   source.
874
     *   
875
     *   The new source is inserted before the current token, so the current
876
     *   token will become current once again when this source is exhausted.
877
     *   We'll automatically advance to the next token, which (unless we
878
     *   have an ungotten token stashed) will go to the first token in the
879
     *   new source.  
880
     */
881
    void set_external_source(CTcTokenSource *src)
    {
        /* the source that's active now becomes the new one's parent */
        CTcTokenSource *const outer = ext_src_;

        /* 
         *   Link the new source to its parent, and give it a snapshot of
         *   the current token, so that both can be restored when the new
         *   source runs out of tokens.  
         */
        src->set_enclosing_source(outer, &curtok_);

        /* make the new source the active one */
        ext_src_ = src;

        /* advance to the next token */
        next();
    }
895
896
    /* clear all external sources, returning to the real token stream */
897
    void clear_external_sources();
898
899
    /* 
900
     *   assume that we should have found a '>>' sequence after an embedded
901
     *   expression in a string - used by parsers to resynchronize after
902
     *   an apparent syntax error 
903
     */
904
    void assume_missing_dstr_cont();
905
906
    /* define a macro */
907
    void add_define(const char *sym, size_t len, const char *expansion,
908
                    size_t expan_len);
909
910
    void add_define(const char *sym, const char *expansion, size_t expan_len)
911
        { add_define(sym, strlen(sym), expansion, expan_len); }
912
913
    void add_define(const char *sym, const char *expansion)
914
        { add_define(sym, strlen(sym), expansion, strlen(expansion)); }
915
916
    /* add a macro, given the symbol entry */
917
    void add_define(class CTcHashEntryPp *entry);
918
919
    /* undefine a previously defined macro */
920
    void undefine(const char *sym, size_t len);
921
    void undefine(const char *sym)
    {
        /* measure the null-terminated name, then use the counted version */
        const size_t sym_len = strlen(sym);
        undefine(sym, sym_len);
    }
922
923
    /* find a #define symbol */
924
    class CTcHashEntryPp *find_define(const char *sym, size_t len) const;
925
926
    /* find an #undef symbol */
927
    class CTcHashEntryPp *find_undef(const char *sym, size_t len) const;
928
929
    /* enumerate all of the #define symbols through a callback */
930
    void enum_defines(void (*func)(void *ctx, class CTcHashEntryPp *entry),
931
                      void *ctx);
932
933
    /* read the next line and handle preprocessor directives */
934
    int read_line_pp();
935
936
    /* get the file descriptor and line number of the last line read */
937
    class CTcTokFileDesc *get_last_desc() const { return last_desc_; }
938
    long get_last_linenum() const { return last_linenum_; }
939
    void get_last_pos(class CTcTokFileDesc **desc, long *linenum) const
940
    {
941
        *desc = last_desc_;
942
        *linenum = last_linenum_;
943
    }
944
945
    /* 
946
     *   set the current file descriptor and line number -- this can be
947
     *   used to force the line position to a previously-saved value
948
     *   (during code generation, for example) for error-reporting and
949
     *   debug-record purposes 
950
     */
951
    void set_line_info(class CTcTokFileDesc *desc, long linenum)
952
    {
953
        last_desc_ = desc;
954
        last_linenum_ = linenum;
955
    }
956
957
    /* 
958
     *   Parse a preprocessor constant expression.  We always parse out of
959
     *   the macro expansion buffer (expbuf_), but the caller must set p_
960
     *   to point to the starting point on the expansion line prior to
961
     *   calling this routine.
962
     *   
963
     *   If 'read_first' is true, we'll read a token into curtok_ before
964
     *   parsing; otherwise, we'll assume the caller has already primed
965
     *   the pump by reading the first token.
966
     *   
967
     *   If 'last_on_line' is true, we'll flag an error if anything is
968
     *   left on the line after we finish parsing the expression.
969
     *   
970
     *   If 'add_line_ending' is true, we'll add an end-of-line marker to
971
     *   the expansion buffer, so that the tokenizer won't attempt to read
972
     *   past the end of the line.  Since a preprocessor expression must
973
     *   be contained entirely on a single logical line, we must never try
974
     *   to read past the end of the current line when parsing a
975
     *   preprocessor expression.  
976
     */
977
    int pp_parse_expr(class CTcConstVal *result,
978
                      int read_first, int last_on_line, int add_line_ending);
979
980
    /* log an error, optionally with parameters */
981
    static void log_error(int errnum, ...);
982
983
    /* 
984
     *   log an error with the current token text as the parameter,
985
     *   suitable for a "%.*s" format list entry (hence we'll provide two
986
     *   parameters: an integer with the length of the token text, and a
987
     *   pointer to the token text string) 
988
     */
989
    void log_error_curtok(int errnum);
990
991
    /* log a warning, optionally with parameters */
992
    static void log_warning(int errnum, ...);
993
994
    /* log a warning with the current token as the parameter */
995
    void log_warning_curtok(int errnum);
996
997
    /* log a warning or error for the current token */
998
    void log_error_or_warning_curtok(tc_severity_t sev, int errnum);
999
1000
    /* log a warning or error for a given token */
1001
    void log_error_or_warning_with_tok(tc_severity_t sev, int errnum,
1002
                                       const CTcToken *tok);
1003
1004
    /* 
1005
     *   log then throw a fatal error (this is different from an internal
1006
     *   error in that it indicates an unrecoverable error in the input;
1007
     *   an internal error indicates that something is wrong with the
1008
     *   compiler itself)
1009
     */
1010
    static void throw_fatal_error(int errnum, ...);
1011
1012
    /* 
1013
     *   log then throw an internal error (internal errors are always
1014
     *   fatal: these indicate that something has gone wrong in the
1015
     *   compiler, and are equivalent to an assert failure) 
1016
     */
1017
    static void throw_internal_error(int errnum, ...);
1018
1019
    /* display a string/number value */
1020
    void msg_str(const char *str, size_t len) const;
1021
    void msg_long(long val) const;
1022
1023
    /* get the current line */
1024
    const char *get_cur_line() const { return linebuf_.get_text(); }
1025
    size_t get_cur_line_len() const { return linebuf_.get_text_len(); }
1026
1027
    /* get the #define hash table */
1028
    class CVmHashTable *get_defines_table() const
    {
        /* return our #define table pointer */
        return defines_;
    }
1029
1030
    /* 
1031
     *   look up a token as a keyword; returns true and fills in 'kw' with
1032
     *   the keyword token ID if the token is in fact a keyword, or
1033
     *   returns false if it's not a keyword 
1034
     */
1035
    int look_up_keyword(const CTcToken *tok, tc_toktyp_t *kw);
1036
1037
    /* 
1038
     *   Get the next token on the line, filling in the token object.
1039
     *   Advances the pointer to the character immediately following the
1040
     *   token.
1041
     *   
1042
     *   If the token is a string, and the string contains backslash
1043
     *   sequences, we'll modify the source string by translating each
1044
     *   backslash sequence; for example, a "\n" sequence is changed into an
1045
     *   ASCII 10.
1046
     *   
1047
     *   'expanding' indicates whether or not we're in the initial macro
1048
     *   expansion pass.  If this is true, we'll suppress error messages
1049
     *   during this pass, as we'll encounter the same tokens again when we
1050
     *   parse the expanded form of the line.  
1051
     */
1052
    static tc_toktyp_t next_on_line(utf8_ptr *p, CTcToken *tok,
1053
                                    int *in_embedding, int expanding);
1054
1055
    /*
1056
     *   Get the text of an operator token.  Returns a pointer to a
1057
     *   constant, static, null-terminated string, suitable for use in
1058
     *   error messages.  
1059
     */
1060
    static const char *get_op_text(tc_toktyp_t op);
1061
1062
    /* 
1063
     *   Store text in the source list.  Text stored here is available
1064
     *   throughout compilation.  This routine automatically reserves the
1065
     *   space needed, so do not call 'reserve' or 'commit' separately.  
1066
     */
1067
    const char *store_source(const char *txt, size_t len);
1068
1069
    /* reserve space for text in the source list */
1070
    void reserve_source(size_t len);
1071
1072
    /* 
1073
     *   Store a piece of text into pre-reserved space in the source list.
1074
     *   This can be used to build up a string from several pieces.  You must
1075
     *   call 'reserve' first to allocate the space, and you must explicitly
1076
     *   add a null terminator at the end of the string.  Do not call
1077
     *   'commit'; this automatically commits the space as each substring is
1078
     *   added. 
1079
     */
1080
    const char *store_source_partial(const char *txt, size_t len);
1081
1082
    /*
1083
     *   Get the index of the next source file descriptor that will be
1084
     *   created.  The linker can use this information to fix up
1085
     *   references to file descriptors in an object file when loading
1086
     *   multiple object files.  
1087
     */
1088
    int get_next_filedesc_index() const
    {
        /* this is simply the next descriptor ID we'll assign */
        return next_filedesc_id_;
    }
1089
1090
    /* get the number of source file descriptors in the master list */
1091
    int get_filedesc_count() const
    {
        /* the count is reported as the next descriptor ID to be assigned */
        return next_filedesc_id_;
    }
1092
1093
    /* get the file descriptor at the given (0-based) index */
1094
    class CTcTokFileDesc *get_filedesc(size_t idx) const
1095
    {
1096
        /* return the array entry at the index, if the index is valid */
1097
        return (idx < desc_list_cnt_ ? desc_list_[idx] : 0);
1098
    }
1099
1100
    /* get the head of the master source file descriptor list */
1101
    class CTcTokFileDesc *get_first_filedesc() const
    {
        /* the master list starts at the head pointer */
        return desc_head_;
    }
1102
1103
    /* 
1104
     *   Create a new file descriptor and add it to the master list.  This
1105
     *   creates the new descriptor unconditionally, even if a descriptor
1106
     *   for the same source file already exists. 
1107
     */
1108
    class CTcTokFileDesc *create_file_desc(const char *fname, size_t len)
1109
        { return get_file_desc(fname, len, TRUE, fname, len); }
1110
1111
    /*
1112
     *   Set the string capture file.  Once this is set, we'll write the
1113
     *   contents of each string token that we encounter to this file,
1114
     *   with a newline after each token.  
1115
     */
1116
    void set_string_capture(osfildef *fp);
1117
1118
    /* write macros to a file, for debugger use */
1119
    void write_macros_to_file_for_debug(class CVmFile *fp);
1120
1121
    /* 
1122
     *   Load macros from a file.  If any errors occur, we'll flag them
1123
     *   through the error handler object and return a non-zero value.
1124
     *   Returns zero on success.  
1125
     */
1126
    int load_macros_from_file(class CVmStream *fp,
1127
                              class CTcTokLoadMacErr *err_handler);
1128
1129
    /* receive notification that the compiler is done with all parsing */
1130
    void parsing_done()
1131
    {
1132
        /* forget any input file position */
1133
        set_line_info(0, 0);
1134
    }
1135
1136
    /*
1137
     *   Stuff text into the tokenizer source stream.  The new text is
1138
     *   inserted at the current read pointer, so that the next token we
1139
     *   fetch will come from the start of the inserted text.  If 'expand' is
1140
     *   true, we'll expand macros in the text; if not, we'll insert the text
1141
     *   exactly as is with no macro expansion.  
1142
     */
1143
    void stuff_text(const char *txt, size_t len, int expand);
1144
1145
private:
1146
    /* skip whitespace and token markers */
1147
    static void skip_ws_and_markers(utf8_ptr *p);
1148
    
1149
    /* 
1150
     *   get the next token on the line; if we go past the end of the
1151
     *   string buffer, we'll return EOF 
1152
     */
1153
    static tc_toktyp_t next_on_line(const CTcTokString *srcbuf, utf8_ptr *p,
1154
                                    CTcToken *tok, int *in_embedding,
1155
                                    int expanding);
1156
1157
    /* 
1158
     *   get the next token on the current line, updating the internal
1159
     *   character position pointer to point just past the token, and filling
1160
     *   in the internal current token object with the token data 
1161
     */
1162
    tc_toktyp_t next_on_line()
    {
        /* 
         *   Read from our own line pointer into our own current token,
         *   passing no embedding-state pointer and 'expanding' as false.  
         */
        utf8_ptr *const pos = &p_;
        CTcToken *const tok = &curtok_;
        return next_on_line(pos, tok, 0, FALSE);
    }
1164
1165
    /* get the next token on the line, with string translation */
1166
    tc_toktyp_t next_on_line_xlat(int *in_embedding)
    {
        /* read from our line pointer into our current token object */
        utf8_ptr *const pos = &p_;
        CTcToken *const tok = &curtok_;
        return next_on_line_xlat(pos, tok, in_embedding);
    }
1168
1169
    /* 
1170
     *   get the next token, translating strings and storing string and
1171
     *   symbol text in the source block list 
1172
     */
1173
    tc_toktyp_t next_on_line_xlat_keep();
1174
1175
    /* 
1176
     *   get the next token on the line, translating strings to internal
1177
     *   format 
1178
     */
1179
    tc_toktyp_t next_on_line_xlat(utf8_ptr *p, CTcToken *tok,
1180
                                  int *in_embedding);
1181
1182
    /* 
1183
     *   translate a string to internal format by converting escape
1184
     *   sequences; overwrites the original buffer 
1185
     */
1186
    tc_toktyp_t xlat_string(utf8_ptr *p, CTcToken *tok,
1187
                            int *in_embedding);
1188
1189
    /* 
1190
     *   translate a string into a given buffer; if 'force_embed_end' is
1191
     *   true, we'll act as though we're continuing the string after the
1192
     *   '>>' after an embedded expression, no matter what the actual
1193
     *   input looks like 
1194
     */
1195
    tc_toktyp_t xlat_string_to(char *dst, utf8_ptr *p, CTcToken *tok,
1196
                               int *in_embedding, int force_embed_end);
1197
1198
    /* 
1199
     *   Translate a string, saving the translated version in the source
1200
     *   block list.  If 'force_end_embed' is true, we'll act as though we
1201
     *   were looking at '>>' (or, more precisely, we'll act as though
1202
     *   '>>' immediately preceded the current input), regardless of what
1203
     *   the actual input looks like.  
1204
     */
1205
    tc_toktyp_t xlat_string_to_src(int *in_embedding, int force_end_embed);
1206
1207
    /* initialize the source block list */
1208
    void init_src_block_list();
1209
1210
    /* delete the current source file and all parent files that include it */
1211
    void delete_source();
1212
1213
    /* 
1214
     *   Read the next line; processes comments, but does not expand macros
1215
     *   or parse preprocessor directives.  This always reads into linebuf_;
1216
     *   the return value is the offset within linebuf_ of the new text.  A
1217
     *   return value of -1 indicates that we're at end of file.  
1218
     */
1219
    int read_line(int append);
1220
1221
    /* 
1222
     *   Set the source read pointer to the start of a new line, given the
1223
     *   CTcTokString object containing the buffer, and the offset within
1224
     *   that buffer. 
1225
     */
1226
    void start_new_line(CTcTokString *str, int ofs)
    {
        /* 
         *   Figure where the new line's text starts within the buffer.
         *   The cast removes the qualifier from the buffer's text pointer
         *   so that it can be handed to the read pointer.  
         */
        char *const line_start = (char *)str->get_text() + ofs;

        /* this is now the buffer we're reading from */
        curbuf_ = str;

        /* position the read pointer at the start of the line */
        p_.set(line_start);
    }
1234
1235
    /* unsplice text from the current line and make it the next line */
1236
    void unsplice_line(const char *new_line_start);
1237
1238
    /* 
1239
     *   Commit space in the source list - this is used when text is directly
1240
     *   stored after reserving space.  The size reserved may be greater than
1241
     *   the size committed, because it is sometimes more efficient to make a
1242
     *   guess that may overestimate the amount we actually end up needing.  
1243
     */
1244
    void commit_source(size_t len);
1245
1246
    /* parse a string */
1247
    static tc_toktyp_t tokenize_string(utf8_ptr *p, CTcToken *tok,
1248
                                       int *in_embedding);
1249
1250
    /* process comments */
1251
    void process_comments(size_t start_ofs);
1252
1253
    /* splice lines for a string that runs across multiple lines */
1254
    void splice_string();
1255
1256
    /* expand macros in the current line */
1257
    int expand_macros_curline(int read_more, int allow_defined,
1258
                              int append_to_expbuf);
1259
1260
    /* 
1261
     *   Expand the macros in the given text, filling in the given
1262
     *   CTcTokString with the results.  The expansion will clear out any
1263
     *   existing text in the result buffer.  Returns zero on success, or
1264
     *   non-zero on error.  
1265
     */
1266
    int expand_macros(class CTcTokString *dest, const char *str, size_t len)
1267
    {
1268
        CTcTokStringRef srcbuf;
1269
1270
        /* set up a CTcTokString for the source */
1271
        srcbuf.set_buffer(str, len);
1272
1273
        /* go expand macros */
1274
        return expand_macros(&srcbuf, 0, dest, FALSE, FALSE, FALSE);
1275
    }
1276
1277
    /* expand all of the macros in the given text */
1278
    int expand_macros(class CTcTokString *srcbuf, utf8_ptr *src,
1279
                      class CTcTokString *expbuf, int read_more,
1280
                      int allow_defined, int append);
1281
1282
    /* expand the macro at the current token on the current line */
1283
    int expand_macro(class CTcMacroRsc *res, class CTcTokString *expbuf,
1284
                     const class CTcTokString *srcbuf, utf8_ptr *src,
1285
                     size_t macro_srcbuf_ofs, CTcHashEntryPp *entry,
1286
                     int read_more, int allow_defined, int *expanded);
1287
1288
    /* 
1289
     *   Remove our special expansion flags from an expanded macro buffer.
1290
     *   This can be called after all expansion has been completed to clean
1291
     *   up the buffer for human consumption. 
1292
     */
1293
    void remove_expansion_flags(CTcTokString *buf);
1294
1295
    /* scan for a prior expansion of a macro within the current context */
1296
    static int scan_for_prior_expansion(utf8_ptr src, const char *src_end,
1297
                                        const class CTcHashEntryPp *entry);
1298
1299
    /* remove end-of-macro-expansion flags from a buffer */
1300
    static void remove_end_markers(class CTcTokString *buf);
1301
1302
    /* change a buffer to use individual token full-expansion markers */
1303
    void mark_full_exp_tokens(CTcTokString *dstbuf,
1304
                              const class CTcTokString *srcbuf,
1305
                              int append) const;
1306
1307
    /* allocate a macro expansion resource */
1308
    class CTcMacroRsc *alloc_macro_rsc();
1309
1310
    /* release a macro expansion resource */
1311
    void release_macro_rsc(class CTcMacroRsc *rsc);
1312
1313
    /* 
1314
     *   Parse the actual parameters to a macro.  Fills in argofs[] and
1315
     *   arglen[] with the offsets (from srcbuf->get_buf()) and lengths,
1316
     *   respectively, of each actual parameter's text. 
1317
     */
1318
    int parse_macro_actuals(const class CTcTokString *srcbuf, utf8_ptr *src,
1319
                            const CTcHashEntryPp *macro_entry,
1320
                            size_t argofs[TOK_MAX_MACRO_ARGS],
1321
                            size_t arglen[TOK_MAX_MACRO_ARGS],
1322
                            int read_more, int *found_actuals);
1323
1324
    /* splice the next line for reading more macro actuals */
1325
    tc_toktyp_t actual_splice_next_line(const CTcTokString *srcbuf,
1326
                                        utf8_ptr *src, CTcToken *tok);
1327
1328
    /* substitute the actual parameters in a macro's expansion */
1329
    int substitute_macro_actuals(class CTcMacroRsc *rsc,
1330
                                 class CTcTokString *subexp,
1331
                                 CTcHashEntryPp *macro_entry,
1332
                                 const class CTcTokString *srcbuf,
1333
                                 const size_t *argofs, const size_t *arglen,
1334
                                 int allow_defined);
1335
1336
    /* stringize a macro actual parameter into an expansion buffer */
1337
    void stringize_macro_actual(class CTcTokString *expbuf,
1338
                                const char *actual_val, size_t actual_len,
1339
                                char quote_char, int add_open_quote,
1340
                                int add_close_quote);
1341
1342
    /* skip a delimited macro expansion area (#foreach, #ifempty, etc) */
1343
    void skip_delimited_group(utf8_ptr *p, int parts_to_skip);
1344
1345
    /* expand a defined() preprocessor operator */
1346
    int expand_defined(class CTcTokString *subexp,
1347
                       const class CTcTokString *srcbuf, utf8_ptr *src);
1348
1349
    /* add a file to the list of files to be included only once */
1350
    void add_include_once(const char *fname);
1351
1352
    /* find a file in the list of files to be included only once */
1353
    int find_include_once(const char *fname);
1354
1355
    /* process a #pragma directive */
1356
    void pp_pragma();
1357
1358
    /* process a #charset directive */
1359
    void pp_charset();
1360
1361
    /* process a #include directive */
1362
    void pp_include();
1363
1364
    /* process a #define directive */
1365
    void pp_define();
1366
1367
    /* process a #if directive */
1368
    void pp_if();
1369
1370
    /* process a #ifdef directive */
1371
    void pp_ifdef();
1372
1373
    /* process a #ifndef directive */
1374
    void pp_ifndef();
1375
1376
    /* process a #ifdef or #ifndef */
1377
    void pp_ifdef_or_ifndef(int sense);
1378
1379
    /* process a #else directive */
1380
    void pp_else();
1381
1382
    /* process a #elif directive */
1383
    void pp_elif();
1384
1385
    /* process a #endif directive */
1386
    void pp_endif();
1387
1388
    /* process a #error directive */
1389
    void pp_error();
1390
1391
    /* process a #undef directive */
1392
    void pp_undef();
1393
1394
    /* process a #line directive */
1395
    void pp_line();
1396
1397
    /* get a lone identifier for a preprocessor directive */
1398
    int pp_get_lone_ident(char *buf, size_t bufl);
1399
1400
    /* process a #pragma C directive */
1401
    // void pragma_c(); - not currently used
1402
1403
    /* process a #pragma once directive */
1404
    void pragma_once();
1405
1406
    /* process a #pragma all_once directive */
1407
    void pragma_all_once();
1408
1409
    /* process a #pragma message directive */
1410
    void pragma_message();
1411
1412
    /* process a #pragma newline_spacing(on/off) directive */
1413
    void pragma_newline_spacing();
1414
1415
    /* process a #pragma sourceTextGroup directive */
1416
    void pragma_source_text_group();
1417
1418
    /* 
1419
     *   Determine if we're in a false #if branch.  If we're inside a #if
1420
     *   block, and the state is either IF_NO, IF_DONE, or ELSE_NO, or
1421
     *   we're inside a #if nested within any negative branch, we're in a
1422
     *   not-taken branch of a #if block.  
1423
     */
1424
    int in_false_if() const
1425
    {
1426
        return (if_sp_ != 0
1427
                && (if_false_level_ != 0
1428
                    || if_stack_[if_sp_ - 1].state == TOKIF_IF_NO
1429
                    || if_stack_[if_sp_ - 1].state == TOKIF_IF_DONE
1430
                    || if_stack_[if_sp_ - 1].state == TOKIF_ELSE_NO));
1431
    }
1432
1433
    /* push a new #if level with the given state */
1434
    void push_if(tok_if_t state);
1435
1436
    /* get the current #if state */
1437
    tok_if_t get_if_state() const
    {
        /* 
         *   with an empty #if stack, the state is "none"; otherwise it's
         *   the state at the top of the stack 
         */
        return (if_sp_ == 0 ? TOKIF_NONE : if_stack_[if_sp_ - 1].state);
    }
1444
1445
    /* switch the current #if level to the given state */
1446
    void change_if_state(tok_if_t state)
    {
        /* with an empty #if stack, there's nothing to change */
        if (if_sp_ == 0)
            return;

        /* update the state of the level at the top of the stack */
        if_stack_[if_sp_ - 1].state = state;
    }
1451
1452
    /* pop the current #if level */
1453
    void pop_if();
1454
1455
    /* 
1456
     *   Find or create a descriptor for the given filename.  'fname' is
1457
     *   the full file system path specifying the file.  'orig_fname' is
1458
     *   the filename as originally specified by the user, if different;
1459
     *   in the case of #include files, this indicates the name that was
1460
     *   specified in the directive itself, whereas 'fname' is the actual
1461
     *   filename that resulted from searching the include path for the
1462
     *   given name. 
1463
     */
1464
    class CTcTokFileDesc *get_file_desc(const char *fname, size_t fname_len,
1465
                                        int always_create,
1466
                                        const char *orig_fname,
1467
                                        size_t orig_fname_len);
1468
1469
    /* clear the line buffer */
1470
    void clear_linebuf();
1471
1472
    /* flag: ALL_ONCE mode - we include each file only once */
1473
    int all_once_ : 1;
1474
1475
    /* flag: warn on ignoring a redundant #include file */
1476
    int warn_on_ignore_incl_ : 1;
1477
1478
    /*
1479
     *   Flag: in preprocess-only mode.  In this mode, we'll leave certain
1480
     *   preprocessor directives intact in the source, since they'll be
1481
     *   needed in a subsequent compilation of the preprocessed source.
1482
     *   For example, we'll leave #line directives, #pragma C, #error, and
1483
     *   #pragma message directives in the preprocessed result.  
1484
     */
1485
    int pp_only_mode_ : 1;
1486
1487
    /* 
1488
     *   Flag: in test reporting mode.  In this mode, we'll expand __FILE__
1489
     *   macros with the root name only. 
1490
     */
1491
    int test_report_mode_ : 1;
1492
1493
    /*
1494
     *   Flag: in preprocess-for-includes mode.  In this mode, we'll do
1495
     *   nothing except run the preprocessor and generate a list of the
1496
     *   header files that are included, along with header files they
1497
     *   include, and so on.  
1498
     */
1499
    int list_includes_mode_ : 1;
1500
1501
    /*
1502
     *   Flag: treat newlines in strings as whitespace.  When this is true,
1503
     *   whenever we find a newline character in a string, we'll convert the
1504
     *   newline and all leading whitespace on the next line to a single
1505
     *   space character.  When this is false, we'll entirely strip out each
1506
     *   newline in a string and all whitespace that immediately follows;
1507
     *   this mode is desirable for some languages, such as Chinese, where
1508
     *   whitespace is not conventionally used as a token separator in
1509
     *   ordinary text.  
1510
     */
1511
    int string_newline_spacing_ : 1;
1512
1513
    /* 
1514
     *   flag: we're parsing a preprocessor constant expression (for a
1515
     *   #if, for example; this doesn't apply to simple macro expansion) 
1516
     */
1517
    int in_pp_expr_ : 1;
1518
1519
    /* resource loader */
1520
    class CResLoader *res_loader_;
1521
1522
    /* 
1523
     *   name of our default character set - this is generally specified
1524
     *   by the user (on the compiler command line, for example), or
1525
     *   obtained from the operating system 
1526
     */
1527
    char *default_charset_;
1528
1529
    /* input (to unicode) character mapper for the default character set */
1530
    class CCharmapToUni *default_mapper_;
1531
1532
    /* head of list of previously-included files */
1533
    struct tctok_incfile_t *prev_includes_;
1534
1535
    /* head and tail of include path list */
1536
    struct tctok_incpath_t *incpath_head_;
1537
    struct tctok_incpath_t *incpath_tail_;
1538
1539
    /* file descriptor and line number of last line read */
1540
    class CTcTokFileDesc *last_desc_;
1541
    long last_linenum_;
1542
1543
    /* file descriptor and line number of last line appended */
1544
    class CTcTokFileDesc *appended_desc_;
1545
    long appended_linenum_;
1546
1547
    /* current input stream */
1548
    class CTcTokStream *str_;
1549
1550
    /* master list of file descriptors */
1551
    class CTcTokFileDesc *desc_head_;
1552
    class CTcTokFileDesc *desc_tail_;
1553
1554
    /* 
1555
     *   array of file descriptors (we keep the list in both an array and
1556
     *   a linked list, since we need both sequential and indexed access;
1557
     *   this isn't a lot of trouble since we never need to remove an
1558
     *   entry from the list) 
1559
     */
1560
    class CTcTokFileDesc **desc_list_;
1561
1562
    /* number of entries in desc_list_ */
1563
    size_t desc_list_cnt_;
1564
1565
    /* number of slots allocated in desc_list_ array */
1566
    size_t desc_list_alo_;
1567
1568
    /* next file descriptor ID to be assigned */
1569
    int next_filedesc_id_;
1570
1571
    /* pointer to current position in current line */
1572
    utf8_ptr p_;
1573
1574
    /* 
1575
     *   The CTcTokString object containing the current line.  This is the
1576
     *   buffer object we're currently reading from, and will be either
1577
     *   linebuf_ or expbuf_.  p_ always points into this buffer.  
1578
     */
1579
    CTcTokString *curbuf_;
1580
1581
    /* raw file input buffer */
1582
    CTcTokString linebuf_;
1583
1584
    /* 
1585
     *   unsplice buffer - we'll put any unspliced text into this buffer,
1586
     *   then read it back at the next read_line() 
1587
     */
1588
    CTcTokString unsplicebuf_;
1589
1590
    /* macro expansion buffer */
1591
    CTcTokString expbuf_;
1592
1593
    /* 
1594
     *   Flag: in a string.  If this is '\0', we're not in a string;
1595
     *   otherwise, this is the quote character that ends the string.
1596
     */
1597
    wchar_t in_quote_;
1598
1599
    /* flag: in an embedded expression during line processing */
1600
    uint comment_in_embedding_ : 1;
1601
1602
    /* flag: macro processing token stream is in an embedded expression */
1603
    int macro_in_embedding_;
1604
1605
    /* flag: main token stream is in an embedded expression */
1606
    int main_in_embedding_;
1607
1608
    /* 
1609
     *   #if state stack.  if_sp_ is the index of the next nesting slot;
1610
     *   if if_sp_ is zero, it means that we're not in a #if at all.
1611
     *   
1612
     *   Separately, the if_false_level_ is the level of #if's contained
1613
     *   within a false #if branch.  This is separate because, once we're
1614
     *   in a false #if branch, everything within it is false.
1615
     */
1616
    int if_sp_;
1617
    tok_if_info_t if_stack_[TOK_MAX_IF_NESTING];
1618
    int if_false_level_;
1619
1620
    /* source block list head */
1621
    CTcTokSrcBlock *src_head_;
1622
1623
    /* current (and last) source block */
1624
    CTcTokSrcBlock *src_cur_;
1625
1626
    /* pointer to next available byte in the current source block */
1627
    char *src_ptr_;
1628
1629
    /* number of bytes remaining in the current source block */
1630
    size_t src_rem_;
1631
1632
    /* current token */
1633
    CTcToken curtok_;
1634
1635
    /* previous token (for unget) */
1636
    CTcToken prvtok_;
1637
1638
    /* 
1639
     *   next token, if a token has been un-gotten, and a flag indicating
1640
     *   that this is indeed the case. 
1641
     */
1642
    CTcToken nxttok_;
1643
    unsigned int nxttok_valid_ : 1;
1644
1645
    /* the external token source, if any */
1646
    CTcTokenSource *ext_src_;
1647
1648
    /* symbol table for #define symbols */
1649
    class CVmHashTable *defines_;
1650
1651
    /* 
1652
     *   symbol table for symbols explicitly undefined; we keep track of
1653
     *   these so that we can exclude anything ever undefined from the debug
1654
     *   macro records, since only static global macros can be handled in the
1655
     *   debug records 
1656
     */
1657
    class CVmHashTable *undefs_;
1658
1659
    /* symbol table for TADS keywords */
1660
    class CVmHashTable *kw_;
1661
1662
    /* head of macro resource pool list */
1663
    class CTcMacroRsc *macro_res_head_;
1664
1665
    /* head of list of available macro resources */
1666
    class CTcMacroRsc *macro_res_avail_;
1667
1668
    /* 
1669
     *   string capture file - if this is non-null, we'll capture all of
1670
     *   the strings we read to this file, one string per line 
1671
     */
1672
    osfildef *string_fp_;
1673
1674
    /* character mapper for writing to the string capture file */
1675
    class CCharmapToLocal *string_fp_map_;
1676
1677
    /* true -> allow preprocessor directives */
1678
    unsigned int allow_pp_;
1679
};
1680
1681
/* ------------------------------------------------------------------------ */
1682
/*
1683
 *   Error handler interface.  Callers of load_macros_from_file() in
1684
 *   CTcTokenizer must provide an implementation of this interface to handle
1685
 *   errors that occur while loading macros.  
1686
 */
1687
class CTcTokLoadMacErr
{
public:
    /*
     *   Virtual destructor.  This is a polymorphic interface, so give it a
     *   virtual destructor to ensure that an implementation object is
     *   destroyed correctly even when it's deleted through a pointer to
     *   this base interface. 
     */
    virtual ~CTcTokLoadMacErr() { }

    /* 
     *   Flag an error.  'err' is a code taken from the following list:
     *   
     *   1 - a macro name symbol in the file is too long (it exceeds the
     *   maximum symbol length for the preprocessor)
     *   
     *   2 - a formal parameter name is too long 
     */
    virtual void log_error(int err) = 0;
};
1700
1701
/* ------------------------------------------------------------------------ */
1702
/*
1703
 *   Tokenizer File Descriptor.  Each unique source file has a separate
1704
 *   file descriptor, which keeps track of the file's name. 
1705
 */
1706
class CTcTokFileDesc
{
public:
    /* construct a descriptor for the given file */
    CTcTokFileDesc(const char *fname, size_t fname_len, int index,
                   CTcTokFileDesc *orig_desc,
                   const char *orig_fname, size_t orig_fname_len);

    /* destroy the descriptor */
    ~CTcTokFileDesc();

    /* the file system name of the file */
    const char *get_fname() const { return fname_; }

    /* the filename in its original form */
    const char *get_orig_fname() const { return orig_fname_; }

    /* 
     *   the filename in double-quoted string form (backslashes and
     *   double-quotes are escaped with backslashes) 
     */
    const char *get_dquoted_fname() const { return dquoted_fname_; }

    /* 
     *   the root filename (i.e., without any path prefix) in
     *   double-quoted string form 
     */
    const char *get_dquoted_rootname() const { return dquoted_rootname_; }

    /* the filename in single-quoted string form */
    const char *get_squoted_fname() const { return squoted_fname_; }

    /* the root filename in single-quoted string form */
    const char *get_squoted_rootname() const { return squoted_rootname_; }

    /* read/write the link to the next descriptor in the chain */
    CTcTokFileDesc *get_next() const { return next_; }
    void set_next(CTcTokFileDesc *nxt) { next_ = nxt; }

    /* this descriptor's index in the master list */
    int get_index() const { return index_; }

    /* the original descriptor for this file in the list, if any */
    CTcTokFileDesc *get_orig() const { return orig_; }

    /* 
     *   The list index of the original entry.  When there's no separate
     *   original (i.e., this descriptor is itself the original), the
     *   result is this descriptor's own index.  
     */
    int get_orig_index() const
        { return orig_ != 0 ? orig_->get_index() : index_; }

    /* 
     *   Add a source line position to our list.  We keep an index of the
     *   byte-code address for each executable source line, so that
     *   debuggers can map a source location to its compiled code.  The
     *   image builder supplies this information during linking.
     *   'line_addr' is the absolute location in the image file of the
     *   executable code for source line 'linenum' (the first line in the
     *   file is numbered 1).  
     */
    void add_source_line(ulong linenum, ulong line_addr);

    /* 
     *   Enumerate the source lines through a callback.  Only lines that
     *   have an associated code location are enumerated; lines that
     *   generated no executable code are skipped.  Lines are visited in
     *   ascending order of line number, and each line number is visited
     *   only once.  'cbctx' is passed through to the callback as its
     *   'ctx' argument.  
     */
    void enum_source_lines(void (*cbfunc)(void *ctx, ulong linenum,
                                          ulong byte_code_addr),
                           void *cbctx);

private:
    /* our index in the master list */
    int index_;

    /* the actual file system filename */
    char *fname_;

    /* 
     *   the original filename, if different from fname_ - this is the
     *   name as the user specified it, before any adjustment for include
     *   paths or other extra location information 
     */
    char *orig_fname_;

    /* the filename as a double-quoted string */
    char *dquoted_fname_;

    /* the filename as a single-quoted string */
    char *squoted_fname_;

    /* the root filename as a single-quoted string */
    char *squoted_rootname_;

    /* the root filename as a double-quoted string */
    char *dquoted_rootname_;

    /* link to the next descriptor in the master descriptor list */
    CTcTokFileDesc *next_;

    /* 
     *   The original descriptor with the same filename.  Multiple
     *   descriptors can exist for the same filename (for example, when
     *   the same header is included in several different object files);
     *   each of the copies keeps track of the original descriptor for
     *   the file here.  
     */
    CTcTokFileDesc *orig_;

    /* the source line pages */
    struct CTcTokSrcPage **src_pages_;

    /* the number of source line page slots allocated */
    size_t src_pages_alo_;
};
1825
1826
1827
/* ------------------------------------------------------------------------ */
1828
/*
1829
 *   Tokenizer Input Stream 
1830
 */
1831
class CTcTokStream
{
public:
    /* create a token stream */
    CTcTokStream(class CTcTokFileDesc *desc, class CTcSrcObject *src,
                 CTcTokStream *parent, int charset_error,
                 int init_if_level);

    /* delete the stream */
    ~CTcTokStream();

    /* get/set the associated file descriptor */
    class CTcTokFileDesc *get_desc() const { return desc_; }
    void set_desc(class CTcTokFileDesc *desc) { desc_ = desc; }

    /* get the underlying source file */
    class CTcSrcObject *get_src() const { return src_; }

    /* get the line number of the next line to be read */
    long get_next_linenum() const { return next_linenum_; }

    /* set the next line number */
    void set_next_linenum(long l) { next_linenum_ = l; }

    /* get the enclosing stream */
    CTcTokStream *get_parent() const { return parent_; }

    /* count having read a line */
    void count_line() { ++next_linenum_; }

    /* was there a #charset error when opening the file? */
    int get_charset_error() const { return charset_error_; }

    /* 
     *   Get/set the in-comment status.  The flag is stored in a one-bit
     *   field, so the setter reduces its argument to 0 or 1 first;
     *   storing the raw int would keep only the low-order bit, which
     *   would turn an even non-zero "true" value into false.  
     */
    int is_in_comment() const { return in_comment_; }
    void set_in_comment(int f) { in_comment_ = (f != 0); }

    /* get/set the pragma C mode */
    // int is_pragma_c() const { return pragma_c_; }
    // void set_pragma_c(int f) { pragma_c_ = f; }

    /* get/set if nesting level at the start of the file */
    int get_init_if_level() const { return init_if_level_; }
    void set_init_if_level(int level) { init_if_level_ = level; }

    /* 
     *   Get/set the newline spacing mode.  As with the in-comment flag,
     *   the setter treats any non-zero argument as true before storing
     *   it in the one-bit field.  
     */
    int get_newline_spacing() const { return newline_spacing_; }
    void set_newline_spacing(int f) { newline_spacing_ = (f != 0); }

private:
    /* file descriptor associated with this file */
    class CTcTokFileDesc *desc_;

    /* the underlying source reader */
    class CTcSrcObject *src_;

    /* 
     *   the enclosing stream - this is the stream that #include'd the
     *   current stream 
     */
    CTcTokStream *parent_;

    /* line number of next line to be read */
    ulong next_linenum_;

    /* #if nesting level at the start of the file */
    int init_if_level_;

    /* flag: we were unable to load the map in the #charset directive */
    uint charset_error_ : 1;

    /* the stream is in a multi-line comment */
    uint in_comment_ : 1;

    /* newline_spacing mode when the stream was stacked */
    uint newline_spacing_ : 1;

    /* flag: we're in #pragma C+ mode */
    // uint pragma_c_ : 1; - #pragma C is not currently used
};
1911
1912
/* ------------------------------------------------------------------------ */
1913
/*
1914
 *   Keyword Hash Table Entry 
1915
 */
1916
class CTcHashEntryKw: public CVmHashEntryCS
1917
{
1918
public:
1919
    CTcHashEntryKw(const textchar_t *str, tc_toktyp_t tokid)
1920
        : CVmHashEntryCS(str, strlen(str), FALSE)
1921
    {
1922
        /* save the token ID for the keyword */
1923
        tokid_ = tokid;
1924
    }
1925
1926
    /* get the token ID */
1927
    tc_toktyp_t get_tok_id() const { return tokid_; }
1928
1929
private:
1930
    /* our token ID */
1931
    tc_toktyp_t tokid_;
1932
};
1933
1934
/* ------------------------------------------------------------------------ */
1935
/*
1936
 *   basic #define symbol table entry 
1937
 */
1938
class CTcHashEntryPp: public CVmHashEntryCS
1939
{
1940
public:
1941
    CTcHashEntryPp(const textchar_t *str, size_t len, int copy)
1942
        : CVmHashEntryCS(str, len, copy)
1943
    {
1944
        /* by default, we have no arguments */
1945
        has_args_ = FALSE;
1946
        has_varargs_ = FALSE;
1947
        argc_ = 0;
1948
        argv_ = 0;
1949
        params_table_ = 0;
1950
    }
1951
1952
    /* get the expansion text */
1953
    virtual const char *get_expansion() const = 0;
1954
    virtual size_t get_expan_len() const = 0;
1955
1956
    /* certain special macros (__LINE__, __FILE__) aren't undef'able */
1957
    virtual int is_undefable() const { return TRUE; }
1958
1959
    /* 
1960
     *   most macros are real symbols, created by #define's, but some are
1961
     *   special pseudo-macros, like __LINE__ and __FILE__, that the
1962
     *   preprocessor provides 
1963
     */
1964
    virtual int is_pseudo() const { return FALSE; }
1965
1966
    /* does the macro have an argument list? */
1967
    int has_args() const { return has_args_; }
1968
1969
    /* get the number of arguments */
1970
    int get_argc() const { return argc_; }
1971
1972
    /* do we have a variable number of arguments? */
1973
    int has_varargs() const { return has_varargs_; }
1974
1975
    /* 
1976
     *   get the minimum number of allowed arguments - if we have varargs,
1977
     *   this is one less than the number of formals listed, since the last
1978
     *   formal can correspond to any number of actuals, including zero 
1979
     */
1980
    int get_min_argc() const { return has_varargs_ ? argc_ - 1 : argc_; }
1981
1982
    /* get the name of an argument by position (0 = first argument) */
1983
    const char *get_arg_name(int idx) const { return argv_[idx]; }
1984
1985
    /* get the parameter hash table entry for the parameter */
1986
    class CTcHashEntryPpArg *get_arg_entry(int idx) const
1987
        { return arg_entry_[idx]; }
1988
1989
    /* get the parameters hash table */
1990
    const CVmHashTable *get_params_table() const { return params_table_; }
1991
1992
protected:
1993
    /* argument list */
1994
    char **argv_;
1995
1996
    /* list of parameter hash entries */
1997
    class CTcHashEntryPpArg **arg_entry_;
1998
1999
    /* parameter hash table */
2000
    CVmHashTable *params_table_;
2001
2002
    /* argument count */
2003
    int argc_;
2004
2005
    /* flag: the macro has a parameter list */
2006
    uint has_args_ : 1;
2007
2008
    /* 
2009
     *   flag: the parameter list takes a variable number of arguments; if
2010
     *   this is set, then argc_ is one greater than the minimum number of
2011
     *   arguments required, and the last formal receives the varying part
2012
     *   of the actual parameter list, which can contain zero or more
2013
     *   actuals 
2014
     */
2015
    uint has_varargs_ : 1;
2016
};
2017
2018
/*
2019
 *   #define symbol hash table entry
2020
 */
2021
class CTcHashEntryPpDefine: public CTcHashEntryPp
2022
{
2023
public:
2024
    /* 
2025
     *   Create the hash entry.  argc is the number of arguments to the
2026
     *   macro, and argv is an array of pointers to null-terminated
2027
     *   strings with the argument names, in the order defined in the
2028
     *   macro.
2029
     *   
2030
     *   If has_args is false, the macro does not take a parameter list at
2031
     *   all.  Note that it is possible for has_args to be true and argc
2032
     *   to be zero, because a macro can be defined to take an argument
2033
     *   list with no arguments (i.e., empty parens).  A macro with an
2034
     *   empty argument list is distinct from a macro with no argument
2035
     *   list: in the former case, the empty parens are required, and are
2036
     *   removed from the input stream and replaced with the macro's
2037
     *   expansion.
2038
     *   
2039
     *   We'll make a copy of the argument list vector, strings, and
2040
     *   expansion text, so the caller is free to forget all of that after
2041
     *   creating the entry instance.  
2042
     */
2043
    CTcHashEntryPpDefine(const textchar_t *str, size_t len, int copy,
2044
                         int has_args, int argc, int has_varargs,
2045
                         const char **argv, const size_t *argvlen,
2046
                         const char *expansion, size_t expan_len);
2047
2048
    ~CTcHashEntryPpDefine();
2049
2050
    /* get the expansion text and its length */
2051
    const char *get_expansion() const { return expan_; }
2052
    size_t get_expan_len() const { return expan_len_; }
2053
2054
private:
2055
    /* expansion */
2056
    char *expan_;
2057
    size_t expan_len_;
2058
};
2059
2060
2061
/*
2062
 *   Hash table entry for __FILE__ and __LINE__
2063
 */
2064
class CTcHashEntryPpSpecial: public CTcHashEntryPp
2065
{
2066
public:
2067
    CTcHashEntryPpSpecial(CTcTokenizer *tok, const char *str)
2068
        : CTcHashEntryPp(str, strlen(str), FALSE)
2069
    {
2070
        /* remember my tokenizer */
2071
        tok_ = tok;
2072
    }
2073
2074
    /* these special macros are not undef'able */
2075
    virtual int is_undefable() const { return FALSE; }
2076
2077
    /* special macros are pseudo-macros provided by the preprocessor */
2078
    virtual int is_pseudo() const { return TRUE; }
2079
2080
protected:
2081
    /* my tokenizer */
2082
    CTcTokenizer *tok_;
2083
};
2084
2085
class CTcHashEntryPpFILE: public CTcHashEntryPpSpecial
2086
{
2087
public:
2088
    CTcHashEntryPpFILE(CTcTokenizer *tok)
2089
        : CTcHashEntryPpSpecial(tok, "__FILE__") { }
2090
2091
    /* our expansion is the current filename, in single quotes */
2092
    const char *get_expansion() const { return get_base_text(); }
2093
    size_t get_expan_len() const { return strlen(get_base_text()); }
2094
2095
private:
2096
    /* get our expansion base text */
2097
    const char *get_base_text() const
2098
    {
2099
        /* 
2100
         *   if we're in test-report mode, use the root name only;
2101
         *   otherwise, use the full name with path 
2102
         */
2103
        if (tok_->get_test_report_mode())
2104
            return tok_->get_last_desc()->get_squoted_rootname();
2105
        else
2106
            return tok_->get_last_desc()->get_squoted_fname();
2107
    }
2108
};
2109
2110
class CTcHashEntryPpLINE: public CTcHashEntryPpSpecial
2111
{
2112
public:
2113
    CTcHashEntryPpLINE(CTcTokenizer *tok)
2114
        : CTcHashEntryPpSpecial(tok, "__LINE__") { }
2115
2116
    /* our expansion is the line number as a decimal string */
2117
    const char *get_expansion() const
2118
        { gen_expansion(tok_); return buf_; }
2119
    size_t get_expan_len() const
2120
        { gen_expansion(tok_); return strlen(buf_); }
2121
2122
private:
2123
    /* generate the expansion text into our internal buffer */
2124
    static void gen_expansion(CTcTokenizer *tok)
2125
        { sprintf(buf_, "%ld", tok->get_last_linenum()); }
2126
2127
    /* internal buffer */
2128
    static char buf_[20];
2129
};
2130
2131
2132
/*
2133
 *   Hash entry for preprocessor arguments 
2134
 */
2135
class CTcHashEntryPpArg: public CVmHashEntryCS
2136
{
2137
public:
2138
    CTcHashEntryPpArg(const char *str, size_t len, int copy, int argnum)
2139
        : CVmHashEntryCS(str, len, copy)
2140
    {
2141
        /* remember the argument number */
2142
        argnum_ = argnum;
2143
    }
2144
2145
    /* get my argument number */
2146
    int get_argnum() const { return argnum_; }
2147
2148
private:
2149
    /* argument number */
2150
    int argnum_;
2151
};
2152
2153
2154
/* ------------------------------------------------------------------------ */
2155
/*
2156
 *   Previously-included file list entry.  Each time we include a file,
2157
 *   we'll add an entry to a list of files; in the future, we'll consult
2158
 *   this list to ensure that we don't include the same file again. 
2159
 */
2160
struct tctok_incfile_t
{
    /* link to the next entry in the previously-included file list */
    struct tctok_incfile_t *nxt;

    /* 
     *   the file's name - the array is declared with a single element;
     *   memory is allocated to hold the actual name 
     */
    char fname[1];
};
2168
2169
/* ------------------------------------------------------------------------ */
2170
/*
2171
 *   Include path list entry.  This structure defines one include path; we
2172
 *   maintain a list of these structures.  
2173
 */
2174
struct tctok_incpath_t
{
    /* link to the next entry in the include path list */
    struct tctok_incpath_t *nxt;

    /* the path string (declared here with a single element) */
    char path[1];
};
2182
2183
#endif /* TCTOK_H */
2184