cfad47cfa3/t3compiler/tads3/tcpnbase.h

4b825dc642cb6eb9a060e54bf8d69288fbee4904cfad47cfa334b206c65f22086bcc5d63e6f70944
1
/* $Header: d:/cvsroot/tads/tads3/TCPNBASE.H,v 1.3 1999/07/11 00:46:53 MJRoberts Exp $ */

/* 
 *   Copyright (c) 1999, 2002 Michael J. Roberts.  All Rights Reserved.
 *   
 *   Please see the accompanying license file, LICENSE.TXT, for information
 *   on using and copying this software.  
 */
/*
Name
  tcpnbase.h - Parse Node - base class
Function
  Defines the target-independent base class for parse nodes
Notes
  All expression parse nodes are derived from the target-specific
  subclass of this class.  The target-independent base class is
  CTcPrsNodeBase; the target-specific class is CTcPrsNode.
Modified
  05/10/99 MJRoberts  - Creation
*/
21
22
#ifndef TCPN_H
23
#define TCPN_H
24
25
#include "vmhash.h"
26
27
/* ------------------------------------------------------------------------ */
/*
 *   Parse Tree Allocation Object.  This is a base class that can be used
 *   for tree objects that are to be allocated from the parser node pool.
 *   
 *   The class has no data members and no virtual methods; its only job
 *   is to supply a class-specific operator new() that derived classes
 *   inherit.  
 */
class CTcPrsAllocObj
{
public:
    /*
     *   Override operator new() - allocate all parse node objects out of
     *   the parse node pool.  'siz' is the size in bytes of the object
     *   being allocated.  This is only declared here; the implementation
     *   is provided elsewhere.
     *   
     *   Note that no matching operator delete() is declared in this
     *   class.  (NOTE(review): presumably pool memory is released in bulk
     *   rather than per object - confirm against the allocator.)  
     */
    void *operator new(size_t siz);
};
41
42
43
/* ------------------------------------------------------------------------ */
/*
 *   adjust_for_debug() information structure.  A pointer to one of these
 *   is passed to CTcPrsNodeBase::adjust_for_debug() to describe how the
 *   debugger is evaluating the expression.  
 */
struct tcpn_debug_info
{
    /* true -> speculative evaluation mode */
    int speculative;

    /* 
     *   stack level - 0 is the active level, 1 is the first enclosing
     *   level, and so on 
     */
    int stack_level;
};
58
59
/* ------------------------------------------------------------------------ */
60
/*
61
 *   Parse Tree Expression Node - base class.  As we parse an expression,
62
 *   we build a tree of these objects to describe the source code.
63
 *   
64
 *   This class is subclassed for each type of parsing node: each type of
65
 *   statement has a node type, some statements have helper node types for
66
 *   parts of statements, and each expression operator has a node type.
67
 *   These subclasses contain the information specific to the type of
68
 *   parsing construct represented.
69
 *   
70
 *   Each parsing subclass is then further subclassed for each target
71
 *   architecture.  This final subclass contains the code generator for
72
 *   the node in the target architecture.
73
 *   
74
 *   The target-independent base version of each subclass is called
75
 *   CTPNXxxBase.  The target-specific subclass derived from this base
76
 *   class is CTPNXxx.  For example, the final subclass for constant
77
 *   nodes, which is derived from the target-independent base class
78
 *   CTPNConstBase, is CTPNConst.  (Note that each target uses the same
79
 *   name for the final subclass, so we can only link one target
80
 *   architecture into a given build of the compiler.  Each additional
81
 *   target requires a separate compiler executable with the appropriate
82
 *   CTPNConst classes linked in.)  
83
 */
84
class CTcPrsNodeBase: public CTcPrsAllocObj
85
{
86
public:
87
    /* 
88
     *   Generate code for the expression for the target architecture.
89
     *   This method is defined only by the final target-specific
90
     *   subclasses.
91
     *   
92
     *   This method is used to generate code to evaluate the expression
93
     *   as an rvalue.
94
     *   
95
     *   If 'discard' is true, it indicates that any value yielded by the
96
     *   expression will not be used, in which case the generated code
97
     *   need not leave the result of the expression on the stack.  We can
98
     *   generate code more efficiently for certain types of expressions
99
     *   when we know that we're evaluating them only for side effects.
100
     *   For example, an assignment expression has a result value, but
101
     *   this value need not be pushed onto the stack if it will simply be
102
     *   discarded.  Also, an operator like "+" that has no side effects
103
     *   of its own can merely evaluate its operands for their side
104
     *   effects, but need not compute its own result if that result would
105
     *   simply be discarded.
106
     *   
107
     *   If 'for_condition' is true, it indicates that the result of the
108
     *   expression will be used directly for a conditional of some kind
109
     *   (for a "?:" operator, an "if" statement, a "while" statement, or
110
     *   the like).  In some cases, we can avoid extra conversions to some
111
     *   values when they're going to be used directly for a comparison;
112
     *   for example, the "&&" operator must return a true/nil value, but
113
     *   the code generator may be able to avoid the extra conversion when
114
     *   the value will be used for an "if" statement's conditional value.
115
     */
116
    virtual void gen_code(int discard, int for_condition) = 0;
117
118
    /*
119
     *   Get the constant value of the parse node, if available.  Most
120
     *   parse nodes have no constant value, so by default this returns
121
     *   null.  Only constant parse nodes can provide a constant value, so
122
     *   they should override this.  
123
     */
124
    virtual class CTcConstVal *get_const_val() { return 0; }
125
126
    /* determine if the node has a constant value */
127
    int is_const() { return get_const_val() != 0; }
128
129
    /* determine if I have a given constant integer value */
130
    int is_const_int(int val)
131
    {
132
        return (is_const()
133
                && get_const_val()->get_type() == TC_CVT_INT
134
                && get_const_val()->get_val_int() == val);
135
    }
136
137
    /*
138
     *   Set the constant value of the parse node from that of another
139
     *   node.  The caller must already have checked that this node and
140
     *   the value being assigned are both valid constant values.  
141
     */
142
    void set_const_val(class CTcPrsNode *src)
143
    {
144
        /* set my constant value from the source's constant value */
145
        get_const_val()->set(((CTcPrsNodeBase *)src)->get_const_val());
146
    }
147
148
    /*
149
     *   Check to see if this expression can possibly be a valid lvalue.
150
     *   Return true if so, false if not.  This check is made before
151
     *   symbol resolution; when it is not certain whether or not a symbol
152
     *   expression can be an lvalue, assume it can be at this point.  By
153
     *   default, we'll return false; operator nodes whose result can be
154
     *   used as an lvalue should override this to return true.  
155
     */
156
    virtual int check_lvalue() const { return FALSE; }
157
158
    /*
159
     *   Check to see if this expression is an valid lvalue, after
160
     *   resolving symbols in the given scope.  Returns true if so, false
161
     *   if not. 
162
     */
163
    virtual int check_lvalue_resolved(class CTcPrsSymtab *symtab) const
164
        { return FALSE; }
165
166
    /*
167
     *   Check to see if this expression can possibly be a valid address
168
     *   value, so that the address-of ("&") operator can be applied.
169
     *   Returns true if it is possible, false if not.  The only type of
170
     *   expression whose address can be taken is a simple symbol.  The
171
     *   address of a symbol can be taken only if the symbol is a function
172
     *   or property name, but we won't know this at parse time, so we'll
173
     *   indicate that any symbol is acceptable.  By default, this returns
174
     *   false, since the address of most expressions cannot be taken.  
175
     */
176
    virtual int has_addr() const { return FALSE; }
177
178
    /*
179
     *   Check to see if this expression is an address expression of some
180
     *   kind (i.e., of class CTPNAddrBase, or of a class derived from
181
     *   CTPNAddrBase).  Returns true if so, false if not.  
182
     */
183
    virtual int is_addr() const { return FALSE; }
184
185
    /*
186
     *   Determine if this node is of type double-quoted string (dstring).
187
     *   Returns true if so, false if not.  By default, we return false.
188
     */
189
    virtual int is_dstring() const { return FALSE; }
190
191
    /*
192
     *   Determine if this is a simple assignment operator node.  Returns
193
     *   true if so, false if not.  By default, we return false. 
194
     */
195
    virtual int is_simple_asi() const { return FALSE; }
196
197
    /*
198
     *   Determine if this node yields a value when evaluated.  Returns
199
     *   true if so, false if not.  When it cannot be determined at
200
     *   compile-time whether or not the node has a value (for example,
201
     *   for a call to a pointer to a function whose return type is not
202
     *   declared), this should indicate that a value is returned.
203
     *   
204
     *   Most nodes yield a value when executed, so we'll return true by
205
     *   default.  
206
     */
207
    virtual int has_return_value() const { return TRUE; }
208
209
    /*
210
     *   Determine if this node yields a return value when called as a
211
     *   function.  We assume by default that it does. 
212
     */
213
    virtual int has_return_value_on_call() const { return TRUE; }
214
215
    /*
216
     *   Get the text of the symbol for this node, if any.  If the node is
217
     *   not some kind of symbol node, this returns null.  
218
     */
219
    virtual const textchar_t *get_sym_text() const { return 0; }
220
    virtual size_t get_sym_text_len() const { return 0; }
221
222
    /*
223
     *   Fold constant expressions, given a finished symbol table.  We do
224
     *   most of our constant folding during the initial parsing, but some
225
     *   constant folding must wait until the symbol table is finished; in
226
     *   particular, we can't figure out what to do with symbols until we
227
     *   know what the symbols mean.
228
     *   
229
     *   For most nodes, this function should merely recurse into subnodes
230
     *   and fold constants.  Nodes that are affected by symbol
231
     *   resolution, directly or indirectly, should override this.
232
     *   
233
     *   For example, a list can change from unknown to constant during
234
     *   this operation.  If the list contains a symbol, the list will
235
     *   initially be set to unknown, since the symbol could turn out to
236
     *   be a property evaluation, which would be non-constant, or an
237
     *   object name, which would be constant.
238
     *   
239
     *   Returns the folded version of the node, or simply 'this' if no
240
     *   folding takes place.  
241
     */
242
    virtual class CTcPrsNode *fold_constants(class CTcPrsSymtab *symtab) = 0;
243
244
    /* 
245
     *   generate a constant value node for the address of this node;
246
     *   returns null if the symbol has no address 
247
     */
248
    virtual class CTcPrsNode *fold_addr_const(class CTcPrsSymtab *)
249
    {
250
        /* by default, we have no address */
251
        return 0;
252
    }
253
254
    /*
255
     *   Adjust the expression for use as a debugger expression.  Code
256
     *   generation for debugger expressions is somewhat different than
257
     *   for normal expressions; this routine should allocate a new node,
258
     *   if necessary, for debugger use.  Returns the current node if no
259
     *   changes are necessary, or a new node if changes are needed.
260
     *   
261
     *   If 'speculative' is true, the expression is being evaluated
262
     *   speculatively by the debugger.  This means that the user hasn't
263
     *   explicitly asked for the expression to be evaluated, but rather
264
     *   the debugger is making a guess that the expression might be of
265
     *   interest to the user and is making an unsolicited attempt to
266
     *   offer it to the user.  Because the debugger is only guessing that
267
     *   the expression is interesting, the expression must not be
268
     *   evaluated if it has any side effects at all.  
269
     */
270
    virtual class CTcPrsNode *adjust_for_debug(const tcpn_debug_info *info);
271
};
272
273
/* ------------------------------------------------------------------------ */
274
/*
275
 *   Symbol Table Entry.  Each symbol has an entry in one of the symbol
276
 *   tables:
277
 *   
278
 *   - The global symbol table contains object, property, and built-in
279
 *   functions from the default function set.
280
 *   
281
 *   - Local symbol tables contain local variables and parameters.  Local
282
 *   tables have block-level scope.
283
 *   
284
 *   - Label symbol tables contain code labels (for "goto" statements).
285
 *   Label tables have function-level or method-level scope.  
286
 */
287
288
/*
289
 *   Basic symbol table entry.  The target 
290
 */
291
class CTcSymbolBase: public CVmHashEntryCS
292
{
293
public:
294
    CTcSymbolBase(const char *str, size_t len, int copy, tc_symtype_t typ)
295
        : CVmHashEntryCS(str, len, copy)
296
    {
297
        typ_ = typ;
298
    }
299
300
    /* allocate symbol entries from the parser memory pool */
301
    void *operator new(size_t siz);
302
303
    /* get the symbol type */
304
    tc_symtype_t get_type() const { return typ_; }
305
306
    /* get the symbol text and length */
307
    const char *get_sym() const { return getstr(); }
308
    size_t get_sym_len() const { return getlen(); }
309
310
    /*
311
     *   Generate a constant value node for this symbol, if possible;
312
     *   returns null if the symbol does not evaluate to a compile-time
313
     *   constant value.  An object name, for example, evaluates to a
314
     *   compile-time constant equal to the object reference; a property
315
     *   name, in contrast, is (when not qualified by another operator) an
316
     *   invocation of the property, hence must be executed at run time,
317
     *   hence is not a compile-time constant.  
318
     */
319
    virtual class CTcPrsNode *fold_constant()
320
    {
321
        /* by default, a symbol's value is not a constant */
322
        return 0;
323
    }
324
325
    /* 
326
     *   generate a constant value node for the address of this symbol;
327
     *   returns null if the symbol has no address 
328
     */
329
    virtual class CTcPrsNode *fold_addr_const()
330
    {
331
        /* by default, a symbol has no address */
332
        return 0;
333
    }
334
335
    /* determine if this symbol can be used as an lvalue */
336
    virtual int check_lvalue() const { return FALSE; }
337
338
    /* determine if this symbol can have its address taken */
339
    virtual int has_addr() const { return FALSE; }
340
341
    /* determine if I have a return value when evaluated */
342
    virtual int has_return_value_on_call() const { return TRUE; }
343
344
    /* 
345
     *   Write the symbol to a symbol export file.  By default, we'll
346
     *   write the type and symbol name to the file.  Some subclasses
347
     *   might wish to override this to write additional data, or to write
348
     *   something different or nothing at all (for example, built-in
349
     *   function symbols are not written to a symbol export file).
350
     *   
351
     *   When a subclass does override this, it must write the type as a
352
     *   UINT2 value as the first thing written to the file.  The generic
353
     *   file reader switches on this type code to determine what to call
354
     *   to load the entry, then calls the subclass-specific loader to do
355
     *   the actual work.
356
     *   
357
     *   Returns true if we wrote the symbol to the file, false if not.
358
     *   (False doesn't indicate an error - it indicates that we chose not
359
     *   to store the symbol because the symbol is not of a type that we
360
     *   want to put in the export file.)  
361
     */
362
    virtual int write_to_sym_file(class CVmFile *fp);
363
364
    /* write the symbol name (with a UINT2 length prefix) to a file */
365
    int write_name_to_file(class CVmFile *fp);
366
367
    /*
368
     *   Write the symbol to an object file.  By default, we'll write the
369
     *   type and symbol name to the file.  Some subclasses might wish to
370
     *   override this to write additional data, or to write something
371
     *   different or nothing at all (for example, built-in function
372
     *   symbols are not written to an object file).
373
     *   
374
     *   When a subclass does override this, it must write the type as a
375
     *   UINT2 value as the first thing written to the file.  The generic
376
     *   file reader switches on this type code to determine what to call
377
     *   to load the entry, then calls the subclass-specific loader to do
378
     *   the actual work.
379
     *   
380
     *   Returns true if we wrote the symbol to the file, false if not.
381
     *   (False doesn't indicate an error - it indicates that we chose not
382
     *   to store the symbol because the symbol is not of a type that we
383
     *   want to put in the export file.)  
384
     */
385
    virtual int write_to_obj_file(class CVmFile *fp);
386
387
    /*
388
     *   Write the symbol's cross references to the object file.  This can
389
     *   write references to other symbols by storing the other symbol's
390
     *   index in the object file.  Most symbols don't have any cross
391
     *   references, so this does nothing by default.
392
     *   
393
     *   If this writes anything, the first thing written must be a UINT4
394
     *   giving the object file index of this symbol.  On loading, we'll
395
     *   read this and look up the loaded symbol.  
396
     */
397
    virtual int write_refs_to_obj_file(class CVmFile *) { return FALSE; }
398
399
    /* 
400
     *   perform basic writing to a file - this performs common work that
401
     *   can be used for object or symbol files 
402
     */
403
    int write_to_file_gen(CVmFile *fp);
404
405
    /*
406
     *   Read a symbol from a symbol file, returning the new symbol 
407
     */
408
    static class CTcSymbol *read_from_sym_file(class CVmFile *fp);
409
410
    /*
411
     *   Load a symbol from an object file.  Stores the symbol in the
412
     *   global symbol table, and fills in the appropriate translation
413
     *   mapping table when necessary.  Returns zero on success; logs
414
     *   error messages and return non-zero on failure.  
415
     */
416
    static int load_from_obj_file(class CVmFile *fp,
417
                                  const textchar_t *fname,
418
                                  tctarg_obj_id_t *obj_xlat,
419
                                  tctarg_prop_id_t *prop_xlat,
420
                                  ulong *enum_xlat);
421
422
    /*
423
     *   Load references from the object file - reads the information that
424
     *   write_refs_to_obj_file() wrote, except that the caller will have
425
     *   read the first UINT4 giving the symbol's object file index before
426
     *   calling this routine. 
427
     */
428
    virtual void load_refs_from_obj_file(class CVmFile *,
429
                                         const textchar_t * /*obj_fname*/,
430
                                         tctarg_obj_id_t * /*obj_xlat*/,
431
                                         tctarg_prop_id_t * /*prop_xlat*/)
432
    {
433
        /* by default, do nothing */
434
    }
435
436
    /*
437
     *   Log an object file loading conflict with this symbol.  The given
438
     *   type is the new type found in the object file of the given name. 
439
     */
440
    void log_objfile_conflict(const textchar_t *fname, tc_symtype_t new_type)
441
        const;
442
443
    /*
444
     *   Get a pointer to the head of the fixup list for this symbol.
445
     *   Symbols such as functions that keep a list of fixups for
446
     *   references to the symbol must override this to provide a fixup
447
     *   list head; by default, symbols keep no fixup list, so we'll just
448
     *   return null. 
449
     */
450
    virtual struct CTcAbsFixup **get_fixup_list_anchor() { return 0; }
451
452
    /*
453
     *   Set my code stream anchor object.  By default, symbols don't keep
454
     *   track of any stream anchors.  Symbols that refer to code or data
455
     *   stream locations directly must keep an anchor, since they must
456
     *   keep track of their fixup list in order to fix up generated
457
     *   references to the symbol.  This must be overridden by any
458
     *   subclasses that keep anchors.  
459
     */
460
    virtual void set_anchor(struct CTcStreamAnchor *) { }
461
462
    /*
463
     *   Determine if this symbol is external and unresolved.  By default,
464
     *   a symbol cannot be external at all, so this will return false.
465
     *   Subclasses for symbol types that can be external should override
466
     *   this to return true if the symbol is an unresolved external
467
     *   reference. 
468
     */
469
    virtual int is_unresolved_extern() const { return FALSE; }
470
471
    /*
472
     *   Mark the symbol as referenced.  Some symbol types keep track of
473
     *   whether they've been referenced or not; those types can override
474
     *   this to keep track.  This method is called each time the symbol
475
     *   is found in the symbol table via the find() or find_or_def()
476
     *   methods.  By default, we do nothing.
477
     */
478
    virtual void mark_referenced() { }
479
480
    /*
481
     *   Apply internal fixups.  If the symbol keeps its own internal
482
     *   fixup information, it can translate the fixups here.  By default,
483
     *   this does nothing.  
484
     */
485
    virtual void apply_internal_fixups() { }
486
487
    /*
488
     *   Build dictionary entries for this symbol.  Most symbols do
489
     *   nothing here; objects which can have associated vocabulary words
490
     *   should insert their vocabulary into the dictionary.  
491
     */
492
    virtual void build_dictionary() { }
493
494
    /*
495
     *   Create a new "context variable" version of this symbol for use in
496
     *   an anonymous function.  This is only needed for symbols that can
497
     *   exist in a local scope.  
498
     */
499
    virtual class CTcSymbol *new_ctx_var() const { return 0; }
500
501
    /*
502
     *   Apply context variable conversion.  If this symbol has not been
503
     *   referenced, this should simply remove the symbol from the symbol
504
     *   table.  Otherwise, this should apply the necessary conversions to
505
     *   the original symbol from which this symbol was created to ensure
506
     *   that the original and this symbol share a context variable slot.
507
     *   
508
     *   Returns true if a conversion was performed (i.e., the symbol was
509
     *   referenced), false if not.  
510
     */
511
    virtual int apply_ctx_var_conv(class CTcPrsSymtab *,
512
                                   class CTPNCodeBody *)
513
        { return FALSE; }
514
515
    /*
516
     *   Finalize context variable conversion.  This should do nothing if
517
     *   the variable hasn't already been notified that it's a context
518
     *   variable (how this happens varies by symbol type - see locals in
519
     *   particular).  This is called with the variable's own scope active
520
     *   in the parser, so the final variable assignments for the symbol
521
     *   can be made.  
522
     */
523
    virtual void finish_ctx_var_conv() { }
524
525
    /*
526
     *   Check for local references.  For variables that can exist in
527
     *   local scope, such as locals, this will be called when all of the
528
     *   code for the scope has been parsed; this should check to see if
529
     *   the symbol has been referenced in the scope, and display an
530
     *   appropriate warning message if not.  
531
     */
532
    virtual void check_local_references() { }
533
534
    /*
535
     *   Add an entry for this symbol to a "runtime symbol table," which is
536
     *   a symbol table that we can pass to the interpreter.  This must be
537
     *   overridden by each symbol type for each target architecture,
538
     *   because the nature of the runtime symbol table varies by target
539
     *   architecture.
540
     *   
541
     *   By default, this does nothing.  Symbol types that don't need to
542
     *   generate runtime symbol table entries don't need to override this.  
543
     */
544
    virtual void add_runtime_symbol(class CVmRuntimeSymbols *) { }
545
    
546
protected:
547
    /* 
548
     *   Base routine to read from a symbol file - reads the symbol name.
549
     *   Returns a pointer to the symbol name (stored in tokenizer memory
550
     *   that will remain valid throughout the compilation) on success; on
551
     *   failure, logs an error and returns null.  
552
     */
553
    static const char *base_read_from_sym_file(class CVmFile *fp);
554
    
555
    /* symbol type */
556
    tc_symtype_t typ_;
557
};
558
559
#endif /* TCPN_H */