cfad47cfa3/tads3/vmgram.h

4b825dc642cb6eb9a060e54bf8d69288fbee4904cfad47cfa334b206c65f22086bcc5d63e6f70944
1
/* $Header$ */
2
3
/* 
4
 *   Copyright (c) 2000, 2002 Michael J. Roberts.  All Rights Reserved.
5
 *   
6
 *   Please see the accompanying license file, LICENSE.TXT, for information
7
 *   on using and copying this software.  
8
 */
9
/*
10
Name
11
  vmgram.h - T3 grammar-production metaclass
12
Function
13
  
14
Notes
15
  
16
Modified
17
  02/15/00 MJRoberts  - Creation
18
*/
19
20
#ifndef VMGRAM_H
21
#define VMGRAM_H
22
23
#include <stdlib.h>
24
#include <string.h>
25
26
#include "os.h"
27
#include "t3std.h"
28
#include "vmtype.h"
29
#include "vmglob.h"
30
#include "vmobj.h"
31
32
/* ------------------------------------------------------------------------ */
33
/*
 *   Intrinsic function vector indices.  Each enumerator names one slot in
 *   the GrammarProduction intrinsic function vector.
 *   
 *   NOTE(review): CVmObjGramProd also declares getp_get_gram_info(), but no
 *   index for it is listed here - confirm whether the function table has an
 *   additional entry that should be named in this enum.  
 */
enum vmobjgram_meta_fnset
{
    /* slot 0 - undefined function */
    VMOBJGRAM_UNDEF = 0,

    /* slot 1 - parseTokens(tokenList, dict) */
    VMOBJGRAM_PARSE = 1
};
44
45
/* ------------------------------------------------------------------------ */
46
/*
 *   Match types.  These are the values stored in the token_match_type byte
 *   of each token in an alternative; the value selects the layout of the
 *   extra data that follows the token header.  
 */
enum vmgram_match_type
{
    /* production - the token matches a sub-production */
    VMGRAM_MATCH_PROD = 1,

    /* 
     *   part of speech - the token matches a word that appears in the
     *   dictionary under a particular part of speech 
     */
    VMGRAM_MATCH_SPEECH = 2,

    /* literal - the token matches a literal string */
    VMGRAM_MATCH_LITERAL = 3,

    /* token type - the token matches any input token of a given type */
    VMGRAM_MATCH_TOKTYPE = 4,

    /* star - the token matches all remaining input tokens */
    VMGRAM_MATCH_STAR = 5,

    /* 
     *   N parts of speech - the token matches a word that appears in the
     *   dictionary under any of a set of N parts of speech 
     */
    VMGRAM_MATCH_NSPEECH = 6
};
75
76
/* ------------------------------------------------------------------------ */
77
/*
 *   Grammar production object - image file format
 *   
 *   UINT2 alt_count
 *.  alternative 1
 *.  alternative 2
 *.  etc
 *   
 *   Each alternative has the following structure:
 *   
 *.  INT2 score
 *.  INT2 badness
 *.  UINT4 processor_object_id
 *.  UINT2 token_count
 *.  token 1
 *.  token 2
 *.  etc
 *   
 *   Each token has this structure:
 *   
 *   UINT2 property_association
 *.  BYTE token_match_type (one of the VMGRAM_MATCH_xxx values)
 *.  extra data depending on token_match_type (see below)
 *   
 *   The extra data for the token varies by match type:
 *   
 *   VMGRAM_MATCH_PROD - a UINT4 giving the production object ID
 *   
 *   VMGRAM_MATCH_SPEECH - a UINT2 giving the vocabulary property
 *   
 *   VMGRAM_MATCH_NSPEECH - a UINT2 giving a count, then that many
 *   additional UINT2's giving a list of vocabulary properties
 *   
 *   VMGRAM_MATCH_LITERAL - a UINT2 byte-length prefix followed by the
 *   UTF8-encoded bytes of the literal string
 *   
 *   VMGRAM_MATCH_TOKTYPE - a UINT4 giving the token enum's ID
 *   
 *   VMGRAM_MATCH_STAR - no additional data 
 */
117
118
/*
 *   Pull the various parts out of an alternative byte stream.  'p' points
 *   to the first byte of the alternative.  The fixed-size header occupies
 *   10 bytes (INT2 score at offset 0, INT2 badness at 2, UINT4 processor
 *   object at 4, UINT2 token count at 8); the first token starts right
 *   after it.
 *   
 *   NOTE(review): the image format documents score and badness as INT2
 *   (signed) - confirm that osrp2 yields the intended sign for negative
 *   values.  
 */
#define vmgram_alt_score(p)      osrp2(p)
#define vmgram_alt_badness(p)    osrp2((p) + 2)
#define vmgram_alt_procobj(p)    ((vm_obj_id_t)t3rp4u((p) + 4))
#define vmgram_alt_tokcnt(p)     osrp2((p) + 8)
#define vmgram_alt_tokptr(p)     ((p) + 10)

/*
 *   Pull the header parts out of a token in an alternative.  'p' points to
 *   the first byte of the token.  Every token starts with a 3-byte header
 *   (UINT2 property association, BYTE match type), so the type-specific
 *   extra data always begins at offset 3.  
 */
#define vmgram_tok_prop(p)       ((vm_prop_id_t)osrp2(p))
#define vmgram_tok_type(p)       (*((p) + 2))

/* pull the production object from a VMGRAM_MATCH_PROD token */
#define vmgram_tok_prod_obj(p)   ((vm_obj_id_t)t3rp4u((p) + 3))

/* pull the part-of-speech property from a VMGRAM_MATCH_SPEECH token */
#define vmgram_tok_voc_prop(p)   ((vm_prop_id_t)osrp2((p) + 3))

/* 
 *   pull the literal length/text from a VMGRAM_MATCH_LITERAL token - the
 *   text follows the UINT2 length prefix, hence offset 3 + 2 
 */
#define vmgram_tok_lit_len(p)    osrp2((p) + 3)
#define vmgram_tok_lit_txt(p)    ((p) + 5)

/* pull the enum from a VMGRAM_MATCH_TOKTYPE token */
#define vmgram_tok_tok_enum(p)   ((ulong)t3rp4u((p) + 3))

/* 
 *   pull the count/nth property from a VMGRAM_MATCH_NSPEECH token - 'n' is
 *   a zero-based index into the UINT2 property list that follows the count;
 *   the result is cast to vm_prop_id_t for consistency with the other
 *   property accessors 
 */
#define vmgram_tok_vocn_cnt(p)      osrp2((p) + 3)
#define vmgram_tok_vocn_prop(p, n)  ((vm_prop_id_t)osrp2((p) + 5 + (n)*2))

/* 
 *   get the size in bytes of a token of the given type: the 3-byte header
 *   plus the type-specific extra data (variable-size types take a pointer
 *   to the token so that they can read the embedded length or count) 
 */
#define VMGRAM_TOK_PROD_SIZE     (3 + 4)
#define VMGRAM_TOK_SPEECH_SIZE   (3 + 2)
#define VMGRAM_TOK_LIT_SIZE(p)   (3 + 2 + vmgram_tok_lit_len(p))
#define VMGRAM_TOK_TYPE_SIZE     (3 + 4)
#define VMGRAM_TOK_STAR_SIZE     (3 + 0)
#define VMGRAM_TOK_NSPEECH_SIZE(p) (3 + 2 + vmgram_tok_vocn_cnt(p)*2)
153
154
/* property/match result enumeration entry */
155
struct vmgram_match_info
156
{
157
    vm_prop_id_t prop;
158
};
159
160
/*
161
 *   Grammar production object extension 
162
 */
163
struct vm_gram_ext
164
{
165
    /* pointer to load image data, if any */
166
    const char *image_data_;
167
    size_t image_data_size_;
168
169
    /* 
170
     *   The last comparator object we used to calculate hash values for
171
     *   literals.  Each time we need literal hash values, we'll check to see
172
     *   if we are using the same comparator we were last time; if so, we'll
173
     *   use the cached hash values, otherwise we'll recalculate them.  We
174
     *   reference this object weakly.  
175
     */
176
    vm_obj_id_t comparator_;
177
178
    /* flag: we've cached hash values for our literals */
179
    uint hashes_cached_ : 1;
180
181
    /* 
182
     *   flag: there's at least one circular rule among my rules (i.e.,
183
     *   there's a rule whose first element is a self-reference
184
     *   subproduction) 
185
     */
186
    uint has_circular_alt : 1;
187
188
    /* private memory pool - we use this to make allocation cheaper */
189
    class CVmGramProdMem *mem_;
190
191
    /*
192
     *   Property list enumeration space.  We use this to build a list of
193
     *   properties for which a dictionary word is defined.  We'll expand
194
     *   this list as needed when we find we need more space. 
195
     */
196
    vmgram_match_info *prop_enum_arr_;
197
    size_t prop_enum_max_;
198
199
    /* array of rule alternatives */
200
    struct vmgram_alt_info *alts_;
201
    size_t alt_cnt_;
202
};
203
204
/*
205
 *   Alternative object.  Each of these objects represents one of our rule
206
 *   alternatives. 
207
 */
208
struct vmgram_alt_info
209
{
210
    /* the alternative's score and badness values */
211
    int score;
212
    int badness;
213
214
    /* 
215
     *   the "processor object" for this alternative - this is the class we
216
     *   instantiate to represent a match to the alternative 
217
     */
218
    vm_obj_id_t proc_obj;
219
220
    /* array of token elements in the alternative */
221
    struct vmgram_tok_info *toks;
222
    size_t tok_cnt;
223
};
224
225
/*
226
 *   Grammar rule token entry.  This represents a token in a grammar rule. 
227
 */
228
struct vmgram_tok_info
229
{
230
    /* 
231
     *   property association - this is the property of the processor object
232
     *   that we'll set to point to the match object or input token if we
233
     *   match this rule token 
234
     */
235
    vm_prop_id_t prop;
236
237
    /* token type - this is a VMGRAM_MATCH_xxx value */
238
    uchar typ;
239
240
    /* extra data, depending on 'typ' */
241
    union
242
    {
243
        /* VMGRAM_MATCH_PROD - the sub-production object */
244
        vm_obj_id_t prod_obj;
245
246
        /* VMGRAM_MATCH_SPEECH - the part-of-speech property */
247
        vm_prop_id_t speech_prop;
248
249
        /* VMGRAM_MATCH_NSPEECH - an array of part-of-speech proeprties */
250
        struct
251
        {
252
            size_t cnt;
253
            vm_prop_id_t *props;
254
        } nspeech;
255
256
        /* VMGRAM_MATCH_LITERAL - the literal string to match */
257
        struct
258
        {
259
            /* the literal text and its length */
260
            char *str;
261
            size_t len;
262
263
            /* the cached hash value for the literal */
264
            uint hash;
265
        } lit;
266
267
        /* VMGRAM_MATCH_TOKTYPE - token type enum */
268
        uint32 toktyp_enum;
269
270
    } typinfo;
271
};
272
273
/* ------------------------------------------------------------------------ */
274
/*
275
 *   Grammar-Production object interface 
276
 */
277
class CVmObjGramProd: public CVmObject
278
{
279
    friend class CVmMetaclassGramProd;
280
281
public:
282
    /* metaclass registration object */
283
    static class CVmMetaclass *metaclass_reg_;
284
    class CVmMetaclass *get_metaclass_reg() const { return metaclass_reg_; }
285
286
    /* am I of the given metaclass? */
287
    virtual int is_of_metaclass(class CVmMetaclass *meta) const
288
    {
289
        /* try my own metaclass and my base class */
290
        return (meta == metaclass_reg_
291
                || CVmObject::is_of_metaclass(meta));
292
    }
293
294
    /* create dynamically using stack arguments */
295
    static vm_obj_id_t create_from_stack(VMG_ const uchar **pc_ptr,
296
                                         uint argc);
297
298
    /* 
299
     *   call a static property - we don't have any of our own, so simply
300
     *   "inherit" the base class handling 
301
     */
302
    static int call_stat_prop(VMG_ vm_val_t *result,
303
                              const uchar **pc_ptr, uint *argc,
304
                              vm_prop_id_t prop)
305
        { return CVmObject::call_stat_prop(vmg_ result, pc_ptr, argc, prop); }
306
307
    /* determine if an object is a GrammarProduction object */
308
    static int is_gramprod_obj(VMG_ vm_obj_id_t obj)
309
        { return vm_objp(vmg_ obj)->is_of_metaclass(metaclass_reg_); }
310
311
    /* notify of deletion */
312
    void notify_delete(VMG_ int in_root_set);
313
314
    /* get a property */
315
    int get_prop(VMG_ vm_prop_id_t prop, vm_val_t *val,
316
                 vm_obj_id_t self, vm_obj_id_t *source_obj, uint *argc);
317
318
    /* set a property */
319
    void set_prop(VMG_ class CVmUndo *undo,
320
                  vm_obj_id_t self, vm_prop_id_t prop, const vm_val_t *val);
321
322
    /* receive notification of a new undo savepoint */
323
    void notify_new_savept() { }
324
325
    /* apply undo */
326
    void apply_undo(VMG_ struct CVmUndoRecord *) { }
327
328
    /* discard additional information associated with an undo record */
329
    void discard_undo(VMG_ struct CVmUndoRecord *) { }
330
331
    /* mark a reference in an undo record */
332
    void mark_undo_ref(VMG_ struct CVmUndoRecord *) { }
333
334
    /* remove stale weak references from an undo record */
335
    void remove_stale_undo_weak_ref(VMG_ struct CVmUndoRecord *) { }
336
337
    /* 
338
     *   mark references - we can only reference root-set objects (since
339
     *   we cannot be modified during execution), hence we don't need to
340
     *   mark anything here 
341
     */
342
    void mark_refs(VMG_ uint) { }
343
344
    /* remove weak references */
345
    void remove_stale_weak_refs(VMG0_);
346
347
    /* load from an image file */
348
    void load_from_image(VMG_ vm_obj_id_t self, const char *ptr, size_t siz);
349
350
    /* 
351
     *   restore to image file state/save/restore - we can't change at
352
     *   run-time, so there's nothing to save or load 
353
     */
354
    void reset_to_image(VMG_ vm_obj_id_t /*self*/) { }
355
    void save_to_file(VMG_ class CVmFile *) { }
356
    void restore_from_file(VMG_ vm_obj_id_t self,
357
                           class CVmFile *, class CVmObjFixup *) { }
358
359
    /* determine if the object has been changed since it was loaded */
360
    int is_changed_since_load() const { return FALSE; }
361
362
    /* 
363
     *   rebuild for image file - we can't change during execution, so our
364
     *   image file data never change 
365
     */
366
    virtual ulong rebuild_image(VMG_ char *buf, ulong buflen);
367
368
    /* convert to constant data */
369
    virtual void convert_to_const_data(VMG_ class CVmConstMapper *,
370
                                       vm_obj_id_t) { }
371
372
protected:
373
    /* private constructor */
374
    CVmObjGramProd(VMG0_);
375
376
    /* property evaluation - undefined property */
377
    int getp_undef(VMG_ vm_obj_id_t, vm_val_t *, uint *) { return FALSE; }
378
379
    /* property evaluation - parseTokens */
380
    int getp_parse(VMG_ vm_obj_id_t self, vm_val_t *val, uint *argc);
381
382
    /* property evaluation - getGrammarInfo */
383
    int getp_get_gram_info(VMG_ vm_obj_id_t self, vm_val_t *val, uint *argc);
384
385
    /* get my extension, properly cast */
386
    vm_gram_ext *get_ext() const { return (vm_gram_ext *)ext_; }
387
388
    /* callback for dictionary word property enumeration */
389
    static void enum_props_cb(VMG_ void *ctx, vm_prop_id_t prop,
390
                              const vm_val_t *match_val);
391
392
    /* search a token for a match to the given vocabulary property */
393
    static int find_prop_in_tok(const struct vmgramprod_tok *tok,
394
                                vm_prop_id_t prop);
395
396
    /* get the next token in an alternative */
397
    static const char *get_next_alt_tok(const char *tokp);
398
399
    /* enqueue our alternatives */
400
    void enqueue_alts(VMG_ class CVmGramProdMem *mem,
401
                      const struct vmgramprod_tok *tok,
402
                      size_t tok_cnt, size_t start_tok_pos,
403
                      struct CVmGramProdState *state,
404
                      struct CVmGramProdQueue *queues,
405
                      vm_obj_id_t self, int circ_only,
406
                      struct CVmGramProdMatch *circ_match,
407
                      class CVmObjDict *dict);
408
409
    /* create and enqueue a new state */
410
    static struct CVmGramProdState *
411
        enqueue_new_state(class CVmGramProdMem *mem,
412
                          size_t start_tok_pos,
413
                          struct CVmGramProdState *enclosing_state,
414
                          const vmgram_alt_info *altp, vm_obj_id_t self,
415
                          int *need_to_clone,
416
                          struct CVmGramProdQueue *queues,
417
                          int circular_alt);
418
    
419
    /* create a new state */
420
    static struct CVmGramProdState *
421
        create_new_state(class CVmGramProdMem *mem,
422
                         size_t start_tok_pos,
423
                         struct CVmGramProdState *enclosing_state,
424
                         const vmgram_alt_info *altp, vm_obj_id_t self,
425
                         int *need_to_clone, int circular_alt);
426
    
427
    /* enqueue a state */
428
    static void enqueue_state(struct CVmGramProdState *state,
429
                              struct CVmGramProdQueue *queues);
430
431
    /* process the work queue */
432
    static void process_work_queue(VMG_ CVmGramProdMem *mem,
433
                                   const struct vmgramprod_tok *tok,
434
                                   size_t tok_cnt,
435
                                   struct CVmGramProdQueue *queues,
436
                                   class CVmObjDict *dict);
437
                                   
438
439
    /* process the first work queue entry */
440
    static void process_work_queue_head(VMG_ CVmGramProdMem *mem,
441
                                        const struct vmgramprod_tok *tok,
442
                                        size_t tok_cnt,
443
                                        struct CVmGramProdQueue *queues,
444
                                        class CVmObjDict *dict);
445
446
    /* build a match tree */
447
    static void build_match_tree(VMG_ const struct CVmGramProdMatch *match,
448
                                 const vm_val_t *tok_list,
449
                                 const vm_val_t *tok_match_list,
450
                                 vm_val_t *retval,
451
                                 size_t *first_tok, size_t *last_tok);
452
453
    /* cache the hash values for the literal tokens in our alternatives */
454
    void cache_hashes(VMG_ CVmObjDict *dict);
455
456
    /* calculate the hash value for a literal string */
457
    static unsigned int calc_str_hash(VMG_ class CVmObjDict *dict,
458
                                      const vm_val_t *strval,
459
                                      const char *str, size_t len);
460
461
    /* check to see if a token matches a literal */
462
    static int tok_equals_lit(VMG_ const struct vmgramprod_tok *tok,
463
                              const char *lit, size_t lit_len,
464
                              class CVmObjDict *dict,
465
                              vm_val_t *match_result);
466
467
    /* property evaluation function table */
468
    static int (CVmObjGramProd::*func_table_[])(VMG_ vm_obj_id_t self,
469
                                                vm_val_t *retval, uint *argc);
470
};
471
472
473
/* ------------------------------------------------------------------------ */
474
/*
475
 *   Registration table object 
476
 */
477
class CVmMetaclassGramProd: public CVmMetaclass
478
{
479
public:
480
    /* get the global name */
481
    const char *get_meta_name() const { return "grammar-production/030001"; }
482
483
    /* create from image file */
484
    void create_for_image_load(VMG_ vm_obj_id_t id)
485
    {
486
        new (vmg_ id) CVmObjGramProd(vmg0_);
487
        G_obj_table->set_obj_gc_characteristics(id, FALSE, TRUE);
488
    }
489
490
    /* create from restoring from saved state */
491
    void create_for_restore(VMG_ vm_obj_id_t id)
492
    {
493
        new (vmg_ id) CVmObjGramProd(vmg0_);
494
        G_obj_table->set_obj_gc_characteristics(id, FALSE, TRUE);
495
    }
496
497
    /* create dynamically using stack arguments */
498
    vm_obj_id_t create_from_stack(VMG_ const uchar **pc_ptr, uint argc)
499
        { return CVmObjGramProd::create_from_stack(vmg_ pc_ptr, argc); }
500
501
    /* call a static property */
502
    int call_stat_prop(VMG_ vm_val_t *result,
503
                       const uchar **pc_ptr, uint *argc,
504
                       vm_prop_id_t prop)
505
    {
506
        return CVmObjGramProd::
507
            call_stat_prop(vmg_ result, pc_ptr, argc, prop);
508
    }
509
};
510
511
512
#endif /* VMGRAM_H */
513
514
/*
515
 *   Register the class 
516
 */
517
VM_REGISTER_METACLASS(CVmObjGramProd)
518