cfad47cfa3/tads3/vmstrcmp.cpp

4b825dc642cb6eb9a060e54bf8d69288fbee4904cfad47cfa334b206c65f22086bcc5d63e6f70944
1
/* 
 *   Copyright (c) 2002 by Michael J. Roberts.  All Rights Reserved.
 *   
 *   Please see the accompanying license file, LICENSE.TXT, for information
 *   on using and copying this software.  
 */
/*
Name
  vmstrcmp.cpp - T3 String Comparator intrinsic class
Function
  Implements CVmObjStrComp, which hashes and matches strings subject to
  an optional truncation length, optional case folding, and a table of
  character equivalence mappings.
Notes
  
Modified
  09/05/02 MJRoberts  - Creation
*/
17
18
19
#include <stdlib.h>
20
#include <os.h>
21
#include "utf8.h"
22
#include "vmuni.h"
23
#include "vmtype.h"
24
#include "vmobj.h"
25
#include "vmmeta.h"
26
#include "vmglob.h"
27
#include "vmstrcmp.h"
28
#include "vmstack.h"
29
#include "vmbif.h"
30
#include "vmfile.h"
31
#include "vmlst.h"
32
33
34
/* ------------------------------------------------------------------------ */
35
/*
36
 *   Statics 
37
 */
38
39
/* metaclass registration */
40
static CVmMetaclassStrComp metaclass_reg_obj;
41
CVmMetaclass *CVmObjStrComp::metaclass_reg_ = &metaclass_reg_obj;
42
43
/* function table */
44
int (CVmObjStrComp::
45
     *CVmObjStrComp::func_table_[])(VMG_ vm_obj_id_t self,
46
                                    vm_val_t *retval, uint *argc) =
47
{
48
    &CVmObjStrComp::getp_undef,
49
    &CVmObjStrComp::getp_calc_hash,
50
    &CVmObjStrComp::getp_match_values
51
};
52
53
/* ------------------------------------------------------------------------ */
54
/*
55
 *   notify of deletion 
56
 */
57
void CVmObjStrComp::notify_delete(VMG_ int in_root_set)
58
{
59
    /* delete my extension data */
60
    delete_ext(vmg0_);
61
}
62
63
/*
64
 *   Delete my extension data 
65
 */
66
void CVmObjStrComp::delete_ext(VMG0_)
67
{
68
    vmobj_strcmp_ext *ext = get_ext();
69
70
    /* if I have an extension, delete it */
71
    if (ext != 0)
72
    {
73
        size_t i;
74
        
75
        /* delete each first-level mapping table */
76
        for (i = 0 ; i < sizeof(ext->equiv)/sizeof(ext->equiv[0]) ; ++i)
77
        {
78
            /* if this table is present, delete it */
79
            if (ext->equiv[i] != 0)
80
                t3free(ext->equiv[i]);
81
        }
82
83
        /* delete and forget our extension */
84
        G_mem->get_var_heap()->free_mem(ext_);
85
        ext_ = 0;
86
    }
87
}
88
89
/* ------------------------------------------------------------------------ */
90
/* 
91
 *   set a property 
92
 */
93
void CVmObjStrComp::set_prop(VMG_ class CVmUndo *,
94
                             vm_obj_id_t, vm_prop_id_t,
95
                             const vm_val_t *)
96
{
97
    /* we have no properties to set */
98
    err_throw(VMERR_INVALID_SETPROP);
99
}
100
101
/* ------------------------------------------------------------------------ */
102
/*
103
 *   Get a property 
104
 */
105
int CVmObjStrComp::get_prop(VMG_ vm_prop_id_t prop, vm_val_t *val,
106
                            vm_obj_id_t self, vm_obj_id_t *source_obj,
107
                            uint *argc)
108
{
109
    uint func_idx;
110
111
    /* translate the property into a function vector index */
112
    func_idx = G_meta_table
113
               ->prop_to_vector_idx(metaclass_reg_->get_reg_idx(), prop);
114
115
    /* call the function, if we found it */
116
    if ((this->*func_table_[func_idx])(vmg_ self, val, argc))
117
    {
118
        *source_obj = metaclass_reg_->get_class_obj(vmg0_);
119
        return TRUE;
120
    }
121
122
    /* not found - inherit default handling */
123
    return CVmObject::get_prop(vmg_ prop, val, self, source_obj, argc);
124
}
125
126
127
/* ------------------------------------------------------------------------ */
128
/*
129
 *   Abstract equivalence mapping reader.  This can be implemented on
130
 *   different underlying data sources.  
131
 */
132
class CVmObjStrCompMapReader
133
{
134
public:
135
    /* 
136
     *   Read the next equivalence mapping.  We fill in *ref_ch with the
137
     *   reference character, *uc_result_flags with the upper-case result
138
     *   flags, and *lc_result_flags with the lower-case result flags.  On
139
     *   input, *value_ch_cnt is the maximum number of value characters we
140
     *   can store in the buffer at val_buf; on return, this is the number
141
     *   of characters in the mapping, which might be higher than the number
142
     *   originally in the buffer.  In any case, we will write no more than
143
     *   the allowed buffer size as given by *value_ch_cnt on input (so we
144
     *   might indicate a higher number than we actually wrote: we always
145
     *   return the actual value mapping size, even if we couldn't store the
146
     *   whole thing because of a lack of buffer space).
147
     *   
148
     *   This routine should retrieve one mapping each time it's called, and
149
     *   then move on to the next mapping.  The caller is responsible for
150
     *   making sure that this routine is called the correct number of
151
     *   times, so we don't have to worry about running out of mappings.  
152
     */
153
    virtual void read_mapping(VMG_ wchar_t *ref_ch,
154
                              unsigned long *uc_result_flags,
155
                              unsigned long *lc_result_flags,
156
                              wchar_t *val_buf, size_t *value_ch_cnt) = 0;
157
};
158
159
/*
160
 *   Stream-based mapping reader.  This reads mappings from a stream that
161
 *   uses our serialization format for image and saved-state files.  
162
 */
163
class CVmObjStrCompMapReaderStream: public CVmObjStrCompMapReader
164
{
165
public:
166
    CVmObjStrCompMapReaderStream(CVmStream *str) { str_ = str; }
167
168
    /* read a mapping */
169
    virtual void read_mapping(VMG_ wchar_t *ref_ch,
170
                              unsigned long *uc_result_flags,
171
                              unsigned long *lc_result_flags,
172
                              wchar_t *val_buf, size_t *value_ch_cnt)
173
    {
174
        size_t copy_limit;
175
        size_t copy_size;
176
        size_t i;
177
178
        /* limit our value character copying to the actual buffer size */
179
        copy_limit = *value_ch_cnt;
180
        
181
        /* read the header values */
182
        *ref_ch = (wchar_t)str_->read_uint2();
183
        copy_size = *value_ch_cnt = str_->read_byte();
184
        *uc_result_flags = str_->read_uint4();
185
        *lc_result_flags = str_->read_uint4();
186
187
        /* limit copying to the actual buffer size */
188
        if (copy_size > copy_limit)
189
            copy_size = copy_limit;
190
191
        /* read the values */
192
        for (i = 0 ; i < copy_size ; ++i)
193
            *val_buf++ = (wchar_t)str_->read_uint2();
194
195
        /* skip any values from the input that we weren't able to store */
196
        for ( ; i < *value_ch_cnt ; ++i)
197
            str_->read_uint2();
198
    }
199
200
protected:
201
    /* my stream */
202
    CVmStream *str_;
203
};
204
205
/*
206
 *   Constructor-list mapping reader.  This reads mappings from list data in
207
 *   the format that we use in our constructor. 
208
 */
209
class CVmObjStrCompMapReaderList: public CVmObjStrCompMapReader
210
{
211
public:
212
    CVmObjStrCompMapReaderList(const char *lst)
213
    {
214
        /* remember the list */
215
        lst_ = lst;
216
217
        /* start at the first element */
218
        idx_ = 1;
219
    }
220
    
221
    /* read a mapping */
222
    virtual void read_mapping(VMG_ wchar_t *ref_ch,
223
                              unsigned long *uc_result_flags,
224
                              unsigned long *lc_result_flags,
225
                              wchar_t *val_buf, size_t *value_ch_cnt)
226
    {
227
        vm_val_t val;
228
        const char *sublst;
229
        const char *refstr;
230
        const char *valstr;
231
        size_t copy_rem;
232
        size_t rem;
233
        utf8_ptr p;
234
235
        /* note the size limit of the caller's value string buffer */
236
        copy_rem = *value_ch_cnt;
237
238
        /* retrieve the next element of the list */
239
        CVmObjList::index_list(vmg_ &val, lst_, idx_);
240
241
        /* get the value as a sublist */
242
        if ((sublst = val.get_as_list(vmg0_)) == 0)
243
            err_throw(VMERR_BAD_TYPE_BIF);
244
245
        /* retrieve the reference character string (sublst's 1st element) */
246
        CVmObjList::index_list(vmg_ &val, sublst, 1);
247
        refstr = val.get_as_string(vmg0_);
248
249
        /* retrieve the value string (sublst's 2nd element) */
250
        CVmObjList::index_list(vmg_ &val, sublst, 2);
251
        valstr = val.get_as_string(vmg0_);
252
253
        /* make sure the reference and value strings are indeed strings */
254
        if (refstr == 0 || valstr == 0)
255
            err_throw(VMERR_BAD_TYPE_BIF);
256
257
        /* we need at least one character in each string */
258
        if (vmb_get_len(refstr) == 0 || vmb_get_len(valstr) == 0)
259
            err_throw(VMERR_BAD_VAL_BIF);
260
261
        /* fill in the caller's reference character from the ref string */
262
        *ref_ch = utf8_ptr::s_getch(refstr + VMB_LEN);
263
264
        /* store the characters of the value string, up to the buffer limit */
265
        p.set((char *)valstr + VMB_LEN);
266
        rem = vmb_get_len(valstr);
267
        for (*value_ch_cnt = 0 ; rem != 0 ; p.inc(&rem))
268
        {
269
            /* if we have room, copy this character */
270
            if (copy_rem != 0)
271
            {
272
                /* copy the character */
273
                *val_buf++ = p.getch();
274
275
                /* count it */
276
                --copy_rem;
277
            }
278
279
            /* count it in the actual length */
280
            ++(*value_ch_cnt);
281
        }
282
283
        /* get the upper-case flags (sublst's 3rd element) */
284
        CVmObjList::index_list(vmg_ &val, sublst, 3);
285
        if (val.typ != VM_INT)
286
            err_throw(VMERR_BAD_TYPE_BIF);
287
        *uc_result_flags = val.val.intval;
288
289
        /* get the lower-case flags (sublst's 4th element) */
290
        CVmObjList::index_list(vmg_ &val, sublst, 4);
291
        if (val.typ != VM_INT)
292
            err_throw(VMERR_BAD_TYPE_BIF);
293
        *lc_result_flags = val.val.intval;
294
295
        /* we're done with this mapping, so advance to the next one */
296
        ++idx_;
297
    }
298
    
299
protected:
300
    /* my list data */
301
    const char *lst_;
302
303
    /* the list index of the next mapping to retrieve */
304
    size_t idx_;
305
};
306
307
/* ------------------------------------------------------------------------ */
308
/*
309
 *   Create from stack.  We take the following constructor arguments:
310
 *   
311
 *   trunc_len (int: truncation length)
312
 *.  case_sensitive (bool: case sensitivity flag)
313
 *.  mappings (list: equivalence mappings)
314
 *   
315
 *   Our equivalence mappings are given as a list of lists; the main list
316
 *   consists of one sublist per mapping.  Each mapping sublist looks like
317
 *   this:
318
 *   
319
 *   ['ref_char', 'val_string', uc_flags, lc_flag]
320
 *   
321
 *   The 'ref_char' is a one-character string giving the reference character
322
 *   of the mapping, and 'val_string' is a string of one or more characters
323
 *   that can match the reference character in a value (input) string.
324
 *   uc_flags and lc_flags are integers giving the upper-case and lower-case
325
 *   flags (respectively) that are to be added to the match result code when
326
 *   the mapping is used to match a pair of strings.  
327
 */
328
vm_obj_id_t CVmObjStrComp::create_from_stack(
329
    VMG_ const uchar **pc_ptr, uint argc)
330
{
331
    size_t trunc_len;
332
    int case_sensitive;
333
    const char *lst;
334
    vm_obj_id_t id;
335
    CVmObjStrComp *obj;
336
    size_t equiv_cnt;
337
    size_t total_chars;
338
    
339
    /* check arguments */
340
    if (argc != 3)
341
        err_throw(VMERR_WRONG_NUM_OF_ARGS);
342
343
    /* pop the truncation length parameter */
344
    if (G_stk->get(0)->typ == VM_NIL)
345
    {
346
        /* it's nil, so truncation is not allowed */
347
        trunc_len = 0;
348
        G_stk->discard();
349
    }
350
    else
351
    {
352
        /* retrieve the truncation length as an integer */
353
        trunc_len = CVmBif::pop_int_val(vmg0_);
354
    }
355
356
    /* get the case sensitivity flag */
357
    case_sensitive = CVmBif::pop_bool_val(vmg0_);
358
359
    /* 
360
     *   retrieve the mapping list, but leave it on the stack (for gc
361
     *   protection) 
362
     */
363
    if (G_stk->get(0)->typ == VM_NIL)
364
    {
365
        /* there are no mappings */
366
        lst = 0;
367
        equiv_cnt = 0;
368
        total_chars = 0;
369
    }
370
    else
371
    {
372
        size_t i;
373
        
374
        /* get the list value from the argument */
375
        lst = G_stk->get(0)->get_as_list(vmg0_);
376
        if (lst == 0)
377
            err_throw(VMERR_BAD_TYPE_BIF);
378
379
        /* the list contains one entry per equivalence mapping */
380
        equiv_cnt = vmb_get_len(lst);
381
382
        /* run through the list and count the value string characters */
383
        for (i = 1, total_chars = 0 ; i <= equiv_cnt ; ++i)
384
        {
385
            vm_val_t val;
386
            const char *sublst;
387
            const char *strp;
388
            utf8_ptr ustrp;
389
390
            /* get this mapping from the list */
391
            CVmObjList::index_list(vmg_ &val, lst, i);
392
393
            /* make sure it's a sublist */
394
            if ((sublst = val.get_as_list(vmg0_)) == 0)
395
                err_throw(VMERR_BAD_TYPE_BIF);
396
397
            /* 
398
             *   get the second element of the mapping sublist - this is the
399
             *   value string 
400
             */
401
            CVmObjList::index_list(vmg_ &val, sublst, 2);
402
            if ((strp = val.get_as_string(vmg0_)) == 0)
403
                err_throw(VMERR_BAD_TYPE_BIF);
404
405
            /* add the character length of the string to the total */
406
            ustrp.set((char *)strp + VMB_LEN);
407
            total_chars += ustrp.len(vmb_get_len(strp));
408
        }
409
    }
410
411
    /* create the new object */
412
    id = vm_new_id(vmg_ FALSE, FALSE, FALSE);
413
    obj = new (vmg_ id) CVmObjStrComp();
414
415
    /* set up a list-based mapping reader */
416
    CVmObjStrCompMapReaderList reader(lst);
417
418
    /* allocate and initialize the new object's extension */
419
    obj->alloc_ext(vmg_ trunc_len, case_sensitive, equiv_cnt, total_chars,
420
                   &reader);
421
422
    /* discard the gc protection */
423
    G_stk->discard();
424
425
    /* return the new object */
426
    return id;
427
}
428
429
/* ------------------------------------------------------------------------ */
430
/*
431
 *   Load from an image file 
432
 */
433
void CVmObjStrComp::load_from_image(VMG_ vm_obj_id_t /*self*/,
434
                                    const char *ptr, size_t len)
435
{
436
    /* load my image data */
437
    CVmReadOnlyMemoryStream str(ptr, len);
438
    load_from_stream(vmg_ &str);
439
}
440
441
/*
442
 *   Load from an abstract stream 
443
 */
444
void CVmObjStrComp::load_from_stream(VMG_ CVmStream *str)
445
{
446
    unsigned int trunc_len;
447
    unsigned int flags;
448
    unsigned int equiv_cnt;
449
    unsigned int total_chars;
450
    
451
    /* load the fixed header */
452
    trunc_len = str->read_uint2();
453
    flags = str->read_uint2();
454
    equiv_cnt = str->read_uint2();
455
    total_chars = str->read_uint2();
456
457
    /* set up a stream-based mapping reader */
458
    CVmObjStrCompMapReaderStream reader(str);
459
460
    /* allocate and initialize our extension */
461
    alloc_ext(vmg_ trunc_len, (flags & 0x0001) != 0, equiv_cnt, total_chars,
462
              &reader);
463
}
464
465
/*
466
 *   Allocate and initialize our extension 
467
 */
468
void CVmObjStrComp::alloc_ext(VMG_ size_t trunc_len, int case_sensitive,
469
                              size_t equiv_cnt, size_t total_chars,
470
                              CVmObjStrCompMapReader *reader)
471
{
472
    size_t siz;
473
    vmobj_strcmp_ext *ext;
474
    vmobj_strcmp_equiv *nxt_equiv;
475
    wchar_t *nxt_ch;
476
    size_t ch_rem;
477
    size_t i;
478
    size_t idx1, idx2;
479
480
    /* delete my extension, if I have one already */
481
    delete_ext(vmg0_);
482
483
    /* 
484
     *   Calculate how much space we need for our extension.  In addition to
485
     *   the fixed part, allocate space for one vmobj_strcmp_equiv structure
486
     *   per mapping, plus the wchar_t's for the value mappings.  
487
     */
488
    siz = sizeof(vmobj_strcmp_ext)
489
          + (equiv_cnt * sizeof(vmobj_strcmp_equiv))
490
          + (total_chars * sizeof(wchar_t));
491
492
    /* allocate our new extension */
493
    ext_ = (char *)G_mem->get_var_heap()->alloc_mem(siz, this);
494
    ext = get_ext();
495
496
    /* 
497
     *   set up our suballocation pool pointers: put the equivalence mapping
498
     *   structures after the fixed part of the extension, and put the
499
     *   wchar_t's after the equivalence mappings 
500
     */
501
    nxt_equiv = (vmobj_strcmp_equiv *)(ext + 1);
502
    nxt_ch = (wchar_t *)(&nxt_equiv[equiv_cnt]);
503
    ch_rem = total_chars;
504
505
    /* initialize the extension structure */
506
    ext->trunc_len = trunc_len;
507
    ext->case_sensitive = case_sensitive;
508
509
    /* 
510
     *   we have no equivalence mappings installed yet, so clear out the
511
     *   first tier of the mapping array 
512
     */
513
    for (i = 0 ; i < sizeof(ext->equiv)/sizeof(ext->equiv[0]) ; ++i)
514
        ext->equiv[i] = 0;
515
516
    /* load the mappings */
517
    for (i = 0 ; i < equiv_cnt ; ++i, ++nxt_equiv)
518
    {
519
        wchar_t ref_ch;
520
521
        /* 
522
         *   set up our equivalent's value buffer with the remainder of our
523
         *   main buffer 
524
         */
525
        nxt_equiv->val_ch = nxt_ch;
526
        nxt_equiv->val_ch_cnt = ch_rem;
527
528
        /* read the mapping */
529
        reader->read_mapping(vmg_ &ref_ch,
530
                             &nxt_equiv->uc_result_flags,
531
                             &nxt_equiv->lc_result_flags,
532
                             nxt_equiv->val_ch, &nxt_equiv->val_ch_cnt);
533
534
        /* deduct this mapping from our main value character buffer */
535
        nxt_ch += nxt_equiv->val_ch_cnt;
536
        ch_rem -= nxt_equiv->val_ch_cnt;
537
538
        /* if we don't have a first-tier table for this character, add one */
539
        idx1 = (ref_ch >> 8) & 0xFF;
540
        idx2 = (ref_ch & 0xFF);
541
        if (ext->equiv[idx1] == 0)
542
        {
543
            vmobj_strcmp_equiv **p;
544
            size_t j;
545
            
546
            /* allocate a first-tier table for this index */
547
            ext->equiv[idx1] = p = (vmobj_strcmp_equiv **)t3malloc(
548
                256 * sizeof(vmobj_strcmp_equiv *));
549
550
            /* clear out the first-tier table */
551
            for (j = 0 ; j < 256 ; ++j, ++p)
552
                *p = 0;
553
        }
554
555
        /* set the mapping for this character */
556
        ext->equiv[idx1][idx2] = nxt_equiv;
557
    }
558
}
559
560
/* ------------------------------------------------------------------------ */
561
/* 
562
 *   save to a file 
563
 */
564
void CVmObjStrComp::save_to_file(VMG_ class CVmFile *fp)
565
{
566
    /* write our data to the file */
567
    CVmFileStream str(fp);
568
    write_to_stream(vmg_ &str, 0);
569
}
570
571
/*
572
 *   Serialize to a stream 
573
 */
574
ulong CVmObjStrComp::write_to_stream(VMG_ CVmStream *str, ulong *bytes_avail)
575
{
576
    wchar_t ref_ch;
577
    vmobj_strcmp_ext *ext = get_ext();
578
    size_t i;
579
    vmobj_strcmp_equiv ***p;
580
    size_t total_value_ch;
581
    size_t equiv_cnt;
582
    size_t need_size;
583
584
    /* get the mapping totals */
585
    count_equiv_mappings(&equiv_cnt, &total_value_ch);
586
587
    /* 
588
     *   Calculate our space needs.  We need 8 bytes for the fixed header,
589
     *   11 bytes per equivalent mapping, and 2 bytes per value string
590
     *   character. 
591
     */
592
    need_size = 8 + (11 * equiv_cnt) + (2 * total_value_ch);
593
    
594
    /* if we have a size limit, check to make sure we can abide by it */
595
    if (bytes_avail != 0 && need_size > *bytes_avail)
596
    {
597
        /* 
598
         *   there's not enough space in the output stream for us, so don't
599
         *   write anything at all; simply return the amount of space we
600
         *   need 
601
         */
602
        return need_size;
603
    }
604
605
    /* write out the serialization structure header */
606
    str->write_int2(ext->trunc_len);
607
    str->write_int2(ext->case_sensitive ? 0x0001 : 0x0000);
608
    str->write_int2(equiv_cnt);
609
    str->write_int2(total_value_ch);
610
611
    /* run through our equivalence table again and write the mappings */
612
    for (ref_ch = 0, i = 0, p = ext->equiv ;
613
         i < sizeof(ext->equiv)/sizeof(ext->equiv[0]) ; ++i, ++p, ++ref_ch)
614
    {
615
        vmobj_strcmp_equiv **ep;
616
        size_t j;
617
618
        /* if this first-tier mapping is unused, skip it */
619
        if (*p == 0)
620
            continue;
621
622
        /* run through our second-level table */
623
        for (j = 0, ep = *p ; j < 256 ; ++j, ++ep)
624
        {
625
            /* if this mapping is used, write it out */
626
            if (*ep != 0)
627
            {
628
                size_t k;
629
                wchar_t *vp;
630
                
631
                /* write the fixed part of the mapping */
632
                str->write_int2(ref_ch);
633
                str->write_byte((uchar)(*ep)->val_ch_cnt);
634
                str->write_int4((*ep)->uc_result_flags);
635
                str->write_int4((*ep)->lc_result_flags);
636
637
                /* write the value mapping characters */
638
                for (k = (*ep)->val_ch_cnt, vp = (*ep)->val_ch ; k != 0 ;
639
                     --k, ++vp)
640
                {
641
                    /* write this character */
642
                    str->write_int2(*vp);
643
                }
644
            }
645
        }
646
    }
647
648
    /* return our space needs */
649
    return need_size;
650
}
651
652
/*
653
 *   Count the equivalence mappings. 
654
 */
655
void CVmObjStrComp::count_equiv_mappings(size_t *equiv_cnt,
656
                                         size_t *total_value_ch)
657
{
658
    vmobj_strcmp_ext *ext = get_ext();
659
    size_t i;
660
    vmobj_strcmp_equiv ***p;
661
662
    /* run through our table and count up the mappings */
663
    for (*total_value_ch = 0, *equiv_cnt = 0, i = 0, p = ext->equiv ;
664
         i < sizeof(ext->equiv)/sizeof(ext->equiv[0]) ; ++i, ++p)
665
    {
666
        vmobj_strcmp_equiv **ep;
667
        size_t j;
668
669
        /* if this first-tier mapping is unused, skip it */
670
        if (*p == 0)
671
            continue;
672
673
        /* run through our second-level table */
674
        for (j = 0, ep = *p ; j < 256 ; ++j, ++ep)
675
        {
676
            /* if this mapping is used, count it */
677
            if (*ep != 0)
678
            {
679
                /* count this equivalent mapping */
680
                ++(*equiv_cnt);
681
682
                /* count its value mapping characters in the total */
683
                *total_value_ch += (*ep)->val_ch_cnt;
684
            }
685
        }
686
    }
687
}
688
689
/* ------------------------------------------------------------------------ */
690
/* 
691
 *   restore from a file 
692
 */
693
void CVmObjStrComp::restore_from_file(VMG_ vm_obj_id_t /*self*/,
694
                                      class CVmFile *fp, CVmObjFixup *)
695
{
696
    /* load from the file */
697
    CVmFileStream str(fp);
698
    load_from_stream(vmg_ &str);
699
}
700
701
/* ------------------------------------------------------------------------ */
702
/* 
703
 *   property evaluator - calculate a hash value
704
 */
705
int CVmObjStrComp::getp_calc_hash(VMG_ vm_obj_id_t /*self*/,
706
                                  vm_val_t *retval, uint *argc)
707
{
708
    static CVmNativeCodeDesc desc(1);
709
    const char *strp;
710
711
    /* check arguments */
712
    if (get_prop_check_argc(retval, argc, &desc))
713
        return TRUE;
714
715
    /* 
716
     *   retrieve the string argument (it must be a string), but leave it on
717
     *   the stack for now, for gc protection 
718
     */
719
    strp = G_stk->get(0)->get_as_string(vmg0_);
720
    if (strp == 0)
721
        err_throw(VMERR_BAD_TYPE_BIF);
722
723
    /* calculate the hash value and return it */
724
    retval->set_int(calc_str_hash(strp + VMB_LEN, vmb_get_len(strp)));
725
726
    /* discard gc protection */
727
    G_stk->discard();
728
729
    /* handled */
730
    return TRUE;
731
}
732
733
/*
734
 *   Calculate a hash value 
735
 */
736
unsigned int CVmObjStrComp::calc_str_hash(const char *strp, size_t len)
737
{
738
    vmobj_strcmp_ext *ext = get_ext();
739
    utf8_ptr p;
740
    unsigned int hash;
741
    size_t char_limit;
742
743
    /* set up to scan the string */
744
    p.set((char *)strp);
745
746
    /*
747
     *   Limit the scan to our truncation length, because we can't
748
     *   distinguish hash buckets beyond the truncation point (if we did, a
749
     *   truncated string wouldn't hash into the same bucket as a longer
750
     *   string it matches; but all matching strings are required to go into
751
     *   the same bucket, so such a hash mismatch is not allowed).
752
     */
753
    char_limit = ext->trunc_len;
754
755
    /* scan the string */
756
    for (hash = 0 ; len != 0 ; p.inc(&len))
757
    {
758
        wchar_t ch;
759
        vmobj_strcmp_equiv **t1;
760
        vmobj_strcmp_equiv *eq;
761
762
        /* get the current character */
763
        ch = p.getch();
764
765
        /* check for a substitution mapping for this character */
766
        if ((t1 = ext->equiv[(ch >> 8) & 0xFF]) != 0
767
            && (eq = t1[ch & 0xFF]) != 0)
768
        {
769
            wchar_t *vp;
770
            size_t vlen;
771
            
772
            /* 
773
             *   This character has a mapping, so add the contribution from
774
             *   the canonical form of the character, which is the value
775
             *   side of the mapping.  
776
             */
777
            for (vp = eq->val_ch, vlen = eq->val_ch_cnt ; vlen != 0 ;
778
                 ++vp, --vlen)
779
            {
780
                /* get this character */
781
                ch = *vp;
782
783
                /* convert to lower case if we're insensitive to case */
784
                if (!ext->case_sensitive)
785
                    ch = t3_to_lower(ch);
786
787
                /* add it to the hash code */
788
                hash += ch;
789
                hash &= 0xFFFF;
790
791
                /* if we've reached the truncation limit, we're done */
792
                if (char_limit == 1)
793
                    return hash;
794
                else if (char_limit != 0)
795
                    --char_limit;
796
            }
797
        }
798
        else
799
        {
800
            /* 
801
             *   if we are not sensitive to case, always use the lower-case
802
             *   representation of a character for its hash value 
803
             */
804
            if (!ext->case_sensitive)
805
                ch = t3_to_lower(ch);
806
            
807
            /* add the contribution from this character */
808
            hash += ch;
809
            hash &= 0xFFFF;
810
811
            /* if we've reached the truncation limit, we're done */
812
            if (char_limit == 1)
813
                return hash;
814
            else if (char_limit != 0)
815
                --char_limit;
816
        }
817
    }
818
819
    /* return the hash code */
820
    return hash;
821
}
822
823
/* ------------------------------------------------------------------------ */
824
/*
 *   Pre-defined return flag values.  These are the bits that
 *   match_strings() itself sets in its result code; flags taken from
 *   equivalence mappings are OR'd into the same result.
 */

/* the string matched */
#define RF_MATCH     0x0001

/* matched with case folding */
#define RF_CASEFOLD  0x0002

/* matched with truncation */
#define RF_TRUNC     0x0004
831
832
833
/* 
834
 *   property evaluator - calculate a hash value 
835
 */
836
int CVmObjStrComp::getp_match_values(VMG_ vm_obj_id_t /*self*/,
837
                                     vm_val_t *retval, uint *argc)
838
{
839
    static CVmNativeCodeDesc desc(2);
840
    const char *valstr;
841
    const char *refstr;
842
843
    /* check arguments */
844
    if (get_prop_check_argc(retval, argc, &desc))
845
        return TRUE;
846
847
    /* retrieve the strings, but leave them on the stack for gc protection */
848
    valstr = G_stk->get(0)->get_as_string(vmg0_);
849
    refstr = G_stk->get(1)->get_as_string(vmg0_);
850
851
    /* make sure they're valid strings */
852
    if (valstr == 0 || refstr == 0)
853
        err_throw(VMERR_BAD_TYPE_BIF);
854
855
    /* compare the strings and return the result */
856
    retval->set_int(match_strings(valstr + VMB_LEN, vmb_get_len(valstr),
857
                                  refstr + VMB_LEN, vmb_get_len(refstr)));
858
859
    /* discard the gc protection */
860
    G_stk->discard(2);
861
862
    /* handled */
863
    return TRUE;
864
}
865
866
/*
867
 *   Match two strings
868
 */
869
unsigned long CVmObjStrComp::match_strings(const char *valstr, size_t vallen,
870
                                           const char *refstr, size_t reflen)
871
{
872
    vmobj_strcmp_ext *ext = get_ext();
873
    utf8_ptr valp;
874
    utf8_ptr refp;
875
    unsigned long ret;
876
    int fold_case = !(ext->case_sensitive);
877
    size_t valcharlen;
878
879
    /* set up to scan the strings */
880
    valp.set((char *)valstr);
881
    refp.set((char *)refstr);
882
883
    /* start with no return flags */
884
    ret = 0;
885
886
    /* scan the strings */
887
    for (valcharlen = 0 ; vallen != 0 && reflen != 0 ; refp.inc(&reflen))
888
    {
889
        wchar_t valch;
890
        wchar_t refch;
891
        vmobj_strcmp_equiv **t1;
892
        vmobj_strcmp_equiv *eq;
893
894
        /* get each character */
895
        valch = valp.getch();
896
        refch = refp.getch();
897
898
        /* check for an exact match first */
899
        if (refch == valch)
900
        {
901
            /* it's an exact match - skip this input character */
902
            valp.inc(&vallen);
903
            ++valcharlen;
904
            continue;
905
        }
906
907
        /* check for a case-folded match if we're insensitive to case */
908
        if (fold_case && t3_to_lower(valch) == t3_to_lower(refch))
909
        {
910
            /* it's a case-folded match - skip this input character */
911
            valp.inc(&vallen);
912
            ++valcharlen;
913
914
            /* note in the flags that we have differing cases in the match */
915
            ret |= RF_CASEFOLD;
916
917
            /* keep going */
918
            continue;
919
        }
920
921
        /* check for a reference equivalence mapping */
922
        if ((t1 = ext->equiv[(refch >> 8) & 0xFF]) != 0
923
            && (eq = t1[refch & 0xFF]) != 0)
924
        {
925
            wchar_t *vp;
926
            size_t vlen;
927
            
928
            /* 
929
             *   In case we match, apply the appropriate flags added for the
930
             *   equivalence mapping, based on the case of the first value
931
             *   character we're testing.  (If we don't match, we'll simply
932
             *   return failure, so it won't matter that we messed with the
933
             *   flags.)  
934
             */
935
            ret |= (t3_is_upper(valch) ? eq->uc_result_flags
936
                                              : eq->lc_result_flags);
937
938
            /* match each character from the mapping string */
939
            for (vp = eq->val_ch, vlen = eq->val_ch_cnt ;
940
                 vallen != 0 && vlen != 0 ; ++vp, --vlen)
941
            {
942
                /* get this character */
943
                refch = *vp;
944
                
945
                /* if we have an exact match, keep going */
946
                if (refch == valch)
947
                {
948
                    /* matched - skip this character and keep going */
949
                    valp.inc(&vallen);
950
                    ++valcharlen;
951
                    continue;
952
                }
953
954
                /* check for a case-folded match if appropriate */
955
                if (fold_case && t3_to_lower(valch) == t3_to_lower(refch))
956
                {
957
                    /* matched - skip this input character */
958
                    valp.inc(&vallen);
959
                    ++valcharlen;
960
961
                    /* note the case-folded match and keep going */
962
                    ret |= RF_CASEFOLD;
963
                    continue;
964
                }
965
966
                /* no match */
967
                return 0;
968
            }
969
970
            /* 
971
             *   if we make it here, we matched the equivalence mapping -
972
             *   we've already skipped the input we matched, so simply keep
973
             *   going 
974
             */
975
            continue;
976
        }
977
978
        /* we don't have anything else to try, so we don't have a match */
979
        return 0;
980
    }
981
982
    /* 
983
     *   If we ran out of reference string before we ran out of value
984
     *   string, we definitely do not have a match.  If we ran out of value
985
     *   string before we ran out reference string, we have a match as long
986
     *   as we matched at least the truncation length. 
987
     */
988
    if (reflen == 0 && vallen == 0)
989
    {
990
        /* 
991
         *   We ran out of both at the same time - it's a match.  Return the
992
         *   result code up to this point OR'd with RF_MATCH, which is our
993
         *   pre-defined bit that we set for every match.  
994
         */
995
        return (ret | RF_MATCH);
996
    }
997
    else if (vallen != 0)
998
    {
999
        /* we ran out of reference string first - it's not a match */
1000
        return 0;
1001
    }
1002
    else
1003
    {
1004
        /* 
1005
         *   We ran out of value string first, so it's a truncated match if
1006
         *   we matched at least up to the truncation length (assuming we
1007
         *   allow truncation at all).  If we didn't make it to the
1008
         *   truncation length, or we don't allow truncation, it's not a
1009
         *   match. 
1010
         */
1011
        if (ext->trunc_len != 0 && valcharlen >= ext->trunc_len)
1012
        {
1013
            /* 
1014
             *   it's a truncated match - return the result code up to this
1015
             *   point, OR'd with RF_MATCH (our pre-defined bit we set for
1016
             *   every match) and RF_TRUNC (our pre-defined bit we set for
1017
             *   truncated matches) 
1018
             */
1019
            return (ret | RF_MATCH | RF_TRUNC);
1020
        }
1021
        else
1022
        {
1023
            /* didn't make it to the truncation length, so it's not a match */
1024
            return 0;
1025
        }
1026
    }
1027
}
1028