cfad47cfa3/tads3/vmhash.cpp

4b825dc642cb6eb9a060e54bf8d69288fbee4904cfad47cfa334b206c65f22086bcc5d63e6f70944
1
#ifdef RCSID
2
static char RCSid[] =
3
"$Header: d:/cvsroot/tads/tads3/vmhash.cpp,v 1.3 1999/07/11 00:46:59 MJRoberts Exp $";
4
#endif
5
6
/* 
7
 *   Copyright (c) 1997, 2002 Michael J. Roberts.  All Rights Reserved.
8
 *   
9
 *   Please see the accompanying license file, LICENSE.TXT, for information
10
 *   on using and copying this software.  
11
 */
12
/*
13
Name
14
  vmhash.cpp - hash table implementation
15
Function
16
  
17
Notes
18
  
19
Modified
20
  10/25/97 MJRoberts  - Creation
21
*/
22
23
#include <assert.h>
24
#include <memory.h>
25
#include <string.h>
26
27
#ifndef STD_H
28
#include "t3std.h"
29
#endif
30
#ifndef VMHASH_H
31
#include "vmhash.h"
32
#endif
33
34
35
/* ------------------------------------------------------------------------ */
36
/*
37
 *   Simple case-insensitive hash function implementation.
38
 */
39
unsigned int CVmHashFuncCI::compute_hash(const char *s, size_t l) const
40
{
41
    uint acc;
42
43
    /*
44
     *   Add up all the character values in the string, converting all
45
     *   characters to upper-case.
46
     */
47
    for (acc = 0 ; l != 0 ; ++s, --l)
48
    {
49
        uchar c;
50
51
        c = (uchar)(is_lower(*s) ? to_upper(*s) : *s);
52
        acc += c;
53
    }
54
55
    /* return the accumulated value */
56
    return acc;
57
}
58
59
/* ------------------------------------------------------------------------ */
60
/*
61
 *   Simple case-sensitive hash function implementation 
62
 */
63
unsigned int CVmHashFuncCS::compute_hash(const char *s, size_t l) const
64
{
65
    uint acc;
66
67
    /*
68
     *   add up all the character values in the string, treating case as
69
     *   significant 
70
     */
71
    for (acc = 0 ; l != 0 ; ++s, --l)
72
    {
73
        uchar c;
74
75
        c = (uchar)*s;
76
        acc += c;
77
    }
78
79
    /* return the accumulated value */
80
    return acc;
81
}
82
83
84
/* ------------------------------------------------------------------------ */
85
/*
86
 *   Hash table symbol entry.  This is an abstract class; subclasses must
87
 *   provide a symbol-matching method.  
88
 */
89
CVmHashEntry::CVmHashEntry(const char *str, size_t len, int copy)
90
{
91
    /* not linked into a list yet */
92
    nxt_ = 0;
93
94
    /* see if we can use the original string or need to make a private copy */
95
    if (copy)
96
    {
97
        char *buf;
98
99
        /* allocate space for a copy */
100
        buf = new char[len];
101
102
        /* copy it into our buffer */
103
        memcpy(buf, str, len * sizeof(*buf));
104
105
        /* remember it */
106
        str_ = buf;
107
    }
108
    else
109
    {
110
        /* we can use the original */
111
        str_ = str;
112
    }
113
114
    /* remember the length */
115
    len_ = len;
116
117
    /* remember whether or not we own the string */
118
    copy_ = copy;
119
}
120
121
CVmHashEntry::~CVmHashEntry()
122
{
123
    /* if we made a private copy of the string, we own it, so delete it */
124
    if (copy_)
125
        delete [] (char *)str_;
126
}
127
128
129
/* ------------------------------------------------------------------------ */
130
/*
131
 *   Concrete subclass of CVmHashEntry providing a case-insensitive
132
 *   symbol match implementation 
133
 */
134
int CVmHashEntryCI::matches(const char *str, size_t len) const
135
{
136
    /*
137
     *   it's a match if the strings are the same length and all
138
     *   characters match, ignoring case 
139
     */
140
    return (len == len_
141
            && memicmp(str, str_, len * sizeof(*str)) == 0);
142
}
143
144
145
/* ------------------------------------------------------------------------ */
146
/*
147
 *   Concrete subclass of CVmHashEntry providing a case-sensitive symbol
148
 *   match implementation 
149
 */
150
int CVmHashEntryCS::matches(const char *str, size_t len) const
151
{
152
    /*
153
     *   it's a match if the strings are the same length and all
154
     *   characters match, treating case as significant 
155
     */
156
    return (len == len_
157
            && memcmp(str, str_, len * sizeof(*str)) == 0);
158
}
159
160
/* ------------------------------------------------------------------------ */
161
/*
162
 *   Hash table 
163
 */
164
165
void CVmHashTable::init(int hash_table_size,
166
                        CVmHashFunc *hash_function, int own_hash_func,
167
                        CVmHashEntry **hash_array)
168
{
169
    CVmHashEntry **entry;
170
    size_t i;
171
172
    /* make sure it's a power of two */
173
    assert(is_power_of_two(hash_table_size));
174
175
    /* make sure we got a hash function */
176
    assert(hash_function != 0);
177
178
    /* save the hash function */
179
    hash_function_ = hash_function;
180
    own_hash_func_ = own_hash_func;
181
182
    /* check to see if the caller provided the hash array */
183
    if (hash_array != 0)
184
    {
185
        /* the caller allocated the table - store a reference to it */
186
        table_ = hash_array;
187
        table_size_ = hash_table_size;
188
189
        /* note that the table belongs to the caller, so we don't delete it */
190
        own_hash_table_ = FALSE;
191
    }
192
    else
193
    {
194
        /* allocate the table */
195
        table_ = new CVmHashEntry *[hash_table_size];
196
        table_size_ = hash_table_size;
197
198
        /* note that we own the table and must delete it */
199
        own_hash_table_ = TRUE;
200
    }
201
202
    /* clear the table */
203
    for (entry = table_, i = 0 ; i < table_size_ ; ++i, ++entry)
204
        *entry = 0;
205
}
206
207
CVmHashTable::~CVmHashTable()
208
{
209
    /* delete the hash function object if I own it */
210
    if (own_hash_func_)
211
        delete hash_function_;
212
213
    /* delete each entry in the hash table */
214
    delete_all_entries();
215
216
    /* delete the hash table */
217
    if (own_hash_table_)
218
        delete [] table_;
219
}
220
221
/*
222
 *   delete all entries in the hash table, but keep the table itself 
223
 */
224
void CVmHashTable::delete_all_entries()
225
{
226
    CVmHashEntry **tableptr;
227
    size_t i;
228
229
    for (tableptr = table_, i = 0 ; i < table_size_ ; ++i, ++tableptr)
230
    {
231
        CVmHashEntry *entry;
232
        CVmHashEntry *nxt;
233
234
        /* delete each entry in the list at this element */
235
        for (entry = *tableptr ; entry ; entry = nxt)
236
        {
237
            /* remember the next entry */
238
            nxt = entry->nxt_;
239
240
            /* delete this entry */
241
            delete entry;
242
        }
243
244
        /* there's nothing at this table entry now */
245
        *tableptr = 0;
246
    }
247
}
248
249
/*
250
 *   Verify that a value is a power of two.  Hash table sizes must be
251
 *   powers of two. 
252
 */
253
int CVmHashTable::is_power_of_two(int n)
254
{
255
    /* divide by two until we have an odd number */
256
    while ((n & 1) == 0) n >>= 1;
257
258
    /* make sure the result is 1 */
259
    return (n == 1);
260
}
261
262
/*
263
 *   Compute the hash value for an entry 
264
 */
265
unsigned int CVmHashTable::compute_hash(CVmHashEntry *entry) const
266
{
267
    return compute_hash(entry->getstr(), entry->getlen());
268
}
269
270
/*
271
 *   Compute the hash value for a string 
272
 */
273
unsigned int CVmHashTable::compute_hash(const char *str, size_t len) const
274
{
275
    return adjust_hash(hash_function_->compute_hash(str, len));
276
}
277
278
/*
279
 *   Add an object to the table
280
 */
281
void CVmHashTable::add(CVmHashEntry *entry)
282
{
283
    unsigned int hash;
284
285
    /* compute the hash value for this entry */
286
    hash = compute_hash(entry);
287
288
    /* link it into the slot for this hash value */
289
    entry->nxt_ = table_[hash];
290
    table_[hash] = entry;
291
}
292
293
/*
294
 *   Remove an object 
295
 */
296
void CVmHashTable::remove(CVmHashEntry *entry)
297
{
298
    unsigned int hash;
299
300
    /* compute the hash value for this entry */
301
    hash = compute_hash(entry);
302
303
    /* 
304
     *   if it's the first item in the chain, advance the head over it;
305
     *   otherwise, we'll need to find the previous item to unlink it 
306
     */
307
    if (table_[hash] == entry)
308
    {
309
        /* it's the first item - simply advance the head to the next item */
310
        table_[hash] = entry->nxt_;
311
    }
312
    else
313
    {
314
        CVmHashEntry *prv;
315
        
316
        /* find the previous item in the list for this hash value */
317
        for (prv = table_[hash] ; prv != 0 && prv->nxt_ != entry ;
318
             prv = prv->nxt_) ;
319
320
        /* if we found it, unlink this item */
321
        if (prv != 0)
322
            prv->nxt_ = entry->nxt_;
323
    }
324
}
325
326
/*
327
 *   Find an object in the table matching a given string. 
328
 */
329
CVmHashEntry *CVmHashTable::find(const char *str, size_t len) const
330
{
331
    unsigned int hash;
332
    CVmHashEntry *entry;
333
334
    /* compute the hash value for this entry */
335
    hash = compute_hash(str, len);
336
337
    /* scan the list at this hash value looking for a match */
338
    for (entry = table_[hash] ; entry ; entry = entry->nxt_)
339
    {
340
        /* if this one matches, return it */
341
        if (entry->matches(str, len))
342
            return entry;
343
    }
344
345
    /* didn't find anything */
346
    return 0;
347
}
348
349
/*
350
 *   Enumerate hash matches for a given string
351
 */
352
void CVmHashTable::enum_hash_matches(const char *str, size_t len,
353
                                     void (*cb)(void *cbctx,
354
                                                CVmHashEntry *entry),
355
                                     void *cbctx)
356
{
357
    unsigned int hash;
358
359
    /* compute the hash value for this entry */
360
    hash = compute_hash(str, len);
361
362
    /* enumerate matches at the hash value */
363
    enum_hash_matches(hash, cb, cbctx);
364
}
365
366
/*
367
 *   Enumerate hash matches for a given hash code 
368
 */
369
void CVmHashTable::enum_hash_matches(unsigned int hash,
370
                                     void (*cb)(void *cbctx,
371
                                                CVmHashEntry *entry),
372
                                     void *cbctx)
373
{
374
    CVmHashEntry *entry;
375
376
    /* adjust the hash value for the table size */
377
    hash = adjust_hash(hash);
378
379
    /* enumerate the complete list of entries at this hash value */
380
    for (entry = table_[hash] ; entry ; entry = entry->nxt_)
381
    {
382
        /* call the callback with this entry */
383
        (*cb)(cbctx, entry);
384
    }
385
}
386
387
/*
388
 *   Find an object in the table matching a given leading substring.
389
 *   We'll return the longest-named entry that matches a leading substring
390
 *   of the given string.  For example, if there's are entires A, AB, ABC,
391
 *   and ABCE, and this routine is called to find something matching
392
 *   ABCDEFGH, we'll return ABC as the match (not ABCE, since it doesn't
393
 *   match any leading substring of the given string, and not A or AB,
394
 *   even though they match, since ABC also matches and it's longer).  
395
 */
396
CVmHashEntry *CVmHashTable::find_leading_substr(const char *str, size_t len)
397
{
398
    size_t sublen;
399
    CVmHashEntry *entry;
400
401
    /* 
402
     *   try to find each leading substring, starting with the longest,
403
     *   decreasing by one character on each iteration, until we've used
404
     *   the whole string 
405
     */
406
    for (sublen = len ; sublen > 0 ; --sublen)
407
    {
408
        /* if this substring matches, use it */
409
        if ((entry = find(str, sublen)) != 0)
410
            return entry;
411
    }
412
413
    /* we didn't find it */
414
    return 0;
415
}
416
417
/*
418
 *   Enumerate all entries 
419
 */
420
void CVmHashTable::enum_entries(void (*func)(void *, CVmHashEntry *),
421
                                void *ctx)
422
{
423
    CVmHashEntry **tableptr;
424
    size_t i;
425
426
    /* go through each hash value */
427
    for (tableptr = table_, i = 0 ; i < table_size_ ; ++i, ++tableptr)
428
    {
429
        CVmHashEntry *entry;
430
        CVmHashEntry *nxt;
431
432
        /* go through each entry at this hash value */
433
        for (entry = *tableptr ; entry ; entry = nxt)
434
        {
435
            /* 
436
             *   remember the next entry, in case the callback deletes the
437
             *   current entry 
438
             */
439
            nxt = entry->nxt_;
440
441
            /* invoke the callback on this entry */
442
            (*func)(ctx, entry);
443
        }
444
    }
445
}
446
447
/*
448
 *   Enumerate all entries - ultra-safe version.  This version should be
449
 *   used when the callback code might delete arbitrary entries from the
450
 *   hash table.  This version is slower than the standard enum_entries, but
451
 *   will tolerate any changes to the table made in the callback.  
452
 */
453
void CVmHashTable::safe_enum_entries(void (*func)(void *, CVmHashEntry *),
454
                                     void *ctx)
455
{
456
    CVmHashEntry **tableptr;
457
    size_t i;
458
459
    /* go through each hash value */
460
    for (tableptr = table_, i = 0 ; i < table_size_ ; ++i, ++tableptr)
461
    {
462
        size_t list_idx;
463
464
        /* 
465
         *   start at the first (0th) entry in the current hash chain, and
466
         *   keep going until we run out of entries in the chain 
467
         */
468
        for (list_idx = 0 ; ; ++list_idx)
469
        {
470
            CVmHashEntry *entry;
471
            size_t j;
472
473
            /* 
474
             *   Scan the hash chain for the current entry index.
475
             *   
476
             *   This is the part that makes this version slower than the
477
             *   standard version and safer than the standard version.  It's
478
             *   slower than enum_entries() because we must scan the chain
479
             *   list on every iteration to find the next entry, whereas
480
             *   enum_entries() simply keeps a pointer to the next entry.
481
             *   It's safer because we don't keep any pointers - if next
482
             *   element is deleted in the callback in enum_entries(), that
483
             *   stored next pointer would be invalid, but we store no
484
             *   pointers that could become stale.  
485
             */
486
            for (j = 0, entry = *tableptr ; j < list_idx && entry != 0 ;
487
                 entry = entry->nxt_, ++j) ;
488
489
            /* if we failed to find the entry, we're done with this chain */
490
            if (entry == 0)
491
                break;
492
493
            /* invoke the callback on this entry */
494
            (*func)(ctx, entry);
495
        }
496
    }
497
}
498
499
500
/*
501
 *   Move all entries in this table to a new table 
502
 */
503
void CVmHashTable::move_entries_to(CVmHashTable *new_tab)
504
{
505
    CVmHashEntry **tableptr;
506
    size_t i;
507
508
    /* go through each hash value */
509
    for (tableptr = table_, i = 0 ; i < table_size_ ; ++i, ++tableptr)
510
    {
511
        CVmHashEntry *entry;
512
        CVmHashEntry *nxt;
513
514
        /* go through each entry at this hash value */
515
        for (entry = *tableptr ; entry ; entry = nxt)
516
        {
517
            /* 
518
             *   remember the next entry, since we'll be unlinking it from
519
             *   this table, which will render the nxt_ member unusable
520
             *   for the purposes of completing this enumeration 
521
             */
522
            nxt = entry->nxt_;
523
524
            /* 
525
             *   clear the 'next' pointer in this entry, to unlink it from
526
             *   our table - since everything is being removed, there's no
527
             *   need to worry about what came before us 
528
             */
529
            entry->nxt_ = 0;
530
531
            /* add the entry to the new hash table */
532
            new_tab->add(entry);
533
        }
534
535
        /* 
536
         *   clear this hash value chain head - we've now removed
537
         *   everything from it 
538
         */
539
        *tableptr = 0;
540
    }
541
}
542
543
/* ------------------------------------------------------------------------ */
544
/*
545
 *   Debugging Functions 
546
 */
547
548
#ifdef T3_DEBUG
549
550
/*
551
 *   dump information the hash table to stderr
552
 */
553
void CVmHashTable::debug_dump() const
554
{
555
    long total;
556
    long longest;
557
    long avg;
558
    int over_avg;
559
    int empty;
560
    size_t i;
561
    
562
    /* gather statistics on the hash table */
563
    for (total = longest = 0, empty = 0, i = 0 ; i < table_size_ ; ++i)
564
    {
565
        CVmHashEntry *cur;
566
        int curlen;
567
        
568
        /* scan this chain */
569
        for (curlen = 0, cur = table_[i] ; cur != 0 ; cur = cur->nxt_)
570
        {
571
            ++curlen;
572
            ++total;
573
        }
574
575
        /* if it's empty, so note */
576
        if (curlen == 0)
577
            ++empty;
578
579
        /* if it's longer than the longest, so note */
580
        if (curlen > longest)
581
            longest = curlen;
582
    }
583
584
    /* calculate the average length */
585
    avg = total/table_size_;
586
587
    /* count chains over average length */
588
    for (over_avg = 0, i = 0 ; i < table_size_ ; ++i)
589
    {
590
        CVmHashEntry *cur;
591
        int curlen;
592
593
        /* scan this chain */
594
        for (curlen = 0, cur = table_[i] ; cur != 0 ; cur = cur->nxt_)
595
            ++curlen;
596
597
        /* if it's over average length, note it */
598
        if (curlen > avg)
599
            ++over_avg;
600
    }
601
602
        
603
    /* display the statistics */
604
    fprintf(stderr,
605
            "hash table: total %ld, longest %ld, average %ld\n"
606
            "number of buckets over average length: %d\n"
607
            "number of empty buckets: %d\n",
608
            total, longest, avg, over_avg, empty);
609
}
610
611
#else /* T3_DEBUG */
612
613
/* dummy functions for release builds */
614
void CVmHashTable::debug_dump() const { }
615
616
617
#endif /* T3_DEBUG */
618