| | 1 | #ifdef RCSID |
| | 2 | static char RCSid[] = |
| | 3 | "$Header: d:/cvsroot/tads/tads3/vmhash.cpp,v 1.3 1999/07/11 00:46:59 MJRoberts Exp $"; |
| | 4 | #endif |
| | 5 | |
| | 6 | /* |
| | 7 | * Copyright (c) 1997, 2002 Michael J. Roberts. All Rights Reserved. |
| | 8 | * |
| | 9 | * Please see the accompanying license file, LICENSE.TXT, for information |
| | 10 | * on using and copying this software. |
| | 11 | */ |
| | 12 | /* |
| | 13 | Name |
| | 14 | vmhash.cpp - hash table implementation |
| | 15 | Function |
| | 16 | |
| | 17 | Notes |
| | 18 | |
| | 19 | Modified |
| | 20 | 10/25/97 MJRoberts - Creation |
| | 21 | */ |
| | 22 | |
| | 23 | #include <assert.h> |
| | 24 | #include <memory.h> |
| | 25 | #include <string.h> |
| | 26 | |
| | 27 | #ifndef STD_H |
| | 28 | #include "t3std.h" |
| | 29 | #endif |
| | 30 | #ifndef VMHASH_H |
| | 31 | #include "vmhash.h" |
| | 32 | #endif |
| | 33 | |
| | 34 | |
| | 35 | /* ------------------------------------------------------------------------ */ |
| | 36 | /* |
| | 37 | * Simple case-insensitive hash function implementation. |
| | 38 | */ |
| | 39 | unsigned int CVmHashFuncCI::compute_hash(const char *s, size_t l) const |
| | 40 | { |
| | 41 | uint acc; |
| | 42 | |
| | 43 | /* |
| | 44 | * Add up all the character values in the string, converting all |
| | 45 | * characters to upper-case. |
| | 46 | */ |
| | 47 | for (acc = 0 ; l != 0 ; ++s, --l) |
| | 48 | { |
| | 49 | uchar c; |
| | 50 | |
| | 51 | c = (uchar)(is_lower(*s) ? to_upper(*s) : *s); |
| | 52 | acc += c; |
| | 53 | } |
| | 54 | |
| | 55 | /* return the accumulated value */ |
| | 56 | return acc; |
| | 57 | } |
| | 58 | |
| | 59 | /* ------------------------------------------------------------------------ */ |
| | 60 | /* |
| | 61 | * Simple case-sensitive hash function implementation |
| | 62 | */ |
| | 63 | unsigned int CVmHashFuncCS::compute_hash(const char *s, size_t l) const |
| | 64 | { |
| | 65 | uint acc; |
| | 66 | |
| | 67 | /* |
| | 68 | * add up all the character values in the string, treating case as |
| | 69 | * significant |
| | 70 | */ |
| | 71 | for (acc = 0 ; l != 0 ; ++s, --l) |
| | 72 | { |
| | 73 | uchar c; |
| | 74 | |
| | 75 | c = (uchar)*s; |
| | 76 | acc += c; |
| | 77 | } |
| | 78 | |
| | 79 | /* return the accumulated value */ |
| | 80 | return acc; |
| | 81 | } |
| | 82 | |
| | 83 | |
| | 84 | /* ------------------------------------------------------------------------ */ |
| | 85 | /* |
| | 86 | * Hash table symbol entry. This is an abstract class; subclasses must |
| | 87 | * provide a symbol-matching method. |
| | 88 | */ |
| | 89 | CVmHashEntry::CVmHashEntry(const char *str, size_t len, int copy) |
| | 90 | { |
| | 91 | /* not linked into a list yet */ |
| | 92 | nxt_ = 0; |
| | 93 | |
| | 94 | /* see if we can use the original string or need to make a private copy */ |
| | 95 | if (copy) |
| | 96 | { |
| | 97 | char *buf; |
| | 98 | |
| | 99 | /* allocate space for a copy */ |
| | 100 | buf = new char[len]; |
| | 101 | |
| | 102 | /* copy it into our buffer */ |
| | 103 | memcpy(buf, str, len * sizeof(*buf)); |
| | 104 | |
| | 105 | /* remember it */ |
| | 106 | str_ = buf; |
| | 107 | } |
| | 108 | else |
| | 109 | { |
| | 110 | /* we can use the original */ |
| | 111 | str_ = str; |
| | 112 | } |
| | 113 | |
| | 114 | /* remember the length */ |
| | 115 | len_ = len; |
| | 116 | |
| | 117 | /* remember whether or not we own the string */ |
| | 118 | copy_ = copy; |
| | 119 | } |
| | 120 | |
| | 121 | CVmHashEntry::~CVmHashEntry() |
| | 122 | { |
| | 123 | /* if we made a private copy of the string, we own it, so delete it */ |
| | 124 | if (copy_) |
| | 125 | delete [] (char *)str_; |
| | 126 | } |
| | 127 | |
| | 128 | |
| | 129 | /* ------------------------------------------------------------------------ */ |
| | 130 | /* |
| | 131 | * Concrete subclass of CVmHashEntry providing a case-insensitive |
| | 132 | * symbol match implementation |
| | 133 | */ |
| | 134 | int CVmHashEntryCI::matches(const char *str, size_t len) const |
| | 135 | { |
| | 136 | /* |
| | 137 | * it's a match if the strings are the same length and all |
| | 138 | * characters match, ignoring case |
| | 139 | */ |
| | 140 | return (len == len_ |
| | 141 | && memicmp(str, str_, len * sizeof(*str)) == 0); |
| | 142 | } |
| | 143 | |
| | 144 | |
| | 145 | /* ------------------------------------------------------------------------ */ |
| | 146 | /* |
| | 147 | * Concrete subclass of CVmHashEntry providing a case-sensitive symbol |
| | 148 | * match implementation |
| | 149 | */ |
| | 150 | int CVmHashEntryCS::matches(const char *str, size_t len) const |
| | 151 | { |
| | 152 | /* |
| | 153 | * it's a match if the strings are the same length and all |
| | 154 | * characters match, treating case as significant |
| | 155 | */ |
| | 156 | return (len == len_ |
| | 157 | && memcmp(str, str_, len * sizeof(*str)) == 0); |
| | 158 | } |
| | 159 | |
| | 160 | /* ------------------------------------------------------------------------ */ |
| | 161 | /* |
| | 162 | * Hash table |
| | 163 | */ |
| | 164 | |
| | 165 | void CVmHashTable::init(int hash_table_size, |
| | 166 | CVmHashFunc *hash_function, int own_hash_func, |
| | 167 | CVmHashEntry **hash_array) |
| | 168 | { |
| | 169 | CVmHashEntry **entry; |
| | 170 | size_t i; |
| | 171 | |
| | 172 | /* make sure it's a power of two */ |
| | 173 | assert(is_power_of_two(hash_table_size)); |
| | 174 | |
| | 175 | /* make sure we got a hash function */ |
| | 176 | assert(hash_function != 0); |
| | 177 | |
| | 178 | /* save the hash function */ |
| | 179 | hash_function_ = hash_function; |
| | 180 | own_hash_func_ = own_hash_func; |
| | 181 | |
| | 182 | /* check to see if the caller provided the hash array */ |
| | 183 | if (hash_array != 0) |
| | 184 | { |
| | 185 | /* the caller allocated the table - store a reference to it */ |
| | 186 | table_ = hash_array; |
| | 187 | table_size_ = hash_table_size; |
| | 188 | |
| | 189 | /* note that the table belongs to the caller, so we don't delete it */ |
| | 190 | own_hash_table_ = FALSE; |
| | 191 | } |
| | 192 | else |
| | 193 | { |
| | 194 | /* allocate the table */ |
| | 195 | table_ = new CVmHashEntry *[hash_table_size]; |
| | 196 | table_size_ = hash_table_size; |
| | 197 | |
| | 198 | /* note that we own the table and must delete it */ |
| | 199 | own_hash_table_ = TRUE; |
| | 200 | } |
| | 201 | |
| | 202 | /* clear the table */ |
| | 203 | for (entry = table_, i = 0 ; i < table_size_ ; ++i, ++entry) |
| | 204 | *entry = 0; |
| | 205 | } |
| | 206 | |
| | 207 | CVmHashTable::~CVmHashTable() |
| | 208 | { |
| | 209 | /* delete the hash function object if I own it */ |
| | 210 | if (own_hash_func_) |
| | 211 | delete hash_function_; |
| | 212 | |
| | 213 | /* delete each entry in the hash table */ |
| | 214 | delete_all_entries(); |
| | 215 | |
| | 216 | /* delete the hash table */ |
| | 217 | if (own_hash_table_) |
| | 218 | delete [] table_; |
| | 219 | } |
| | 220 | |
| | 221 | /* |
| | 222 | * delete all entries in the hash table, but keep the table itself |
| | 223 | */ |
| | 224 | void CVmHashTable::delete_all_entries() |
| | 225 | { |
| | 226 | CVmHashEntry **tableptr; |
| | 227 | size_t i; |
| | 228 | |
| | 229 | for (tableptr = table_, i = 0 ; i < table_size_ ; ++i, ++tableptr) |
| | 230 | { |
| | 231 | CVmHashEntry *entry; |
| | 232 | CVmHashEntry *nxt; |
| | 233 | |
| | 234 | /* delete each entry in the list at this element */ |
| | 235 | for (entry = *tableptr ; entry ; entry = nxt) |
| | 236 | { |
| | 237 | /* remember the next entry */ |
| | 238 | nxt = entry->nxt_; |
| | 239 | |
| | 240 | /* delete this entry */ |
| | 241 | delete entry; |
| | 242 | } |
| | 243 | |
| | 244 | /* there's nothing at this table entry now */ |
| | 245 | *tableptr = 0; |
| | 246 | } |
| | 247 | } |
| | 248 | |
| | 249 | /* |
| | 250 | * Verify that a value is a power of two. Hash table sizes must be |
| | 251 | * powers of two. |
| | 252 | */ |
| | 253 | int CVmHashTable::is_power_of_two(int n) |
| | 254 | { |
| | 255 | /* divide by two until we have an odd number */ |
| | 256 | while ((n & 1) == 0) n >>= 1; |
| | 257 | |
| | 258 | /* make sure the result is 1 */ |
| | 259 | return (n == 1); |
| | 260 | } |
| | 261 | |
| | 262 | /* |
| | 263 | * Compute the hash value for an entry |
| | 264 | */ |
| | 265 | unsigned int CVmHashTable::compute_hash(CVmHashEntry *entry) const |
| | 266 | { |
| | 267 | return compute_hash(entry->getstr(), entry->getlen()); |
| | 268 | } |
| | 269 | |
| | 270 | /* |
| | 271 | * Compute the hash value for a string |
| | 272 | */ |
| | 273 | unsigned int CVmHashTable::compute_hash(const char *str, size_t len) const |
| | 274 | { |
| | 275 | return adjust_hash(hash_function_->compute_hash(str, len)); |
| | 276 | } |
| | 277 | |
| | 278 | /* |
| | 279 | * Add an object to the table |
| | 280 | */ |
| | 281 | void CVmHashTable::add(CVmHashEntry *entry) |
| | 282 | { |
| | 283 | unsigned int hash; |
| | 284 | |
| | 285 | /* compute the hash value for this entry */ |
| | 286 | hash = compute_hash(entry); |
| | 287 | |
| | 288 | /* link it into the slot for this hash value */ |
| | 289 | entry->nxt_ = table_[hash]; |
| | 290 | table_[hash] = entry; |
| | 291 | } |
| | 292 | |
| | 293 | /* |
| | 294 | * Remove an object |
| | 295 | */ |
| | 296 | void CVmHashTable::remove(CVmHashEntry *entry) |
| | 297 | { |
| | 298 | unsigned int hash; |
| | 299 | |
| | 300 | /* compute the hash value for this entry */ |
| | 301 | hash = compute_hash(entry); |
| | 302 | |
| | 303 | /* |
| | 304 | * if it's the first item in the chain, advance the head over it; |
| | 305 | * otherwise, we'll need to find the previous item to unlink it |
| | 306 | */ |
| | 307 | if (table_[hash] == entry) |
| | 308 | { |
| | 309 | /* it's the first item - simply advance the head to the next item */ |
| | 310 | table_[hash] = entry->nxt_; |
| | 311 | } |
| | 312 | else |
| | 313 | { |
| | 314 | CVmHashEntry *prv; |
| | 315 | |
| | 316 | /* find the previous item in the list for this hash value */ |
| | 317 | for (prv = table_[hash] ; prv != 0 && prv->nxt_ != entry ; |
| | 318 | prv = prv->nxt_) ; |
| | 319 | |
| | 320 | /* if we found it, unlink this item */ |
| | 321 | if (prv != 0) |
| | 322 | prv->nxt_ = entry->nxt_; |
| | 323 | } |
| | 324 | } |
| | 325 | |
| | 326 | /* |
| | 327 | * Find an object in the table matching a given string. |
| | 328 | */ |
| | 329 | CVmHashEntry *CVmHashTable::find(const char *str, size_t len) const |
| | 330 | { |
| | 331 | unsigned int hash; |
| | 332 | CVmHashEntry *entry; |
| | 333 | |
| | 334 | /* compute the hash value for this entry */ |
| | 335 | hash = compute_hash(str, len); |
| | 336 | |
| | 337 | /* scan the list at this hash value looking for a match */ |
| | 338 | for (entry = table_[hash] ; entry ; entry = entry->nxt_) |
| | 339 | { |
| | 340 | /* if this one matches, return it */ |
| | 341 | if (entry->matches(str, len)) |
| | 342 | return entry; |
| | 343 | } |
| | 344 | |
| | 345 | /* didn't find anything */ |
| | 346 | return 0; |
| | 347 | } |
| | 348 | |
| | 349 | /* |
| | 350 | * Enumerate hash matches for a given string |
| | 351 | */ |
| | 352 | void CVmHashTable::enum_hash_matches(const char *str, size_t len, |
| | 353 | void (*cb)(void *cbctx, |
| | 354 | CVmHashEntry *entry), |
| | 355 | void *cbctx) |
| | 356 | { |
| | 357 | unsigned int hash; |
| | 358 | |
| | 359 | /* compute the hash value for this entry */ |
| | 360 | hash = compute_hash(str, len); |
| | 361 | |
| | 362 | /* enumerate matches at the hash value */ |
| | 363 | enum_hash_matches(hash, cb, cbctx); |
| | 364 | } |
| | 365 | |
| | 366 | /* |
| | 367 | * Enumerate hash matches for a given hash code |
| | 368 | */ |
| | 369 | void CVmHashTable::enum_hash_matches(unsigned int hash, |
| | 370 | void (*cb)(void *cbctx, |
| | 371 | CVmHashEntry *entry), |
| | 372 | void *cbctx) |
| | 373 | { |
| | 374 | CVmHashEntry *entry; |
| | 375 | |
| | 376 | /* adjust the hash value for the table size */ |
| | 377 | hash = adjust_hash(hash); |
| | 378 | |
| | 379 | /* enumerate the complete list of entries at this hash value */ |
| | 380 | for (entry = table_[hash] ; entry ; entry = entry->nxt_) |
| | 381 | { |
| | 382 | /* call the callback with this entry */ |
| | 383 | (*cb)(cbctx, entry); |
| | 384 | } |
| | 385 | } |
| | 386 | |
| | 387 | /* |
| | 388 | * Find an object in the table matching a given leading substring. |
| | 389 | * We'll return the longest-named entry that matches a leading substring |
| | 390 | * of the given string. For example, if there's are entires A, AB, ABC, |
| | 391 | * and ABCE, and this routine is called to find something matching |
| | 392 | * ABCDEFGH, we'll return ABC as the match (not ABCE, since it doesn't |
| | 393 | * match any leading substring of the given string, and not A or AB, |
| | 394 | * even though they match, since ABC also matches and it's longer). |
| | 395 | */ |
| | 396 | CVmHashEntry *CVmHashTable::find_leading_substr(const char *str, size_t len) |
| | 397 | { |
| | 398 | size_t sublen; |
| | 399 | CVmHashEntry *entry; |
| | 400 | |
| | 401 | /* |
| | 402 | * try to find each leading substring, starting with the longest, |
| | 403 | * decreasing by one character on each iteration, until we've used |
| | 404 | * the whole string |
| | 405 | */ |
| | 406 | for (sublen = len ; sublen > 0 ; --sublen) |
| | 407 | { |
| | 408 | /* if this substring matches, use it */ |
| | 409 | if ((entry = find(str, sublen)) != 0) |
| | 410 | return entry; |
| | 411 | } |
| | 412 | |
| | 413 | /* we didn't find it */ |
| | 414 | return 0; |
| | 415 | } |
| | 416 | |
| | 417 | /* |
| | 418 | * Enumerate all entries |
| | 419 | */ |
| | 420 | void CVmHashTable::enum_entries(void (*func)(void *, CVmHashEntry *), |
| | 421 | void *ctx) |
| | 422 | { |
| | 423 | CVmHashEntry **tableptr; |
| | 424 | size_t i; |
| | 425 | |
| | 426 | /* go through each hash value */ |
| | 427 | for (tableptr = table_, i = 0 ; i < table_size_ ; ++i, ++tableptr) |
| | 428 | { |
| | 429 | CVmHashEntry *entry; |
| | 430 | CVmHashEntry *nxt; |
| | 431 | |
| | 432 | /* go through each entry at this hash value */ |
| | 433 | for (entry = *tableptr ; entry ; entry = nxt) |
| | 434 | { |
| | 435 | /* |
| | 436 | * remember the next entry, in case the callback deletes the |
| | 437 | * current entry |
| | 438 | */ |
| | 439 | nxt = entry->nxt_; |
| | 440 | |
| | 441 | /* invoke the callback on this entry */ |
| | 442 | (*func)(ctx, entry); |
| | 443 | } |
| | 444 | } |
| | 445 | } |
| | 446 | |
| | 447 | /* |
| | 448 | * Enumerate all entries - ultra-safe version. This version should be |
| | 449 | * used when the callback code might delete arbitrary entries from the |
| | 450 | * hash table. This version is slower than the standard enum_entries, but |
| | 451 | * will tolerate any changes to the table made in the callback. |
| | 452 | */ |
| | 453 | void CVmHashTable::safe_enum_entries(void (*func)(void *, CVmHashEntry *), |
| | 454 | void *ctx) |
| | 455 | { |
| | 456 | CVmHashEntry **tableptr; |
| | 457 | size_t i; |
| | 458 | |
| | 459 | /* go through each hash value */ |
| | 460 | for (tableptr = table_, i = 0 ; i < table_size_ ; ++i, ++tableptr) |
| | 461 | { |
| | 462 | size_t list_idx; |
| | 463 | |
| | 464 | /* |
| | 465 | * start at the first (0th) entry in the current hash chain, and |
| | 466 | * keep going until we run out of entries in the chain |
| | 467 | */ |
| | 468 | for (list_idx = 0 ; ; ++list_idx) |
| | 469 | { |
| | 470 | CVmHashEntry *entry; |
| | 471 | size_t j; |
| | 472 | |
| | 473 | /* |
| | 474 | * Scan the hash chain for the current entry index. |
| | 475 | * |
| | 476 | * This is the part that makes this version slower than the |
| | 477 | * standard version and safer than the standard version. It's |
| | 478 | * slower than enum_entries() because we must scan the chain |
| | 479 | * list on every iteration to find the next entry, whereas |
| | 480 | * enum_entries() simply keeps a pointer to the next entry. |
| | 481 | * It's safer because we don't keep any pointers - if next |
| | 482 | * element is deleted in the callback in enum_entries(), that |
| | 483 | * stored next pointer would be invalid, but we store no |
| | 484 | * pointers that could become stale. |
| | 485 | */ |
| | 486 | for (j = 0, entry = *tableptr ; j < list_idx && entry != 0 ; |
| | 487 | entry = entry->nxt_, ++j) ; |
| | 488 | |
| | 489 | /* if we failed to find the entry, we're done with this chain */ |
| | 490 | if (entry == 0) |
| | 491 | break; |
| | 492 | |
| | 493 | /* invoke the callback on this entry */ |
| | 494 | (*func)(ctx, entry); |
| | 495 | } |
| | 496 | } |
| | 497 | } |
| | 498 | |
| | 499 | |
| | 500 | /* |
| | 501 | * Move all entries in this table to a new table |
| | 502 | */ |
| | 503 | void CVmHashTable::move_entries_to(CVmHashTable *new_tab) |
| | 504 | { |
| | 505 | CVmHashEntry **tableptr; |
| | 506 | size_t i; |
| | 507 | |
| | 508 | /* go through each hash value */ |
| | 509 | for (tableptr = table_, i = 0 ; i < table_size_ ; ++i, ++tableptr) |
| | 510 | { |
| | 511 | CVmHashEntry *entry; |
| | 512 | CVmHashEntry *nxt; |
| | 513 | |
| | 514 | /* go through each entry at this hash value */ |
| | 515 | for (entry = *tableptr ; entry ; entry = nxt) |
| | 516 | { |
| | 517 | /* |
| | 518 | * remember the next entry, since we'll be unlinking it from |
| | 519 | * this table, which will render the nxt_ member unusable |
| | 520 | * for the purposes of completing this enumeration |
| | 521 | */ |
| | 522 | nxt = entry->nxt_; |
| | 523 | |
| | 524 | /* |
| | 525 | * clear the 'next' pointer in this entry, to unlink it from |
| | 526 | * our table - since everything is being removed, there's no |
| | 527 | * need to worry about what came before us |
| | 528 | */ |
| | 529 | entry->nxt_ = 0; |
| | 530 | |
| | 531 | /* add the entry to the new hash table */ |
| | 532 | new_tab->add(entry); |
| | 533 | } |
| | 534 | |
| | 535 | /* |
| | 536 | * clear this hash value chain head - we've now removed |
| | 537 | * everything from it |
| | 538 | */ |
| | 539 | *tableptr = 0; |
| | 540 | } |
| | 541 | } |
| | 542 | |
| | 543 | /* ------------------------------------------------------------------------ */ |
| | 544 | /* |
| | 545 | * Debugging Functions |
| | 546 | */ |
| | 547 | |
| | 548 | #ifdef T3_DEBUG |
| | 549 | |
| | 550 | /* |
| | 551 | * dump information the hash table to stderr |
| | 552 | */ |
| | 553 | void CVmHashTable::debug_dump() const |
| | 554 | { |
| | 555 | long total; |
| | 556 | long longest; |
| | 557 | long avg; |
| | 558 | int over_avg; |
| | 559 | int empty; |
| | 560 | size_t i; |
| | 561 | |
| | 562 | /* gather statistics on the hash table */ |
| | 563 | for (total = longest = 0, empty = 0, i = 0 ; i < table_size_ ; ++i) |
| | 564 | { |
| | 565 | CVmHashEntry *cur; |
| | 566 | int curlen; |
| | 567 | |
| | 568 | /* scan this chain */ |
| | 569 | for (curlen = 0, cur = table_[i] ; cur != 0 ; cur = cur->nxt_) |
| | 570 | { |
| | 571 | ++curlen; |
| | 572 | ++total; |
| | 573 | } |
| | 574 | |
| | 575 | /* if it's empty, so note */ |
| | 576 | if (curlen == 0) |
| | 577 | ++empty; |
| | 578 | |
| | 579 | /* if it's longer than the longest, so note */ |
| | 580 | if (curlen > longest) |
| | 581 | longest = curlen; |
| | 582 | } |
| | 583 | |
| | 584 | /* calculate the average length */ |
| | 585 | avg = total/table_size_; |
| | 586 | |
| | 587 | /* count chains over average length */ |
| | 588 | for (over_avg = 0, i = 0 ; i < table_size_ ; ++i) |
| | 589 | { |
| | 590 | CVmHashEntry *cur; |
| | 591 | int curlen; |
| | 592 | |
| | 593 | /* scan this chain */ |
| | 594 | for (curlen = 0, cur = table_[i] ; cur != 0 ; cur = cur->nxt_) |
| | 595 | ++curlen; |
| | 596 | |
| | 597 | /* if it's over average length, note it */ |
| | 598 | if (curlen > avg) |
| | 599 | ++over_avg; |
| | 600 | } |
| | 601 | |
| | 602 | |
| | 603 | /* display the statistics */ |
| | 604 | fprintf(stderr, |
| | 605 | "hash table: total %ld, longest %ld, average %ld\n" |
| | 606 | "number of buckets over average length: %d\n" |
| | 607 | "number of empty buckets: %d\n", |
| | 608 | total, longest, avg, over_avg, empty); |
| | 609 | } |
| | 610 | |
| | 611 | #else /* T3_DEBUG */ |
| | 612 | |
| | 613 | /* dummy functions for release builds */ |
| | 614 | void CVmHashTable::debug_dump() const { } |
| | 615 | |
| | 616 | |
| | 617 | #endif /* T3_DEBUG */ |
| | 618 | |