cfad47cfa3/tads3/vmstrcmp.cpp
Commiter: Nikos Chantziaras
Author: Nikos Chantziaras
Revision: cfad47cfa3
File Size: 31 KB
(June 01, 2009 20:54 UTC) Almost 3 years ago
Initial commit.
Showing without highlighting since it looks like a big file and may slow your browser - show with highlighting
/*
* Copyright (c) 2002 by Michael J. Roberts. All Rights Reserved.
*
* Please see the accompanying license file, LICENSE.TXT, for information
* on using and copying this software.
*/
/*
Name
vmstrcmp.cpp - T3 String Comparator intrinsic class
Function
Notes
Modified
09/05/02 MJRoberts - Creation
*/
#include <stdlib.h>
#include <os.h>
#include "utf8.h"
#include "vmuni.h"
#include "vmtype.h"
#include "vmobj.h"
#include "vmmeta.h"
#include "vmglob.h"
#include "vmstrcmp.h"
#include "vmstack.h"
#include "vmbif.h"
#include "vmfile.h"
#include "vmlst.h"
/* ------------------------------------------------------------------------ */
/*
* Statics
*/
/*
 * Metaclass registration.  A single file-local registration object is
 * created here, and the class-level metaclass_reg_ pointer is set to its
 * address; get_prop() uses that pointer to look up the registration index
 * and the class object.
 */
static CVmMetaclassStrComp metaclass_reg_obj;
CVmMetaclass *CVmObjStrComp::metaclass_reg_ = &metaclass_reg_obj;
/*
 * Function table.  This is an array of pointers to member functions;
 * get_prop() indexes it with the vector index it obtains from the
 * metaclass table, so the order of the entries is significant.  Entry 0
 * is getp_undef (presumably the handler for properties with no mapping -
 * TODO confirm against prop_to_vector_idx).
 */
int (CVmObjStrComp::
*CVmObjStrComp::func_table_[])(VMG_ vm_obj_id_t self,
vm_val_t *retval, uint *argc) =
{
&CVmObjStrComp::getp_undef,
&CVmObjStrComp::getp_calc_hash,
&CVmObjStrComp::getp_match_values
};
/* ------------------------------------------------------------------------ */
/*
 * Deletion notification.  Everything this object owns is reachable
 * through its extension, so releasing the extension is all we need to do.
 * The in_root_set argument is not used.
 */
void CVmObjStrComp::notify_delete(VMG_ int /*in_root_set*/)
{
    /* release the extension and the mapping tables hanging off of it */
    delete_ext(vmg0_);
}
/*
 * Release the extension, if there is one.  Each allocated first-level
 * mapping table is freed with t3free(), then the extension block itself
 * is returned to the variable heap and our pointer to it is cleared.
 * Does nothing if there is no extension.
 */
void CVmObjStrComp::delete_ext(VMG0_)
{
    vmobj_strcmp_ext *const cur = get_ext();

    /* nothing to do if we have no extension */
    if (cur == 0)
        return;

    /* free every first-level mapping table that was actually allocated */
    const size_t table_cnt = sizeof(cur->equiv)/sizeof(cur->equiv[0]);
    for (size_t slot = 0 ; slot < table_cnt ; ++slot)
    {
        if (cur->equiv[slot] == 0)
            continue;

        t3free(cur->equiv[slot]);
    }

    /* give the extension block back to the heap, and forget about it */
    G_mem->get_var_heap()->free_mem(ext_);
    ext_ = 0;
}
/* ------------------------------------------------------------------------ */
/*
 * Set a property.  This class has no settable properties, so every
 * attempt is rejected by throwing VMERR_INVALID_SETPROP; none of the
 * arguments are examined.
 */
void CVmObjStrComp::set_prop(
    VMG_ class CVmUndo *, vm_obj_id_t, vm_prop_id_t, const vm_val_t *)
{
    /* there is nothing that can be set on this object */
    err_throw(VMERR_INVALID_SETPROP);
}
/* ------------------------------------------------------------------------ */
/*
 * Get a property.  The property ID is translated to an index into
 * func_table_ and the corresponding handler is invoked.  If the handler
 * reports that it handled the property, *source_obj is set to our
 * metaclass's class object and we return TRUE; otherwise we return
 * whatever the base class CVmObject::get_prop() returns.
 */
int CVmObjStrComp::get_prop(VMG_ vm_prop_id_t prop, vm_val_t *val,
                            vm_obj_id_t self, vm_obj_id_t *source_obj,
                            uint *argc)
{
    /* find the function table slot that corresponds to the property */
    const uint slot = G_meta_table->prop_to_vector_idx(
        metaclass_reg_->get_reg_idx(), prop);

    /* if our own handler declines, fall back on the inherited handling */
    if (!(this->*func_table_[slot])(vmg_ self, val, argc))
        return CVmObject::get_prop(vmg_ prop, val, self, source_obj, argc);

    /* our handler took care of it, so the source is our class object */
    *source_obj = metaclass_reg_->get_class_obj(vmg0_);
    return TRUE;
}
/* ------------------------------------------------------------------------ */
/*
 * Abstract equivalence mapping reader.  This can be implemented on
 * different underlying data sources; alloc_ext() pulls the mappings for a
 * new extension through this interface, one mapping per call.
 */
class CVmObjStrCompMapReader
{
public:
    /*
     * Virtual destructor.  This is a polymorphic base class, so give it a
     * virtual destructor to make destruction through a base-class pointer
     * well defined.
     */
    virtual ~CVmObjStrCompMapReader() { }

    /*
     * Read the next equivalence mapping.  We fill in *ref_ch with the
     * reference character, *uc_result_flags with the upper-case result
     * flags, and *lc_result_flags with the lower-case result flags.  On
     * input, *value_ch_cnt is the maximum number of value characters we
     * can store in the buffer at val_buf; on return, it is the number of
     * characters in the mapping, which might be higher than the buffer
     * limit given on input.  We never write more than the input limit to
     * val_buf, so the count returned can exceed the number of characters
     * actually stored: we always report the true size of the mapping,
     * even when the whole thing didn't fit.  Callers must therefore not
     * assume that the returned count fits the buffer they supplied.
     *
     * Each call retrieves one mapping and then moves on to the next one.
     * The caller is responsible for calling this the correct number of
     * times, so implementations don't have to worry about running out of
     * mappings.
     */
    virtual void read_mapping(VMG_ wchar_t *ref_ch,
                              unsigned long *uc_result_flags,
                              unsigned long *lc_result_flags,
                              wchar_t *val_buf, size_t *value_ch_cnt) = 0;
};
/*
 * Stream-based mapping reader.  This reads mappings from a stream that
 * uses our serialization format for image and saved-state files.  Each
 * mapping is stored as a 2-byte reference character, a 1-byte value
 * character count, two 4-byte flag words (upper-case, then lower-case),
 * and then the value characters at 2 bytes apiece.
 */
class CVmObjStrCompMapReaderStream: public CVmObjStrCompMapReader
{
public:
    CVmObjStrCompMapReaderStream(CVmStream *str)
        : str_(str)
    {
    }

    /*
     * Read the next mapping from the stream.  The whole mapping is always
     * consumed from the stream, but only the first characters that fit
     * within the caller's limit (*value_ch_cnt on entry) are stored in
     * val_buf; on return, *value_ch_cnt is the count stored in the stream.
     */
    virtual void read_mapping(VMG_ wchar_t *ref_ch,
                              unsigned long *uc_result_flags,
                              unsigned long *lc_result_flags,
                              wchar_t *val_buf, size_t *value_ch_cnt)
    {
        /* note the caller's buffer capacity before we overwrite it */
        const size_t buf_cap = *value_ch_cnt;

        /* read the fixed part: reference character, count, and flags */
        *ref_ch = (wchar_t)str_->read_uint2();
        const size_t stored_cnt = str_->read_byte();
        *value_ch_cnt = stored_cnt;
        *uc_result_flags = str_->read_uint4();
        *lc_result_flags = str_->read_uint4();

        /* we can keep only as many characters as the buffer can hold */
        const size_t keep_cnt = (stored_cnt < buf_cap ? stored_cnt : buf_cap);

        /*
         * Read every value character, so that the stream is left at the
         * start of the next mapping, but store only the ones that fit.
         */
        for (size_t n = 0 ; n < stored_cnt ; ++n)
        {
            const wchar_t ch = (wchar_t)str_->read_uint2();
            if (n < keep_cnt)
                val_buf[n] = ch;
        }
    }

protected:
    /* the stream we read from */
    CVmStream *str_;
};
/*
 * Constructor-list mapping reader.  This reads mappings from list data in
 * the format that we use in our constructor: each element of the list is
 * a sublist of the form [ref_string, value_string, uc_flags, lc_flags].
 */
class CVmObjStrCompMapReaderList: public CVmObjStrCompMapReader
{
public:
    /* remember the list, and start at its first element (index 1) */
    CVmObjStrCompMapReaderList(const char *lst)
        : lst_(lst), idx_(1)
    {
    }

    /*
     * Read the next mapping from the list.  Throws VMERR_BAD_TYPE_BIF if
     * the element isn't a sublist, if either of its first two elements
     * isn't a string, or if either flag element isn't an integer; throws
     * VMERR_BAD_VAL_BIF if either string is empty.  At most the caller's
     * limit (*value_ch_cnt on entry) of characters are stored in val_buf,
     * but *value_ch_cnt is set to the full character length of the value
     * string.
     */
    virtual void read_mapping(VMG_ wchar_t *ref_ch,
                              unsigned long *uc_result_flags,
                              unsigned long *lc_result_flags,
                              wchar_t *val_buf, size_t *value_ch_cnt)
    {
        vm_val_t ele;

        /* note how much room the caller's value buffer has */
        const size_t buf_cap = *value_ch_cnt;

        /* fetch the current mapping entry, which has to be a sublist */
        CVmObjList::index_list(vmg_ &ele, lst_, idx_);
        const char *const entry = ele.get_as_list(vmg0_);
        if (entry == 0)
            err_throw(VMERR_BAD_TYPE_BIF);

        /* the entry's first element is the reference character string */
        CVmObjList::index_list(vmg_ &ele, entry, 1);
        const char *const ref_str = ele.get_as_string(vmg0_);

        /* the entry's second element is the value string */
        CVmObjList::index_list(vmg_ &ele, entry, 2);
        const char *const val_str = ele.get_as_string(vmg0_);

        /* both of these have to be strings... */
        if (ref_str == 0 || val_str == 0)
            err_throw(VMERR_BAD_TYPE_BIF);

        /* ...and neither one is allowed to be empty */
        if (vmb_get_len(ref_str) == 0 || vmb_get_len(val_str) == 0)
            err_throw(VMERR_BAD_VAL_BIF);

        /* the reference character is the ref string's first character */
        *ref_ch = utf8_ptr::s_getch(ref_str + VMB_LEN);

        /*
         * Walk the value string a character at a time.  Store each
         * character for which the caller's buffer has room, but count all
         * of them, so that we report the string's true length.
         */
        utf8_ptr src;
        src.set((char *)val_str + VMB_LEN);
        size_t bytes_left = vmb_get_len(val_str);
        *value_ch_cnt = 0;
        while (bytes_left != 0)
        {
            if (*value_ch_cnt < buf_cap)
                val_buf[*value_ch_cnt] = src.getch();

            ++(*value_ch_cnt);
            src.inc(&bytes_left);
        }

        /* the entry's third element is the integer upper-case flags */
        CVmObjList::index_list(vmg_ &ele, entry, 3);
        if (ele.typ != VM_INT)
            err_throw(VMERR_BAD_TYPE_BIF);
        *uc_result_flags = ele.val.intval;

        /* the entry's fourth element is the integer lower-case flags */
        CVmObjList::index_list(vmg_ &ele, entry, 4);
        if (ele.typ != VM_INT)
            err_throw(VMERR_BAD_TYPE_BIF);
        *lc_result_flags = ele.val.intval;

        /* the next call picks up with the following entry */
        ++idx_;
    }

protected:
    /* the list data we read from */
    const char *lst_;

    /* the index in the list of the next mapping to retrieve */
    size_t idx_;
};
/* ------------------------------------------------------------------------ */
/*
 * Create from stack.  We take the following constructor arguments:
 *
 *   trunc_len      (int: truncation length, or nil for no truncation)
 *   case_sensitive (bool: case sensitivity flag)
 *   mappings       (list: equivalence mappings, or nil for none)
 *
 * The equivalence mappings are given as a list of lists: the main list
 * has one sublist per mapping, and each mapping sublist looks like this:
 *
 *   ['ref_char', 'val_string', uc_flags, lc_flags]
 *
 * 'ref_char' is a one-character string giving the reference character of
 * the mapping, and 'val_string' is a string of one or more characters
 * that can match the reference character in a value (input) string.
 * uc_flags and lc_flags are integers giving the upper-case and lower-case
 * flags (respectively) that are added to the match result code when the
 * mapping is used to match a pair of strings.
 *
 * Throws VMERR_WRONG_NUM_OF_ARGS unless exactly three arguments are
 * given, and VMERR_BAD_TYPE_BIF if the mappings argument or one of its
 * elements has the wrong type.  Returns the ID of the new object.
 */
vm_obj_id_t CVmObjStrComp::create_from_stack(
    VMG_ const uchar **pc_ptr, uint argc)
{
    /* we need exactly three arguments */
    if (argc != 3)
        err_throw(VMERR_WRONG_NUM_OF_ARGS);

    /*
     * Pop the truncation length.  A nil value means that truncation isn't
     * allowed, which we represent as a length of zero.
     */
    size_t trunc_len = 0;
    if (G_stk->get(0)->typ != VM_NIL)
        trunc_len = CVmBif::pop_int_val(vmg0_);
    else
        G_stk->discard();

    /* pop the case sensitivity flag */
    const int case_sensitive = CVmBif::pop_bool_val(vmg0_);

    /*
     * Look at the mapping list, but leave it on the stack for now (for gc
     * protection).  A nil value means that there are no mappings at all.
     */
    const char *lst = 0;
    size_t equiv_cnt = 0;
    size_t total_chars = 0;
    if (G_stk->get(0)->typ != VM_NIL)
    {
        /* anything other than nil has to be a list */
        lst = G_stk->get(0)->get_as_list(vmg0_);
        if (lst == 0)
            err_throw(VMERR_BAD_TYPE_BIF);

        /* there's one equivalence mapping per list element */
        equiv_cnt = vmb_get_len(lst);

        /* add up the character lengths of all of the value strings */
        for (size_t idx = 1 ; idx <= equiv_cnt ; ++idx)
        {
            vm_val_t ele;

            /* each mapping has to be a sublist */
            CVmObjList::index_list(vmg_ &ele, lst, idx);
            const char *const entry = ele.get_as_list(vmg0_);
            if (entry == 0)
                err_throw(VMERR_BAD_TYPE_BIF);

            /* the sublist's second element is the value string */
            CVmObjList::index_list(vmg_ &ele, entry, 2);
            const char *const val_str = ele.get_as_string(vmg0_);
            if (val_str == 0)
                err_throw(VMERR_BAD_TYPE_BIF);

            /* count this string's characters (not bytes) in the total */
            utf8_ptr chars;
            chars.set((char *)val_str + VMB_LEN);
            total_chars += chars.len(vmb_get_len(val_str));
        }
    }

    /* allocate an object ID and create the object */
    const vm_obj_id_t id = vm_new_id(vmg_ FALSE, FALSE, FALSE);
    CVmObjStrComp *const obj = new (vmg_ id) CVmObjStrComp();

    /* build the object's extension, reading mappings from the list */
    CVmObjStrCompMapReaderList reader(lst);
    obj->alloc_ext(vmg_ trunc_len, case_sensitive, equiv_cnt, total_chars,
                   &reader);

    /* we're done with the list, so drop the gc protection */
    G_stk->discard();

    /* the result is the new object's ID */
    return id;
}
/* ------------------------------------------------------------------------ */
/*
 * Load from an image file.  The image data block is wrapped in a
 * read-only memory stream and handed to the common stream loader.
 */
void CVmObjStrComp::load_from_image(VMG_ vm_obj_id_t /*self*/,
                                    const char *ptr, size_t len)
{
    /* view the image data as a stream, and load from that */
    CVmReadOnlyMemoryStream image_data(ptr, len);
    load_from_stream(vmg_ &image_data);
}
/*
 * Load from an abstract stream.  The stream starts with a fixed header of
 * four 2-byte values: the truncation length, the flags (bit 0x0001 is the
 * case-sensitivity flag), the number of equivalence mappings, and the
 * total number of value characters.  The mappings themselves follow, and
 * are read by alloc_ext() through a stream-based mapping reader.
 */
void CVmObjStrComp::load_from_stream(VMG_ CVmStream *str)
{
    /* read the fixed header fields, in the order they're stored */
    const unsigned int trunc_len = str->read_uint2();
    const unsigned int flags = str->read_uint2();
    const unsigned int equiv_cnt = str->read_uint2();
    const unsigned int total_chars = str->read_uint2();

    /* pick the case-sensitivity bit out of the flags */
    const int case_sensitive = ((flags & 0x0001) != 0);

    /* build the extension, reading the mappings from the stream */
    CVmObjStrCompMapReaderStream reader(str);
    alloc_ext(vmg_ trunc_len, case_sensitive, equiv_cnt, total_chars,
              &reader);
}
/*
 * Allocate and initialize our extension.
 *
 * Any existing extension is deleted first.  The new extension is a single
 * heap block holding the fixed vmobj_strcmp_ext structure, followed by
 * equiv_cnt mapping structures, followed by total_chars wchar_t's of
 * value-string storage that the mappings share.  The mappings are then
 * read one at a time from 'reader' and entered into the two-tier lookup
 * table, which is indexed by the high byte and then the low byte of the
 * reference character; second-tier tables are allocated with t3malloc()
 * as they're needed.
 *
 * trunc_len and case_sensitive are stored in the extension as given.
 * total_chars is treated as a hard limit on value-character storage: if
 * the reader reports a mapping longer than the space that remains, the
 * mapping is cut off at the space available.
 */
void CVmObjStrComp::alloc_ext(VMG_ size_t trunc_len, int case_sensitive,
                              size_t equiv_cnt, size_t total_chars,
                              CVmObjStrCompMapReader *reader)
{
    size_t siz;
    vmobj_strcmp_ext *ext;
    vmobj_strcmp_equiv *nxt_equiv;
    wchar_t *nxt_ch;
    size_t ch_rem;
    size_t i;
    size_t idx1, idx2;

    /* delete my extension, if I have one already */
    delete_ext(vmg0_);

    /*
     * Calculate how much space we need for our extension.  In addition to
     * the fixed part, allocate space for one vmobj_strcmp_equiv structure
     * per mapping, plus the wchar_t's for the value mappings.
     */
    siz = sizeof(vmobj_strcmp_ext)
          + (equiv_cnt * sizeof(vmobj_strcmp_equiv))
          + (total_chars * sizeof(wchar_t));

    /* allocate our new extension */
    ext_ = (char *)G_mem->get_var_heap()->alloc_mem(siz, this);
    ext = get_ext();

    /*
     * set up our suballocation pool pointers: put the equivalence mapping
     * structures after the fixed part of the extension, and put the
     * wchar_t's after the equivalence mappings
     */
    nxt_equiv = (vmobj_strcmp_equiv *)(ext + 1);
    nxt_ch = (wchar_t *)(&nxt_equiv[equiv_cnt]);
    ch_rem = total_chars;

    /* initialize the extension structure */
    ext->trunc_len = trunc_len;
    ext->case_sensitive = case_sensitive;

    /*
     * we have no equivalence mappings installed yet, so clear out the
     * first tier of the mapping array
     */
    for (i = 0 ; i < sizeof(ext->equiv)/sizeof(ext->equiv[0]) ; ++i)
        ext->equiv[i] = 0;

    /* load the mappings */
    for (i = 0 ; i < equiv_cnt ; ++i, ++nxt_equiv)
    {
        wchar_t ref_ch;

        /*
         * set up our equivalent's value buffer with the remainder of our
         * main buffer
         */
        nxt_equiv->val_ch = nxt_ch;
        nxt_equiv->val_ch_cnt = ch_rem;

        /* read the mapping */
        reader->read_mapping(vmg_ &ref_ch,
                             &nxt_equiv->uc_result_flags,
                             &nxt_equiv->lc_result_flags,
                             nxt_equiv->val_ch, &nxt_equiv->val_ch_cnt);

        /*
         * The reader reports the mapping's true length, which can be more
         * than the space we offered it if the source's character total
         * doesn't agree with its mappings.  The reader stored only what
         * fit, so limit the recorded length to match; otherwise we'd
         * advance past the end of our buffer, and the unsigned
         * subtraction below would wrap around and tell the remaining
         * mappings that they have a huge buffer to write into.
         */
        if (nxt_equiv->val_ch_cnt > ch_rem)
            nxt_equiv->val_ch_cnt = ch_rem;

        /* deduct this mapping from our main value character buffer */
        nxt_ch += nxt_equiv->val_ch_cnt;
        ch_rem -= nxt_equiv->val_ch_cnt;

        /* if we don't have a first-tier table for this character, add one */
        idx1 = (ref_ch >> 8) & 0xFF;
        idx2 = (ref_ch & 0xFF);
        if (ext->equiv[idx1] == 0)
        {
            vmobj_strcmp_equiv **p;
            size_t j;

            /* allocate a first-tier table for this index */
            ext->equiv[idx1] = p = (vmobj_strcmp_equiv **)t3malloc(
                256 * sizeof(vmobj_strcmp_equiv *));

            /* clear out the first-tier table */
            for (j = 0 ; j < 256 ; ++j, ++p)
                *p = 0;
        }

        /* set the mapping for this character */
        ext->equiv[idx1][idx2] = nxt_equiv;
    }
}
/* ------------------------------------------------------------------------ */
/*
 * Save to a file.  The file is wrapped in a stream and the object is
 * serialized to it with no size limit (a null bytes_avail pointer).
 */
void CVmObjStrComp::save_to_file(VMG_ class CVmFile *fp)
{
    /* serialize to the file through a stream wrapper */
    CVmFileStream out(fp);
    write_to_stream(vmg_ &out, 0);
}
/*
 * Serialize to a stream.
 *
 * The format is an 8-byte header (truncation length, flags, mapping
 * count, and total value character count, at 2 bytes each) followed by
 * the mappings.  Each mapping is written as the 2-byte reference
 * character, a 1-byte value character count, the 4-byte upper-case flags,
 * the 4-byte lower-case flags, and then the value characters at 2 bytes
 * apiece.  This is the layout that load_from_stream() reads back.
 *
 * If bytes_avail is non-null and *bytes_avail is less than the space we
 * need, nothing at all is written.  In every case, the return value is
 * the number of bytes needed.
 */
ulong CVmObjStrComp::write_to_stream(VMG_ CVmStream *str, ulong *bytes_avail)
{
    wchar_t ref_ch;
    vmobj_strcmp_ext *ext = get_ext();
    size_t i;
    vmobj_strcmp_equiv ***p;
    size_t total_value_ch;
    size_t equiv_cnt;
    size_t need_size;

    /* get the mapping totals */
    count_equiv_mappings(&equiv_cnt, &total_value_ch);

    /*
     * Calculate our space needs.  We need 8 bytes for the fixed header,
     * 11 bytes per equivalent mapping, and 2 bytes per value string
     * character.
     */
    need_size = 8 + (11 * equiv_cnt) + (2 * total_value_ch);

    /* if we have a size limit, check to make sure we can abide by it */
    if (bytes_avail != 0 && need_size > *bytes_avail)
    {
        /*
         * there's not enough space in the output stream for us, so don't
         * write anything at all; simply return the amount of space we
         * need
         */
        return need_size;
    }

    /* write out the serialization structure header */
    str->write_int2(ext->trunc_len);
    str->write_int2(ext->case_sensitive ? 0x0001 : 0x0000);
    str->write_int2(equiv_cnt);
    str->write_int2(total_value_ch);

    /* run through our equivalence table again and write the mappings */
    for (i = 0, p = ext->equiv ;
         i < sizeof(ext->equiv)/sizeof(ext->equiv[0]) ; ++i, ++p)
    {
        vmobj_strcmp_equiv **ep;
        size_t j;

        /* if this first-tier mapping is unused, skip it */
        if (*p == 0)
            continue;

        /* run through our second-level table */
        for (j = 0, ep = *p ; j < 256 ; ++j, ++ep)
        {
            /* if this mapping is used, write it out */
            if (*ep != 0)
            {
                size_t k;
                wchar_t *vp;

                /*
                 * Reconstruct the reference character from the mapping's
                 * position in the table.  alloc_ext() files each mapping
                 * under the high byte of its reference character in the
                 * first tier and the low byte in the second tier, so the
                 * character is the two indices put back together.
                 */
                ref_ch = (wchar_t)((i << 8) | j);

                /*
                 * Write the fixed part of the mapping.
                 *
                 * NOTE(review): the count goes out as a single byte, so a
                 * mapping with more than 255 value characters would be
                 * written with a truncated count even though all of its
                 * characters are written below - confirm that mappings
                 * that long can't be created.
                 */
                str->write_int2(ref_ch);
                str->write_byte((uchar)(*ep)->val_ch_cnt);
                str->write_int4((*ep)->uc_result_flags);
                str->write_int4((*ep)->lc_result_flags);

                /* write the value mapping characters */
                for (k = (*ep)->val_ch_cnt, vp = (*ep)->val_ch ; k != 0 ;
                     --k, ++vp)
                {
                    /* write this character */
                    str->write_int2(*vp);
                }
            }
        }
    }

    /* return our space needs */
    return need_size;
}
/*
* Count the equivalence mappings.
*/
void CVmObjStrComp::count_equiv_mappings(size_t *equiv_cnt,
size_t *total_value_ch)
{
vmobj_strcmp_ext *ext = get_ext();
size_t i;
vmobj_strcmp_equiv ***p;
/* run through our table and count up the mappings */
for (*total_value_ch = 0, *equiv_cnt = 0, i = 0, p = ext->equiv ;
i < sizeof(ext->equiv)/sizeof(ext->equiv[0]) ; ++i, ++p)
{
vmobj_strcmp_equiv **ep;
size_t j;
/* if this first-tier mapping is unused, skip it */
if (*p == 0)
continue;
/* run through our second-level table */
for (j = 0, ep = *p ; j < 256 ; ++j, ++ep)
{
/* if this mapping is used, count it */
if (*ep != 0)
{
/* count this equivalent mapping */
++(*equiv_cnt);
/* count its value mapping characters in the total */
*total_value_ch += (*ep)->val_ch_cnt;
}
}
}
}
/* ------------------------------------------------------------------------ */
/*
 * Restore from a file.  The file is wrapped in a stream and handed to
 * the common stream loader; the fixup table argument is not used.
 */
void CVmObjStrComp::restore_from_file(VMG_ vm_obj_id_t /*self*/,
                                      class CVmFile *fp, CVmObjFixup *)
{
    /* read our state back from the file through a stream wrapper */
    CVmFileStream saved_state(fp);
    load_from_stream(vmg_ &saved_state);
}
/* ------------------------------------------------------------------------ */
/*
 * Property evaluator - calculate a hash value.  Takes one argument, which
 * must be a string (otherwise we throw VMERR_BAD_TYPE_BIF), and sets
 * *retval to the integer hash code that calc_str_hash() computes for it.
 * Always returns TRUE.
 */
int CVmObjStrComp::getp_calc_hash(VMG_ vm_obj_id_t /*self*/,
                                  vm_val_t *retval, uint *argc)
{
    static CVmNativeCodeDesc desc(1);

    /* check the argument count; if that settles the call, we're done */
    if (get_prop_check_argc(retval, argc, &desc))
        return TRUE;

    /*
     * Get the string argument, leaving it on the stack while we work with
     * it (for gc protection).
     */
    const char *const str = G_stk->get(0)->get_as_string(vmg0_);
    if (str == 0)
        err_throw(VMERR_BAD_TYPE_BIF);

    /* hash the string's contents, and return the result */
    const unsigned int hash = calc_str_hash(str + VMB_LEN, vmb_get_len(str));
    retval->set_int(hash);

    /* we no longer need the argument */
    G_stk->discard();

    /* handled */
    return TRUE;
}
/*
* Calculate a hash value
*/
unsigned int CVmObjStrComp::calc_str_hash(const char *strp, size_t len)
{
vmobj_strcmp_ext *ext = get_ext();
utf8_ptr p;
unsigned int hash;
size_t char_limit;
/* set up to scan the string */
p.set((char *)strp);
/*
* Limit the scan to our truncation length, because we can't
* distinguish hash buckets beyond the truncation point (if we did, a
* truncated string wouldn't hash into the same bucket as a longer
* string it matches; but all matching strings are required to go into
* the same bucket, so such a hash mismatch is not allowed).
*/
char_limit = ext->trunc_len;
/* scan the string */
for (hash = 0 ; len != 0 ; p.inc(&len))
{
wchar_t ch;
vmobj_strcmp_equiv **t1;
vmobj_strcmp_equiv *eq;
/* get the current character */
ch = p.getch();
/* check for a substitution mapping for this character */
if ((t1 = ext->equiv[(ch >> 8) & 0xFF]) != 0
&& (eq = t1[ch & 0xFF]) != 0)
{
wchar_t *vp;
size_t vlen;
/*
* This character has a mapping, so add the contribution from
* the canonical form of the character, which is the value
* side of the mapping.
*/
for (vp = eq->val_ch, vlen = eq->val_ch_cnt ; vlen != 0 ;
++vp, --vlen)
{
/* get this character */
ch = *vp;
/* convert to lower case if we're insensitive to case */
if (!ext->case_sensitive)
ch = t3_to_lower(ch);
/* add it to the hash code */
hash += ch;
hash &= 0xFFFF;
/* if we've reached the truncation limit, we're done */
if (char_limit == 1)
return hash;
else if (char_limit != 0)
--char_limit;
}
}
else
{
/*
* if we are not sensitive to case, always use the lower-case
* representation of a character for its hash value
*/
if (!ext->case_sensitive)
ch = t3_to_lower(ch);
/* add the contribution from this character */
hash += ch;
hash &= 0xFFFF;
/* if we've reached the truncation limit, we're done */
if (char_limit == 1)
return hash;
else if (char_limit != 0)
--char_limit;
}
}
/* return the hash code */
return hash;
}
/* ------------------------------------------------------------------------ */
/*
 * Pre-defined return flag values.  match_strings() returns zero when the
 * strings don't match; when they do match, the result always includes
 * RF_MATCH, combined with the other bits below as applicable and with
 * the result flags of any equivalence mappings that were used.
 */
#define RF_MATCH 0x0001 /* the string matched */
#define RF_CASEFOLD 0x0002 /* matched with case folding */
#define RF_TRUNC 0x0004 /* matched with truncation */
/*
 * Property evaluator - match a value string against a reference string.
 * Takes two arguments, both of which must be strings (otherwise we throw
 * VMERR_BAD_TYPE_BIF): the value string is the one at the top of the
 * stack, and the reference string is the one beneath it.  Sets *retval to
 * the integer result of match_strings().  Always returns TRUE.
 */
int CVmObjStrComp::getp_match_values(VMG_ vm_obj_id_t /*self*/,
                                     vm_val_t *retval, uint *argc)
{
    static CVmNativeCodeDesc desc(2);

    /* check the argument count; if that settles the call, we're done */
    if (get_prop_check_argc(retval, argc, &desc))
        return TRUE;

    /*
     * Get the two strings, leaving them on the stack while we work with
     * them (for gc protection).
     */
    const char *const val_str = G_stk->get(0)->get_as_string(vmg0_);
    const char *const ref_str = G_stk->get(1)->get_as_string(vmg0_);

    /* both arguments have to be strings */
    if (!(val_str != 0 && ref_str != 0))
        err_throw(VMERR_BAD_TYPE_BIF);

    /* compare the strings' contents, and return the result */
    const unsigned long result =
        match_strings(val_str + VMB_LEN, vmb_get_len(val_str),
                      ref_str + VMB_LEN, vmb_get_len(ref_str));
    retval->set_int(result);

    /* we no longer need the arguments */
    G_stk->discard(2);

    /* handled */
    return TRUE;
}
/*
 * Match two strings.
 *
 * valstr/vallen give the value (input) string and refstr/reflen give the
 * reference string, each as UTF-8 text with its length in bytes.  We walk
 * the reference string a character at a time.  Each reference character
 * must match the current value character exactly, or match it after case
 * folding (if we're not sensitive to case), or have an equivalence
 * mapping whose value string matches the value text at the current
 * position.
 *
 * Returns zero if the strings don't match.  On a match, returns RF_MATCH
 * combined with RF_CASEFOLD if any character matched only by case
 * folding, RF_TRUNC if the value string is a truncated form of the
 * reference string, and the result flags of each equivalence mapping
 * that was tried along the way.  A truncated match is allowed only when
 * we have a non-zero truncation length and at least that many value
 * characters matched.
 */
unsigned long CVmObjStrComp::match_strings(const char *valstr, size_t vallen,
                                           const char *refstr, size_t reflen)
{
    vmobj_strcmp_ext *ext = get_ext();
    utf8_ptr valp;
    utf8_ptr refp;
    unsigned long ret;
    int fold_case = !(ext->case_sensitive);
    size_t valcharlen;

    /* set up to scan the strings */
    valp.set((char *)valstr);
    refp.set((char *)refstr);

    /* start with no return flags */
    ret = 0;

    /* scan the strings */
    for (valcharlen = 0 ; vallen != 0 && reflen != 0 ; refp.inc(&reflen))
    {
        wchar_t valch;
        wchar_t refch;
        vmobj_strcmp_equiv **t1;
        vmobj_strcmp_equiv *eq;

        /* get each character */
        valch = valp.getch();
        refch = refp.getch();

        /* check for an exact match first */
        if (refch == valch)
        {
            /* it's an exact match - skip this input character */
            valp.inc(&vallen);
            ++valcharlen;
            continue;
        }

        /* check for a case-folded match if we're insensitive to case */
        if (fold_case && t3_to_lower(valch) == t3_to_lower(refch))
        {
            /* it's a case-folded match - skip this input character */
            valp.inc(&vallen);
            ++valcharlen;

            /* note in the flags that we have differing cases in the match */
            ret |= RF_CASEFOLD;

            /* keep going */
            continue;
        }

        /* check for a reference equivalence mapping */
        if ((t1 = ext->equiv[(refch >> 8) & 0xFF]) != 0
            && (eq = t1[refch & 0xFF]) != 0)
        {
            wchar_t *vp;
            size_t vlen;

            /*
             * In case we match, apply the appropriate flags added for the
             * equivalence mapping, based on the case of the first value
             * character we're testing.  (If we don't match, we'll simply
             * return failure, so it won't matter that we messed with the
             * flags.)
             */
            ret |= (t3_is_upper(valch) ? eq->uc_result_flags
                                       : eq->lc_result_flags);

            /* match each character from the mapping string */
            for (vp = eq->val_ch, vlen = eq->val_ch_cnt ;
                 vallen != 0 && vlen != 0 ; ++vp, --vlen)
            {
                /* get this mapping character */
                refch = *vp;

                /*
                 * Get the value character at the current position.  We
                 * have to fetch it again on each pass, because we advance
                 * through the value string as we match; if we didn't, the
                 * second and later characters of a multi-character
                 * mapping would be compared against the first value
                 * character instead of the current one.
                 */
                valch = valp.getch();

                /* if we have an exact match, keep going */
                if (refch == valch)
                {
                    /* matched - skip this character and keep going */
                    valp.inc(&vallen);
                    ++valcharlen;
                    continue;
                }

                /* check for a case-folded match if appropriate */
                if (fold_case && t3_to_lower(valch) == t3_to_lower(refch))
                {
                    /* matched - skip this input character */
                    valp.inc(&vallen);
                    ++valcharlen;

                    /* note the case-folded match and keep going */
                    ret |= RF_CASEFOLD;
                    continue;
                }

                /* no match */
                return 0;
            }

            /*
             * If the value string ran out before we got through the whole
             * mapping string, we matched only part of this reference
             * character.  That's a truncation, not a full match, so stop
             * here without consuming the reference character: the
             * reference string is then left non-empty, and the checks
             * below treat this as "ran out of value string first".
             */
            if (vlen != 0)
                break;

            /*
             * if we make it here, we matched the equivalence mapping -
             * we've already skipped the input we matched, so simply keep
             * going
             */
            continue;
        }

        /* we don't have anything else to try, so we don't have a match */
        return 0;
    }

    /*
     * If we ran out of reference string before we ran out of value
     * string, we definitely do not have a match.  If we ran out of value
     * string before we ran out of reference string, we have a match as
     * long as we matched at least the truncation length.
     */
    if (reflen == 0 && vallen == 0)
    {
        /*
         * We ran out of both at the same time - it's a match.  Return the
         * result code up to this point OR'd with RF_MATCH, which is our
         * pre-defined bit that we set for every match.
         */
        return (ret | RF_MATCH);
    }
    else if (vallen != 0)
    {
        /* we ran out of reference string first - it's not a match */
        return 0;
    }
    else
    {
        /*
         * We ran out of value string first, so it's a truncated match if
         * we matched at least up to the truncation length (assuming we
         * allow truncation at all).  If we didn't make it to the
         * truncation length, or we don't allow truncation, it's not a
         * match.
         */
        if (ext->trunc_len != 0 && valcharlen >= ext->trunc_len)
        {
            /*
             * it's a truncated match - return the result code up to this
             * point, OR'd with RF_MATCH (our pre-defined bit we set for
             * every match) and RF_TRUNC (our pre-defined bit we set for
             * truncated matches)
             */
            return (ret | RF_MATCH | RF_TRUNC);
        }
        else
        {
            /* didn't make it to the truncation length, so it's not a match */
            return 0;
        }
    }
}
|