cfad47cfa3/tads3/vmstr.cpp

4b825dc642cb6eb9a060e54bf8d69288fbee4904cfad47cfa334b206c65f22086bcc5d63e6f70944
1
#ifdef RCSID
2
static char RCSid[] =
3
"$Header: d:/cvsroot/tads/tads3/VMSTR.CPP,v 1.3 1999/05/17 02:52:28 MJRoberts Exp $";
4
#endif
5
6
/* 
7
 *   Copyright (c) 1998, 2002 Michael J. Roberts.  All Rights Reserved.
8
 *   
9
 *   Please see the accompanying license file, LICENSE.TXT, for information
10
 *   on using and copying this software.  
11
 */
12
/*
13
Name
14
  vmstr.cpp - VM string metaclass implementation
15
Function
16
  
17
Notes
18
  
19
Modified
20
  10/28/98 MJRoberts  - Creation
21
*/
22
23
#include <stdlib.h>
24
#include <stdio.h>
25
#include <string.h>
26
27
#include "t3std.h"
28
#include "vmmcreg.h"
29
#include "vmobj.h"
30
#include "vmstr.h"
31
#include "utf8.h"
32
#include "vmerr.h"
33
#include "vmerrnum.h"
34
#include "vmfile.h"
35
#include "vmstack.h"
36
#include "vmpool.h"
37
#include "vmmeta.h"
38
#include "vmrun.h"
39
#include "vmbif.h"
40
#include "vmpredef.h"
41
#include "vmlst.h"
42
#include "vmuni.h"
43
#include "vmcset.h"
44
#include "vmbytarr.h"
45
#include "charmap.h"
46
47
48
/* ------------------------------------------------------------------------ */
49
/*
50
 *   statics 
51
 */
52
53
/* metaclass registration object */
54
static CVmMetaclassString metaclass_reg_obj;
55
CVmMetaclass *CVmObjString::metaclass_reg_ = &metaclass_reg_obj;
56
57
/* function table */
58
int (*CVmObjString::func_table_[])(VMG_ vm_val_t *retval,
59
                                   const vm_val_t *self_val,
60
                                   const char *str, uint *argc) =
61
{
62
    &CVmObjString::getp_undef,
63
    &CVmObjString::getp_len,
64
    &CVmObjString::getp_substr,
65
    &CVmObjString::getp_upper,
66
    &CVmObjString::getp_lower,
67
    &CVmObjString::getp_find,
68
    &CVmObjString::getp_to_uni,
69
    &CVmObjString::getp_htmlify,
70
    &CVmObjString::getp_starts_with,
71
    &CVmObjString::getp_ends_with,
72
    &CVmObjString::getp_to_byte_array,
73
    &CVmObjString::getp_replace
74
};
75
76
/* ------------------------------------------------------------------------ */
77
/*
78
 *   Static creation methods 
79
 */
80
81
82
/* create dynamically using stack arguments */
83
vm_obj_id_t CVmObjString::create_from_stack(VMG_ const uchar **, uint)
84
{
85
    /* dynamic string construction is not currently supported */
86
    err_throw(VMERR_BAD_DYNAMIC_NEW);
87
    
88
    /* the compiler doesn't know we won't make it here */
89
    AFTER_ERR_THROW(return VM_INVALID_OBJ;)
90
}
91
92
/* create a string with no initial contents */
93
vm_obj_id_t CVmObjString::create(VMG_ int in_root_set)
94
{
95
    vm_obj_id_t id = vm_new_id(vmg_ in_root_set, FALSE, FALSE);
96
    new (vmg_ id) CVmObjString();
97
    return id;
98
}
99
100
/* create with a given buffer size */
101
vm_obj_id_t CVmObjString::create(VMG_ int in_root_set, size_t byte_size)
102
{
103
    vm_obj_id_t id = vm_new_id(vmg_ in_root_set, FALSE, FALSE);
104
    new (vmg_ id) CVmObjString(vmg_ byte_size);
105
    return id;
106
}
107
108
/* create from a constant UTF-8 string */
109
vm_obj_id_t CVmObjString::create(VMG_ int in_root_set,
110
                                 const char *str, size_t bytelen)
111
{
112
    vm_obj_id_t id = vm_new_id(vmg_ in_root_set, FALSE, FALSE);
113
    new (vmg_ id) CVmObjString(vmg_ str, bytelen);
114
    return id;
115
}
116
117
/* ------------------------------------------------------------------------ */
118
/*
119
 *   Constructors 
120
 */
121
122
/*
123
 *   create a string object with a given buffer size
124
 */
125
CVmObjString::CVmObjString(VMG_ size_t len)
126
{
127
    /* 
128
     *   the length is limited to an unsigned 16-bit value (NB: it really is
129
     *   65535 on ALL PLATFORMS - this is a portable limit imposed by the
130
     *   portable storage format, not a local platform limit) 
131
     */
132
    if (len > 65535)
133
    {
134
        ext_ = 0;
135
        err_throw(VMERR_STR_TOO_LONG);
136
    }
137
    
138
    /* 
139
     *   allocate space for the buffer plus the length prefix in the
140
     *   variable heap 
141
     */
142
    ext_ = (char *)G_mem->get_var_heap()->alloc_mem(len + VMB_LEN, this);
143
144
    /* set the length */
145
    vmb_put_len(ext_, len);
146
}
147
148
/*
149
 *   create a string object from a given UTF8 string constant
150
 */
151
CVmObjString::CVmObjString(VMG_ const char *str, size_t len)
152
{
153
    /* check for the length limit */
154
    if (len > 65535)
155
    {
156
        ext_ = 0;
157
        err_throw(VMERR_STR_TOO_LONG);
158
    }        
159
160
    /* 
161
     *   allocate space for the string plus the length prefix in the
162
     *   variable heap 
163
     */
164
    ext_ = (char *)G_mem->get_var_heap()->alloc_mem(len + VMB_LEN, this);
165
166
    /* 
167
     *   store the length prefix in portable format (so that we can easily
168
     *   write our contents to a saved state file) 
169
     */
170
    vmb_put_len(ext_, len);
171
172
    /* copy the string's bytes */
173
    memcpy(ext_ + VMB_LEN, str, len);
174
}
175
176
/* ------------------------------------------------------------------------ */
177
/*
178
 *   receive notification of deletion 
179
 */
180
void CVmObjString::notify_delete(VMG_ int in_root_set)
181
{
182
    /* free our extension */
183
    if (ext_ != 0 && !in_root_set)
184
        G_mem->get_var_heap()->free_mem(ext_);
185
}
186
187
/* ------------------------------------------------------------------------ */
188
/*
189
 *   Set a property.  Strings have no settable properties, so simply
190
 *   signal an error indicating that the set-prop call is invalid.  
191
 */
192
void CVmObjString::set_prop(VMG_ CVmUndo *, vm_obj_id_t,
193
                            vm_prop_id_t, const vm_val_t *)
194
{
195
    err_throw(VMERR_INVALID_SETPROP);
196
}
197
198
/* ------------------------------------------------------------------------ */
199
/*
200
 *   Save the object to a file 
201
 */
202
void CVmObjString::save_to_file(VMG_ CVmFile *fp)
203
{
204
    size_t len;
205
    
206
    /* get our length */
207
    len = vmb_get_len(ext_);
208
209
    /* write the length prefix and the string */
210
    fp->write_bytes(ext_, len + VMB_LEN);
211
}
212
213
/*
214
 *   Restore the object from a file 
215
 */
216
void CVmObjString::restore_from_file(VMG_ vm_obj_id_t,
217
                                     CVmFile *fp, CVmObjFixup *)
218
{
219
    size_t len;
220
    
221
    /* read the length prefix */
222
    len = fp->read_uint2();
223
224
    /* free any existing extension */
225
    if (ext_ != 0)
226
    {
227
        G_mem->get_var_heap()->free_mem(ext_);
228
        ext_ = 0;
229
    }
230
231
    /* 
232
     *   allocate our extension - make room for the length prefix plus the
233
     *   bytes of the string
234
     */
235
    ext_ = (char *)G_mem->get_var_heap()->alloc_mem(len + VMB_LEN, this);
236
237
    /* store our length prefix */
238
    vmb_put_len(ext_, len);
239
240
    /* read the string */
241
    fp->read_bytes(ext_ + VMB_LEN, len);
242
}
243
244
/* ------------------------------------------------------------------------ */
245
/*
246
 *   Add a value to this string 
247
 */
248
void CVmObjString::add_val(VMG_ vm_val_t *result,
249
                           vm_obj_id_t self, const vm_val_t *val)
250
{
251
    /* 
252
     *   Use the generic string adder, using my extension as the constant
253
     *   string.  We store our extension in the general string format
254
     *   required by the static adder. 
255
     */
256
    add_to_str(vmg_ result, self, ext_, val);
257
}
258
259
/*
260
 *   Static string adder.  This creates a new string object that results
261
 *   from appending the given value to the given string constant.  This is
262
 *   defined statically so that this same code can be shared for adding to
263
 *   constant pool strings and adding to CVmObjString objects.
264
 *   
265
 *   'strval' must point to a constant string.  The first two bytes of the
266
 *   string are stored in portable UINT2 format and give the length in
267
 *   bytes of the string, not including the length prefix; immediately
268
 *   following the length prefix are the bytes of the string.
269
 *   
270
 *   Note that we *always* create a new object to hold the result, even if
271
 *   the new string is identical to the first, so that we consistently
272
 *   return a distinct reference from the original.  
273
 */
274
void CVmObjString::add_to_str(VMG_ vm_val_t *result,
275
                              vm_obj_id_t self, const char *strval1,
276
                              const vm_val_t *val)
277
{
278
    const char *strval2;
279
    char buf[128];
280
    vm_obj_id_t obj;
281
    size_t len1, len2;
282
    CVmObjString *objptr;
283
    vm_val_t new_obj2;
284
        
285
    /* convert the value to be appended to a string */
286
    strval2 = cvt_to_str(vmg_ &new_obj2, buf, sizeof(buf), val, 10);
287
288
    /* 
289
     *   push the new string (if any) and self, to protect the two strings
290
     *   from garbage collection 
291
     */
292
    G_stk->push()->set_obj(self);
293
    G_stk->push(&new_obj2);
294
295
    /* get the lengths of the two strings */
296
    len1 = vmb_get_len(strval1);
297
    len2 = vmb_get_len(strval2);
298
299
    /* create a new string object to hold the result */
300
    obj = create(vmg_ FALSE, len1 + len2);
301
    objptr = (CVmObjString *)vm_objp(vmg_ obj);
302
303
    /* copy the two strings into the new object's string buffer */
304
    objptr->copy_into_str(0, strval1 + VMB_LEN, len1);
305
    objptr->copy_into_str(len1, strval2 + VMB_LEN, len2);
306
307
    /* we're done with the garbage collection protection */
308
    G_stk->discard(2);
309
310
    /* return the new object in the result */
311
    result->set_obj(obj);
312
}
313
314
315
/* ------------------------------------------------------------------------ */
316
/*
317
 *   Allocate a string buffer large enough to hold a given value.  We'll
318
 *   use the provided buffer if possible.
319
 *   
320
 *   If the provided buffer is null or is not large enough, we'll allocate
321
 *   a new string object with a large enough buffer to hold the value, and
322
 *   return the object's extension as the buffer.  This object will never
323
 *   be referenced by anyone, so it will be deleted at the next garbage
324
 *   collection.
325
 *   
326
 *   The buffer size and requested size are in bytes.  
327
 */
328
char *CVmObjString::alloc_str_buf(VMG_ vm_val_t *new_obj,
329
                                  char *buf, size_t buf_size,
330
                                  size_t required_size)
331
{
332
    vm_obj_id_t obj;
333
    
334
    /* if the provided buffer is large enough, use it */
335
    if (buf != 0 && buf_size >= required_size)
336
    {
337
        /* there's no new object */
338
        new_obj->set_nil();
339
        
340
        /* return the buffer */
341
        return buf;
342
    }
343
344
    /* allocate a new string object */
345
    obj = create(vmg_ FALSE, required_size);
346
347
    /* return the new object's string buffer */
348
    return (char *)vm_objp(vmg_ obj)->cast_to_string(vmg_ obj, new_obj);
349
}
350
351
/* ------------------------------------------------------------------------ */
352
/*
353
 *   Convert a value to a string 
354
 */
355
const char *CVmObjString::cvt_to_str(VMG_ vm_val_t *new_str,
356
                                     char *result_buf,
357
                                     size_t result_buf_size,
358
                                     const vm_val_t *val, int radix)
359
{
360
    /* presume we won't need to create a new string object */
361
    new_str->set_nil();
362
    
363
    /* check the type of the value */
364
    switch(val->typ)
365
    {
366
    case VM_SSTRING:
367
        /* it's a string constant - no conversion is necessary */
368
        return G_const_pool->get_ptr(val->val.ofs);
369
370
    case VM_OBJ:
371
        /* it's an object - ask it for its string representation */
372
        return vm_objp(vmg_ val->val.obj)
373
            ->cast_to_string(vmg_ val->val.obj, new_str);
374
        break;
375
376
    case VM_INT:
377
        /* 
378
         *   It's a number - convert it to a string.  Use the provided result
379
         *   buffer if possible, but make sure we have room for the number.
380
         *   The unicode values we're storing are in the ascii range, so we
381
         *   only need one byte per character.  The longest buffer we'd need,
382
         *   then, is 32 bytes, for a conversion to a binary digit string.
383
         *   The conversion also needs two bytes for the length prefix; give
384
         *   it a few extra bytes as insurance against future algorithm
385
         *   changes that might need more padding.  
386
         */
387
        result_buf = alloc_str_buf(vmg_ new_str,
388
                                   result_buf, result_buf_size, 40);
389
390
        /* generate the string */
391
        return cvt_int_to_str(result_buf, 40, val->val.intval, radix);
392
393
    case VM_NIL:
394
        /* nil - use the literal string "nil" */
395
        return "\003\000nil";
396
        break;
397
398
    case VM_TRUE:
399
        /* true - use the literal string "true" */
400
        return "\004\000true";
401
        break;
402
403
    default:
404
        /* other types cannot be added to a string */
405
        err_throw(VMERR_NO_STR_CONV);
406
407
        /* we never really get here, but the compiler doesn't know that */
408
        AFTER_ERR_THROW(return 0;)
409
    }
410
}
411
412
/* ------------------------------------------------------------------------ */
413
/*
414
 *   Convert an integer to a string, storing the result in the given
415
 *   buffer in portable string format (with length prefix).  The radix
416
 *   must be 8, 10, or 16.  
417
 *   
418
 *   Decimal numbers are treated as signed, and a leading dash is included
419
 *   if the number is negative.  Octal and hex numbers are treated as
420
 *   unsigned.
421
 *   
422
 *   For efficiency, we store the number at the end of the buffer (this
423
 *   makes it easy to generate the number, since we need to generate
424
 *   numerals in reverse order).  We return a pointer to the result, which
425
 *   may not start at the beginning of the buffer.  
426
 */
427
char *CVmObjString::cvt_int_to_str(char *buf, size_t buflen,
428
                                   int32 inval, int radix)
429
{
430
    int neg;
431
    uint32 val;
432
    char *p;
433
    size_t len;
434
435
    /* start at the end of the buffer */
436
    p = buf + buflen;
437
438
    /* 
439
     *   if it's negative, and we're converting to decimal representation,
440
     *   treat the value as signed and use a leading minus sign;
441
     *   otherwise, treat the value as unsigned 
442
     */
443
    if (radix == 10 && inval < 0)
444
    {
445
        /* note that we need a minus sign */
446
        neg = TRUE;
447
448
        /* use the positive value for the conversion */
449
        val = (uint32)(-inval);
450
    }
451
    else
452
    {
453
        /* the value is positive (or at least unsigned) */
454
        neg = FALSE;
455
456
        /* use the value as-is */
457
        val = (uint32)inval;
458
    }
459
460
    /* store numerals in reverse order */
461
    do
462
    {
463
        char c;
464
465
        /* if we have no more room, throw an error */
466
        if (p == buf)
467
            err_throw(VMERR_CONV_BUF_OVF);
468
        
469
        /* move on to the next available character in the buffer */
470
        --p;
471
472
        /* figure the character representation of this numeral */
473
        c = (char)(val % radix);
474
        if (c < 10)
475
            c += '0';
476
        else
477
            c += 'A' - 10;
478
479
        /* store the numeral at the current location */
480
        *p = c;
481
482
        /* divide the remaining number by the radix */
483
        val /= radix;
484
    } while (val != 0);
485
486
    /* store the leading minus sign if necessary */
487
    if (neg)
488
    {
489
        /* if we don't have room, throw an error */
490
        if (p == buf)
491
            err_throw(VMERR_CONV_BUF_OVF);
492
493
        /* move to the next byte */
494
        --p;
495
496
        /* store the minus sign */
497
        *p = '-';
498
    }
499
500
    /* calculate the length */
501
    len = buflen - (p - buf);
502
503
    /* make sure we have room for the length prefix */
504
    if (p < buf + 2)
505
        err_throw(VMERR_CONV_BUF_OVF);
506
507
    /* store the length prefix */
508
    p -= 2;
509
    vmb_put_len(p, len);
510
511
    /* return the pointer to the start of the number */
512
    return p;
513
}
514
515
/* ------------------------------------------------------------------------ */
516
/*
517
 *   Check a value for equality 
518
 */
519
int CVmObjString::equals(VMG_ vm_obj_id_t self,
520
                         const vm_val_t *val, int /*depth*/) const
521
{
522
    /* if the other value is a reference to myself, we certainly match */
523
    if (val->typ == VM_OBJ && val->val.obj == self)
524
        return TRUE;
525
526
    /* 
527
     *   use the constant string comparison routine, using our underlying
528
     *   string as the constant string data 
529
     */
530
    return const_equals(vmg_ ext_, val);
531
}
532
533
/*
534
 *   Constant string equality test
535
 */
536
int CVmObjString::const_equals(VMG_ const char *str, const vm_val_t *val)
537
{
538
    const char *str2;
539
    size_t len;
540
541
    /* get the other value as a string */
542
    str2 = val->get_as_string(vmg0_);
543
544
    /* if the object doesn't have an underlying string, we don't match */
545
    if (str2 == 0)
546
        return FALSE;
547
548
    /* 
549
     *   if their lengths match, and the bytes match exactly, we have a
550
     *   match; otherwise, they're not equal 
551
     */
552
    len = vmb_get_len(str);
553
    return (len == vmb_get_len(str2)
554
            && memcmp(str + VMB_LEN, str2 + VMB_LEN, len) == 0);
555
}
556
557
/* ------------------------------------------------------------------------ */
558
/*
559
 *   Hash value 
560
 */
561
uint CVmObjString::calc_hash(VMG_ vm_obj_id_t self, int /*depth*/) const
562
{
563
    return const_calc_hash(ext_);
564
}
565
566
/*
567
 *   Hash value calculation 
568
 */
569
uint CVmObjString::const_calc_hash(const char *str)
570
{
571
    size_t len;
572
    uint hash;
573
    utf8_ptr p;
574
    
575
    /* get and skip the length prefix */
576
    len = vmb_get_len(str);
577
    str += VMB_LEN;
578
579
    /* scan the string and calculate the hash */
580
    for (p.set((char *)str), hash = 0 ; len != 0 ; p.inc(&len))
581
        hash += p.getch();
582
583
    /* return the result */
584
    return hash;
585
}
586
587
588
/* ------------------------------------------------------------------------ */
589
/*
590
 *   Compare this string to another value 
591
 */
592
int CVmObjString::compare_to(VMG_ vm_obj_id_t /*self*/,
593
                             const vm_val_t *val) const
594
{
595
    /* use the static string magnitude comparison routine */
596
    return const_compare(vmg_ ext_, val);
597
}
598
599
/*
600
 *   Compare a constant string value to another value.  Returns a positive
601
 *   number if the constant string is lexically greater than the other
602
 *   value, a negative number if the constant string is lexically less
603
 *   than the other value, or zero if the constant string is lexically
604
 *   identical to the other value.
605
 *   
606
 *   The other value must be a string constant or an object with an
607
 *   underlying string value.  We'll throw an error for any other type of
608
 *   value.  
609
 */
610
int CVmObjString::const_compare(VMG_ const char *str1, const vm_val_t *val)
611
{
612
    const char *str2;
613
    size_t len1, len2;
614
615
    /* get the other value as a string */
616
    str2 = val->get_as_string(vmg0_);
617
618
    /* if it's not a string, we can't compare it */
619
    if (str2 == 0)
620
        err_throw(VMERR_INVALID_COMPARISON);
621
622
    /* get the lengths of the two strings */
623
    len1 = vmb_get_len(str1);
624
    len2 = vmb_get_len(str2);
625
626
    /* perform a lexical comparison and return the result */
627
    return utf8_ptr::s_compare_to(str1 + VMB_LEN, len1, str2 + VMB_LEN, len2);
628
}
629
630
/* ------------------------------------------------------------------------ */
631
/*
632
 *   Find a substring within a string 
633
 */
634
const char *CVmObjString::find_substr(VMG_ const char *str, int start_idx,
635
                                      const char *substr, size_t *idxp)
636
{
637
    utf8_ptr p;
638
    size_t rem;
639
    size_t sublen;
640
    size_t char_ofs;
641
    int i;
642
    
643
    /* get the lengths */
644
    rem = vmb_get_len(str);
645
    sublen = vmb_get_len(substr);
646
647
    /* set up utf8 pointer into the string */
648
    p.set((char *)str + 2);
649
650
    /* skip to the starting index */
651
    for (i = start_idx ; i > 0 && rem >= sublen ; --i, p.inc(&rem)) ;
652
653
    /* scan for the substring */
654
    for (char_ofs = 0 ; rem != 0 && rem >= sublen ; ++char_ofs, p.inc(&rem))
655
    {
656
        /* check for a match */
657
        if (memcmp(p.getptr(), substr + VMB_LEN, sublen) == 0)
658
        {
659
            /* it's a match - set the return index if they are interested */
660
            if (idxp != 0)
661
                *idxp = char_ofs + start_idx;
662
663
            /* return the current pointer */
664
            return p.getptr();
665
        }
666
    }
667
668
    /* we didn't find it - so indicate by returning null */
669
    return 0;
670
}
671
672
/* ------------------------------------------------------------------------ */
673
/*
674
 *   Evaluate a property 
675
 */
676
int CVmObjString::get_prop(VMG_ vm_prop_id_t prop, vm_val_t *retval,
677
                           vm_obj_id_t self, vm_obj_id_t *source_obj,
678
                           uint *argc)
679
{
680
    vm_val_t self_val;
681
    
682
    /* use the constant evaluator */
683
    self_val.set_obj(self);
684
    if (const_get_prop(vmg_ retval, &self_val, ext_, prop, source_obj, argc))
685
    {
686
        *source_obj = metaclass_reg_->get_class_obj(vmg0_);
687
        return TRUE;
688
    }
689
690
    /* inherit default handling from the base object class */
691
    return CVmObject::get_prop(vmg_ prop, retval, self, source_obj, argc);
692
}
693
694
/* ------------------------------------------------------------------------ */
695
/*
696
 *   Evaluate a property of a constant string value 
697
 */
698
int CVmObjString::const_get_prop(VMG_ vm_val_t *retval,
699
                                 const vm_val_t *self_val, const char *str,
700
                                 vm_prop_id_t prop, vm_obj_id_t *src_obj,
701
                                 uint *argc)
702
{
703
    uint func_idx;
704
705
    /* presume no source object */
706
    *src_obj = VM_INVALID_OBJ;
707
708
    /* translate the property index to an index into our function table */
709
    func_idx = G_meta_table
710
               ->prop_to_vector_idx(metaclass_reg_->get_reg_idx(), prop);
711
712
    /* call the appropriate function */
713
    if ((*func_table_[func_idx])(vmg_ retval, self_val, str, argc))
714
        return TRUE;
715
716
    /* 
717
     *   If this is a constant string (which is indicated by an invalid
718
     *   'self' object ID), try inheriting the default object
719
     *   interpretation, passing the constant string placeholder object
720
     *   for its type information.  
721
     */
722
    if (self_val->typ != VM_OBJ)
723
    {
724
        /* try going to CVmObject directly */
725
        if (vm_objp(vmg_ G_predef->const_str_obj)
726
            ->CVmObject::get_prop(vmg_ prop, retval, G_predef->const_str_obj,
727
                                  src_obj, argc))
728
            return TRUE;
729
    }
730
731
    /* not handled */
732
    return FALSE;
733
}
734
735
/* ------------------------------------------------------------------------ */
736
/*
737
 *   property evaluator - get the length 
738
 */
739
int CVmObjString::getp_len(VMG_ vm_val_t *retval, const vm_val_t *self_val,
740
                           const char *str, uint *argc)
741
{
742
    utf8_ptr p;
743
    static CVmNativeCodeDesc desc(0);
744
    
745
    /* check arguments */
746
    if (get_prop_check_argc(retval, argc, &desc))
747
        return TRUE;
748
749
    /* set up a utf-8 pointer to the string's contents */
750
    p.set((char *)str + VMB_LEN);
751
752
    /* return the character length of the string */
753
    retval->set_int(p.len(vmb_get_len(str)));
754
755
    /* handled */
756
    return TRUE;
757
}
758
759
/* ------------------------------------------------------------------------ */
760
/*
761
 *   property evaluator - extract a substring
762
 */
763
int CVmObjString::getp_substr(VMG_ vm_val_t *retval, const vm_val_t *self_val,
764
                              const char *str, uint *in_argc)
765
{
766
    long start;
767
    ulong len = 0;
768
    size_t rem;
769
    utf8_ptr p;
770
    utf8_ptr start_p;
771
    size_t start_rem;
772
    size_t new_len;
773
    vm_obj_id_t obj;
774
    uint argc = (in_argc == 0 ? 0 : *in_argc);
775
    static CVmNativeCodeDesc desc(1, 1);
776
777
    /* check arguments */
778
    if (get_prop_check_argc(retval, in_argc, &desc))
779
        return TRUE;
780
781
    /* pop the starting index */
782
    start = CVmBif::pop_long_val(vmg0_);
783
784
    /* pop the length, if present */
785
    if (argc >= 2)
786
        len = CVmBif::pop_long_val(vmg0_);
787
788
    /* push a self-reference to protect against GC */
789
    G_stk->push(self_val);
790
791
    /* set up a utf8 pointer to traverse the string */
792
    p.set((char *)str + VMB_LEN);
793
794
    /* get the byte length of the string */
795
    rem = vmb_get_len(str);
796
797
    /* 
798
     *   Skip ahead to the starting index.  If the index is positive, it's
799
     *   an index from the start of the string; if it's negative, it's an
800
     *   offset from the end of the string.  
801
     */
802
    if (start > 0)
803
    {
804
        /* 
805
         *   it's an index from the start - skip ahead by start-1 characters
806
         *   (since a start value of 1 tells us to start at the first
807
         *   character) 
808
         */
809
        for ( ; start > 1 && rem != 0 ; --start)
810
            p.inc(&rem);
811
    }
812
    else if (start < 0)
813
    {
814
        /*
815
         *   It's an index from the end of the string: -1 tells us to start
816
         *   at the last character, -2 at the second to last, and so on.
817
         *   Move to the first byte past the end of the string, and work
818
         *   backwards by the given number of characters.  
819
         */
820
        for (p.set((char *)str + VMB_LEN + rem), rem = 0 ;
821
             start < 0 && p.getptr() != (char *)str + VMB_LEN ; ++start)
822
        {
823
            /* move back one character */
824
            p.dec(&rem);
825
        }
826
    }
827
828
    /* this is the starting position */
829
    start_p = p;
830
    start_rem = rem;
831
832
    /* 
833
     *   if a length was specified, calculate the number of bytes in the
834
     *   given length; otherwise, use the entire remainder of the string 
835
     */
836
    if (argc >= 2)
837
    {
838
        /* keep skipping ahead by the desired length */
839
        for ( ; len > 0 && rem != 0 ; --len)
840
            p.inc(&rem);
841
842
        /* use the difference in lengths from the starting point to here */
843
        new_len = start_rem - rem;
844
    }
845
    else
846
    {
847
        /* use the entire remainder of the string */
848
        new_len = start_rem;
849
    }
850
851
    /* create the new string */
852
    obj = CVmObjString::create(vmg_ FALSE, start_p.getptr(), new_len);
853
854
    /* return the new object */
855
    retval->set_obj(obj);
856
857
    /* discard the GC protection references */
858
    G_stk->discard();
859
860
    /* handled */
861
    return TRUE;
862
}
863
864
/* ------------------------------------------------------------------------ */
865
/*
866
 *   property evaluator - toUpper
867
 */
868
int CVmObjString::getp_upper(VMG_ vm_val_t *retval, const vm_val_t *self_val,
869
                             const char *str, uint *argc)
870
{
871
    size_t srclen;
872
    size_t dstlen;
873
    size_t rem;
874
    utf8_ptr srcp;
875
    utf8_ptr dstp;
876
    vm_obj_id_t result_obj;
877
    static CVmNativeCodeDesc desc(0);
878
879
    /* check arguments */
880
    if (get_prop_check_argc(retval, argc, &desc))
881
        return TRUE;
882
883
    /* get my length */
884
    srclen = vmb_get_len(str);
885
886
    /* leave the string on the stack as GC protection */
887
    G_stk->push(self_val);
888
889
    /* 
890
     *   Scan the string to determine how long the result will be.  The
891
     *   result won't necessarily be the same length as the original,
892
     *   because a two-byte character in the original could turn into a
893
     *   three-byte character in the result, and vice versa.  (We could
894
     *   allocate a result buffer three times the length of the original,
895
     *   but this seems more wasteful of space than scanning the string
896
     *   twice is wasteful of time.  It's a trade-off, though.)  
897
     */
898
    for (dstlen = 0, srcp.set((char *)str + VMB_LEN), rem = srclen ;
899
         rem != 0 ; srcp.inc(&rem))
900
    {
901
        /* get the size of the mapping for this character */
902
        dstlen += utf8_ptr::s_wchar_size(t3_to_upper(srcp.getch()));
903
    }
904
905
    /* allocate the result string */
906
    result_obj = CVmObjString::create(vmg_ FALSE, dstlen);
907
908
    /* get a pointer to the result buffer */
909
    dstp.set(((CVmObjString *)vm_objp(vmg_ result_obj))->cons_get_buf());
910
911
    /* write the string */
912
    for (srcp.set((char *)str + VMB_LEN), rem = srclen ;
913
         rem != 0 ; srcp.inc(&rem))
914
    {
915
        /* write the next character */
916
        dstp.setch(t3_to_upper(srcp.getch()));
917
    }
918
919
    /* return the value */
920
    retval->set_obj(result_obj);
921
922
    /* discard GC protection */
923
    G_stk->discard();
924
925
    /* handled */
926
    return TRUE;
927
}
928
929
/* ------------------------------------------------------------------------ */
930
/*
931
 *   property evaluator - toLower
932
 */
933
int CVmObjString::getp_lower(VMG_ vm_val_t *retval, const vm_val_t *self_val,
934
                             const char *str, uint *argc)
935
{
936
    size_t srclen;
937
    size_t dstlen;
938
    size_t rem;
939
    utf8_ptr srcp;
940
    utf8_ptr dstp;
941
    vm_obj_id_t result_obj;
942
    static CVmNativeCodeDesc desc(0);
943
944
    /* check arguments */
945
    if (get_prop_check_argc(retval, argc, &desc))
946
        return TRUE;
947
948
    /* get my length */
949
    srclen = vmb_get_len(str);
950
951
    /* leave the string on the stack as GC protection */
952
    G_stk->push(self_val);
953
954
    /* 
955
     *   Scan the string to determine how long the result will be.  The
956
     *   result won't necessarily be the same length as the original,
957
     *   because a two-byte character in the original could turn into a
958
     *   three-byte character in the result, and vice versa.  (We could
959
     *   allocate a result buffer three times the length of the original,
960
     *   but this seems more wasteful of space than scanning the string
961
     *   twice is wasteful of time.  It's a trade-off, though.)  
962
     */
963
    for (dstlen = 0, srcp.set((char *)str + VMB_LEN), rem = srclen ;
964
         rem != 0 ; srcp.inc(&rem))
965
    {
966
        /* get the size of the mapping for this character */
967
        dstlen += utf8_ptr::s_wchar_size(t3_to_lower(srcp.getch()));
968
    }
969
970
    /* allocate the result string */
971
    result_obj = CVmObjString::create(vmg_ FALSE, dstlen);
972
973
    /* get a pointer to the result buffer */
974
    dstp.set(((CVmObjString *)vm_objp(vmg_ result_obj))->cons_get_buf());
975
976
    /* write the string */
977
    for (srcp.set((char *)str + VMB_LEN), rem = srclen ;
978
         rem != 0 ; srcp.inc(&rem))
979
    {
980
        /* write the next character */
981
        dstp.setch(t3_to_lower(srcp.getch()));
982
    }
983
984
    /* return the value */
985
    retval->set_obj(result_obj);
986
987
    /* discard GC protection */
988
    G_stk->discard();
989
990
    /* handled */
991
    return TRUE;
992
}
993
994
/* ------------------------------------------------------------------------ */
995
/*
996
 *   property evaluator - find
997
 */
998
int CVmObjString::getp_find(VMG_ vm_val_t *retval, const vm_val_t *self_val,
999
                            const char *str, uint *argc)
1000
{
1001
    const char *str2;
1002
    size_t idx;
1003
    uint orig_argc = (argc != 0 ? *argc : 0);
1004
    static CVmNativeCodeDesc desc(1, 1);
1005
    int start_idx;
1006
    
1007
    /* check arguments */
1008
    if (get_prop_check_argc(retval, argc, &desc))
1009
        return TRUE;
1010
1011
    /* retrieve the string to find */
1012
    str2 = CVmBif::pop_str_val(vmg0_);
1013
1014
    /* if there's a starting index, retrieve it */
1015
    start_idx = (orig_argc >= 2 ? CVmBif::pop_int_val(vmg0_) - 1 : 0);
1016
    
1017
    /* find the substring */
1018
    if (find_substr(vmg_ str, start_idx, str2, &idx) != 0)
1019
    {
1020
        /* we found it - adjust to a 1-based value for return */
1021
        retval->set_int(idx + 1);
1022
    }
1023
    else
1024
    {
1025
        /* didn't find it - return nil */
1026
        retval->set_nil();
1027
    }
1028
1029
    /* handled */
1030
    return TRUE;
1031
}
1032
1033
/* ------------------------------------------------------------------------ */
1034
/*
1035
 *   replace flags 
1036
 */
1037
#define GETP_RPL_ALL    0x0001
1038
1039
/*
1040
 *   property evaluator - replace 
1041
 */
1042
int CVmObjString::getp_replace(VMG_ vm_val_t *retval,
1043
                               const vm_val_t *self_val,
1044
                               const char *str, uint *argc)
1045
{
1046
    vm_val_t arg1;
1047
    vm_val_t arg2;
1048
    const char *substr;
1049
    const char *rplstr;
1050
    size_t sublen;
1051
    size_t rpllen;
1052
    uint orig_argc = (argc != 0 ? *argc : 0);
1053
    static CVmNativeCodeDesc desc(3, 1);
1054
    int flags;
1055
    utf8_ptr p;
1056
    size_t rem;
1057
    size_t new_len;
1058
    int found;
1059
    int start_idx;
1060
    const char *rpl_start;
1061
1062
    /* check arguments */
1063
    if (get_prop_check_argc(retval, argc, &desc))
1064
        return TRUE;
1065
1066
    /* 
1067
     *   make copies of the string references, so we can put them back on the
1068
     *   stack as gc protection while we're working 
1069
     */
1070
    arg1 = *G_stk->get(0);
1071
    arg2 = *G_stk->get(1);
1072
1073
    /* retrieve the search and replacement substrings */
1074
    substr = CVmBif::pop_str_val(vmg0_);
1075
    rplstr = CVmBif::pop_str_val(vmg0_);
1076
1077
    /* note the string lengths */
1078
    sublen = vmb_get_len(substr);
1079
    rpllen = vmb_get_len(rplstr);
1080
1081
    /* get the flags */
1082
    flags = CVmBif::pop_int_val(vmg0_);
1083
1084
    /* if there's a starting index, retrieve it */
1085
    start_idx = (orig_argc >= 4 ? CVmBif::pop_int_val(vmg0_) - 1 : 0);
1086
1087
    /* put the string references back on the stack for gc protection */
1088
    G_stk->push(&arg1);
1089
    G_stk->push(&arg2);
1090
1091
    /* start at the beginning of the string to search */
1092
    rem = new_len = vmb_get_len(str);
1093
    p.set((char *)str + 2);
1094
1095
    /* skip ahead to the starting index */
1096
    for ( ; start_idx > 0 && rem >= sublen ; --start_idx, p.inc(&rem)) ;
1097
1098
    /* 
1099
     *   note the starting index for replacements - we don't want to replace
1100
     *   anything before this point 
1101
     */
1102
    rpl_start = p.getptr();
1103
1104
    /* 
1105
     *   Scan for instances of the substring, so we can figure out how big
1106
     *   the result string will be.  Don't actually do any replacements yet;
1107
     *   we'll scan again once we know how the result size.  
1108
     */
1109
    for (found = FALSE ; rem >= sublen ; )
1110
    {
1111
        /* if this is a match for the substring, note it */
1112
        if (memcmp(p.getptr(), substr + VMB_LEN, vmb_get_len(substr)) == 0)
1113
        {
1114
            /* note the find */
1115
            found = TRUE;
1116
1117
            /* it's a match - adjust the result length for the replacement */
1118
            new_len += rpllen - sublen;
1119
1120
            /* if we're replacing one instance only, look no further */
1121
            if ((flags & GETP_RPL_ALL) == 0)
1122
                break;
1123
1124
            /* skip the entire substring in the source */
1125
            p.set(p.getptr() + sublen);
1126
            rem -= sublen;
1127
        }
1128
        else
1129
        {
1130
            /* skip one character */
1131
            p.inc(&rem);
1132
        }
1133
    }
1134
1135
    /* 
1136
     *   if we found no instances of the search substring, the result is
1137
     *   simply the source string; otherwise, we must create a new string
1138
     *   with the substitution(s) 
1139
     */
1140
    if (found)
1141
    {
1142
        utf8_ptr dst;
1143
1144
        /* allocate the new string */
1145
        retval->set_obj(create(vmg_ FALSE, new_len));
1146
1147
        /* get a pointer to the buffer */
1148
        dst.set(((CVmObjString *)vm_objp(vmg_ retval->val.obj))
1149
                ->cons_get_buf());
1150
1151
        /* scan the string for replacements */
1152
        for (p.set((char *)str + 2), rem = vmb_get_len(str) ;
1153
             rem >= sublen ; )
1154
        {
1155
            /* 
1156
             *   If this is a match for the substring, and we've reached the
1157
             *   starting point for replacements, replace the substring. 
1158
             */
1159
            if (p.getptr() >= rpl_start
1160
                && memcmp(p.getptr(), substr + VMB_LEN, sublen) == 0)
1161
            {
1162
                /* it's a match - copy the replacement into the result */
1163
                memcpy(dst.getptr(), rplstr + VMB_LEN, rpllen);
1164
1165
                /* move past the replacement in the result */
1166
                dst.set(dst.getptr() + rpllen);
1167
1168
                /* move past the search substring in the source */
1169
                p.set(p.getptr() + sublen);
1170
                rem -= sublen;
1171
1172
                /* if we're replacing one instance only, look no further */
1173
                if ((flags & GETP_RPL_ALL) == 0)
1174
                    break;
1175
            }
1176
            else
1177
            {
1178
                /* copy the current character to the result */
1179
                dst.setch(p.getch());
1180
1181
                /* skip the current character of input */
1182
                p.inc(&rem);
1183
            }
1184
        }
1185
1186
        /* copy the remaining source into the result */
1187
        if (rem != 0)
1188
            memcpy(dst.getptr(), p.getptr(), rem);
1189
    }
1190
    else
1191
    {
1192
        /* we didn't find it - the result is simply the original string */
1193
        *retval = *self_val;
1194
    }
1195
1196
    /* discard the gc protection */
1197
    G_stk->discard(2);
1198
1199
    /* handled */
1200
    return TRUE;
1201
}
1202
1203
/* ------------------------------------------------------------------------ */
1204
/*
1205
 *   property evaluator - convert to unicode
1206
 */
1207
int CVmObjString::getp_to_uni(VMG_ vm_val_t *retval,
1208
                              const vm_val_t *self_val,
1209
                              const char *str, uint *in_argc)
1210
{
1211
    uint argc = (in_argc != 0 ? *in_argc : 0);
1212
    size_t bytelen;
1213
    ulong idx = 0;
1214
    utf8_ptr p;
1215
    static CVmNativeCodeDesc desc(0, 1);
1216
1217
    /* check arguments */
1218
    if (get_prop_check_argc(retval, in_argc, &desc))
1219
        return TRUE;
1220
1221
    /* retrieve the index argument if present */
1222
    if (argc >= 1)
1223
        idx = CVmBif::pop_long_val(vmg0_);
1224
1225
    /* push a self-reference as GC protection */
1226
    G_stk->push(self_val);
1227
1228
    /* get and skip the string's length prefix */
1229
    bytelen = vmb_get_len(str);
1230
    str += VMB_LEN;
1231
1232
    /* set up a utf8 pointer to the string */
1233
    p.set((char *)str);
1234
1235
    /* check for an index argument */
1236
    if (argc >= 1)
1237
    {
1238
        /* skip through the string until we get to the desired index */
1239
        for ( ; idx > 1 && bytelen != 0 ; --idx, p.inc(&bytelen)) ;
1240
1241
        /* check to see if we have a character available */
1242
        if (idx == 1 && bytelen != 0)
1243
        {
1244
            /* the index is valid - return the character here */
1245
            retval->set_int((long)p.getch());
1246
        }
1247
        else
1248
        {
1249
            /* 
1250
             *   the index is past the end of the string or is less than 1
1251
             *   - return nil to indicate that there's no character here 
1252
             */
1253
            retval->set_nil();
1254
        }
1255
    }
1256
    else
1257
    {
1258
        size_t charlen;
1259
        vm_obj_id_t lst_obj;
1260
        CVmObjList *lst;
1261
        size_t i;
1262
1263
        /* 
1264
         *   There's no index argument - they want a list of all of the
1265
         *   code points in the string.  First, get the number of
1266
         *   characters in the string.  
1267
         */
1268
        charlen = p.len(bytelen);
1269
1270
        /* create a list to hold the results */
1271
        lst_obj = CVmObjList::create(vmg_ FALSE, charlen);
1272
        lst = (CVmObjList *)vm_objp(vmg_ lst_obj);
1273
1274
        /* set the list's elements to the unicode characters values */
1275
        for (i = 0 ; i < charlen ; ++i, p.inc())
1276
        {
1277
            wchar_t ch;
1278
            vm_val_t ele_val;
1279
1280
            /* get this character */
1281
            ch = p.getch();
1282
1283
            /* set this list element */
1284
            ele_val.set_int((long)ch);
1285
            lst->cons_set_element(i, &ele_val);
1286
        }
1287
1288
        /* return the list object */
1289
        retval->set_obj(lst_obj);
1290
    }
1291
    
1292
    /* discard the GC protection */
1293
    G_stk->discard();
1294
1295
    /* handled */
1296
    return TRUE;
1297
}
1298
1299
/* ------------------------------------------------------------------------ */
1300
/*
1301
 *   property evaluator - htmlify
1302
 */
1303
1304
/* 
1305
 *   htmlify flags 
1306
 */
1307
1308
/* preserve spaces */
1309
#define VMSTR_HTMLIFY_KEEP_SPACES   0x0001
1310
1311
/* preserve newlines */
1312
#define VMSTR_HTMLIFY_KEEP_NEWLINES 0x0002
1313
1314
/* preserve tabs */
1315
#define VMSTR_HTMLIFY_KEEP_TABS     0x0004
1316
1317
/*
1318
 *   htmlify implementation 
1319
 */
1320
int CVmObjString::getp_htmlify(VMG_ vm_val_t *retval,
1321
                               const vm_val_t *self_val,
1322
                               const char *str, uint *in_argc)
1323
{
1324
    uint argc = (in_argc != 0 ? *in_argc : 0);
1325
    size_t bytelen;
1326
    utf8_ptr p;
1327
    utf8_ptr dstp;
1328
    size_t rem;
1329
    size_t extra;
1330
    long flags;
1331
    vm_obj_id_t result_obj;
1332
    int prv_was_sp;
1333
    static CVmNativeCodeDesc desc(0, 1);
1334
1335
    /* check arguments */
1336
    if (get_prop_check_argc(retval, in_argc, &desc))
1337
        return TRUE;
1338
1339
    /* if they specified flags, pop them */
1340
    if (argc >= 1)
1341
    {
1342
        /* retrieve the flags */
1343
        flags = CVmBif::pop_long_val(vmg0_);
1344
    }
1345
    else
1346
    {
1347
        /* no flags */
1348
        flags = 0;
1349
    }
1350
1351
    /* push a self-reference as GC protection */
1352
    G_stk->push(self_val);
1353
1354
    /* get and skip the string's length prefix */
1355
    bytelen = vmb_get_len(str);
1356
    str += VMB_LEN;
1357
1358
    /* 
1359
     *   scan the string to determine how much space we'll have to add to
1360
     *   generate the htmlified version 
1361
     */
1362
    for (prv_was_sp = FALSE, extra = 0, p.set((char *)str), rem = bytelen ;
1363
         rem != 0 ; p.inc(&rem))
1364
    {
1365
        int this_is_sp;
1366
1367
        /* presume it's not a space */
1368
        this_is_sp = FALSE;
1369
        
1370
        /* check what we have */
1371
        switch(p.getch())
1372
        {
1373
        case '&':
1374
            /* we must replace '&' with '&amp;' - this adds four bytes */
1375
            extra += 4;
1376
            break;
1377
            
1378
        case '<':
1379
            /* we must replace '<' with '&lt;' - this adds three bytes */
1380
            extra += 3;
1381
            break;
1382
1383
        case ' ':
1384
            /* 
1385
             *   If we're in preserve-spaces mode, and the previous space
1386
             *   was some kind of whitespace character, change this to
1387
             *   '&nbsp;' - this adds five bytes 
1388
             */
1389
            if (prv_was_sp && (flags & VMSTR_HTMLIFY_KEEP_SPACES) != 0)
1390
                extra += 5;
1391
1392
            /* note that this was a whitespace character */
1393
            this_is_sp = TRUE;
1394
            break;
1395
1396
        case '\t':
1397
            /* if we're in preserve-tabs mode, change this to '<tab>' */
1398
            if ((flags & VMSTR_HTMLIFY_KEEP_TABS) != 0)
1399
                extra += 4;
1400
1401
            /* note that this was a whitespace character */
1402
            this_is_sp = TRUE;
1403
            break;
1404
1405
        case '\n':
1406
        case 0x2028:
1407
            /* if we're in preserve-newlines mode, change this to '<br>' */
1408
            if ((flags & VMSTR_HTMLIFY_KEEP_NEWLINES) != 0)
1409
                extra += 3;
1410
1411
            /* note that this was a whitespace character */
1412
            this_is_sp = TRUE;
1413
            break;
1414
        }
1415
1416
        /* for next time, remember whether this is a space */
1417
        prv_was_sp = this_is_sp;
1418
    }
1419
1420
    /* allocate space for the new string */
1421
    result_obj = create(vmg_ FALSE, bytelen + extra);
1422
1423
    /* get a pointer to the result buffer */
1424
    dstp.set(((CVmObjString *)vm_objp(vmg_ result_obj))->cons_get_buf());
1425
1426
    /* translate the string and write the result */
1427
    for (prv_was_sp = FALSE, p.set((char *)str), rem = bytelen ;
1428
         rem != 0 ; p.inc(&rem))
1429
    {
1430
        wchar_t ch;
1431
        int this_is_sp;
1432
1433
        /* get this character */
1434
        ch = p.getch();
1435
1436
        /* presume it's not a space */
1437
        this_is_sp = FALSE;
1438
        
1439
        /* check what we have */
1440
        switch(ch)
1441
        {
1442
        case '&':
1443
            /* replace '&' with '&amp;' */
1444
            dstp.setch_str("&amp;");
1445
            break;
1446
1447
        case '<':
1448
            /* we must replace '<' with '&lt;' - this adds three bytes */
1449
            dstp.setch_str("&lt;");
1450
            break;
1451
1452
        case ' ':
1453
            /* note that this was a whitespace character */
1454
            this_is_sp = TRUE;
1455
1456
            /* 
1457
             *   ignore it if not in preserve-spaces mode, or if the
1458
             *   previous character wasn't whitespace of some kind 
1459
             */
1460
            if (!prv_was_sp || (flags & VMSTR_HTMLIFY_KEEP_SPACES) == 0)
1461
                goto do_default;
1462
1463
            /* add the nbsp */
1464
            dstp.setch_str("&nbsp;");
1465
            break;
1466
1467
        case '\t':
1468
            /* note that this was a whitespace character */
1469
            this_is_sp = TRUE;
1470
1471
            /* ignore if not in preserve-tabs mode */
1472
            if ((flags & VMSTR_HTMLIFY_KEEP_TABS) == 0)
1473
                goto do_default;
1474
1475
            /* add the <tab> */
1476
            dstp.setch_str("<tab>");
1477
            break;
1478
1479
        case '\n':
1480
        case 0x2028:
1481
            /* note that this was a whitespace character */
1482
            this_is_sp = TRUE;
1483
1484
            /* if we're not in preserve-newlines mode, ignore it */
1485
            if ((flags & VMSTR_HTMLIFY_KEEP_NEWLINES) == 0)
1486
                goto do_default;
1487
1488
            /* add the <br> */
1489
            dstp.setch_str("<br>");
1490
            break;
1491
1492
        default:
1493
        do_default:
1494
            /* copy this character unchanged */
1495
            dstp.setch(ch);
1496
            break;
1497
        }
1498
1499
        /* for next time, remember whether this is a space */
1500
        prv_was_sp = this_is_sp;
1501
    }
1502
1503
    /* return the new string */
1504
    retval->set_obj(result_obj);
1505
    
1506
    /* discard the GC protection */
1507
    G_stk->discard();
1508
1509
    /* handled */
1510
    return TRUE;
1511
}
1512
1513
/* ------------------------------------------------------------------------ */
1514
/*
1515
 *   property evaluator - startsWith 
1516
 */
1517
int CVmObjString::getp_starts_with(VMG_ vm_val_t *retval,
1518
                                   const vm_val_t *self_val,
1519
                                   const char *str, uint *argc)
1520
{
1521
    static CVmNativeCodeDesc desc(1);
1522
    const char *str2;
1523
    size_t len;
1524
    size_t len2;
1525
1526
    /* check arguments */
1527
    if (get_prop_check_argc(retval, argc, &desc))
1528
        return TRUE;
1529
1530
    /* retrieve the other string */
1531
    str2 = CVmBif::pop_str_val(vmg0_);
1532
1533
    /* get the lengths of the two strings */
1534
    len = vmb_get_len(str);
1535
    len2 = vmb_get_len(str2);
1536
1537
    /* move to the contents of each string */
1538
    str += VMB_LEN;
1539
    str2 += VMB_LEN;
1540
1541
    /* 
1542
     *   if the other string is no longer than our string, and the other
1543
     *   string matches our string exactly for the other string's entire
1544
     *   length, we start with the other string 
1545
     */
1546
    retval->set_logical(len2 <= len && memcmp(str, str2, len2) == 0);
1547
1548
    /* handled */
1549
    return TRUE;
1550
}
1551
1552
/*
1553
 *   property evaluator - endsWith 
1554
 */
1555
int CVmObjString::getp_ends_with(VMG_ vm_val_t *retval,
1556
                                 const vm_val_t *self_val,
1557
                                 const char *str, uint *argc)
1558
{
1559
    static CVmNativeCodeDesc desc(1);
1560
    const char *str2;
1561
    size_t len;
1562
    size_t len2;
1563
1564
    /* check arguments */
1565
    if (get_prop_check_argc(retval, argc, &desc))
1566
        return TRUE;
1567
1568
    /* retrieve the other string */
1569
    str2 = CVmBif::pop_str_val(vmg0_);
1570
1571
    /* get the lengths of the two strings */
1572
    len = vmb_get_len(str);
1573
    len2 = vmb_get_len(str2);
1574
1575
    /* move to the contents of each string */
1576
    str += VMB_LEN;
1577
    str2 += VMB_LEN;
1578
1579
    /* 
1580
     *   If the other string is no longer than our string, and the other
1581
     *   string matches our string at the end exactly for the other string's
1582
     *   entire length, we start with the other string.  Note we don't need
1583
     *   to worry about finding a valid character index in our string for
1584
     *   the ending offset, because all we care about is whether or not we
1585
     *   have an exact byte match between our suffix and the other string.  
1586
     */
1587
    retval->set_logical(len2 <= len
1588
                        && memcmp(str + len - len2, str2, len2) == 0);
1589
1590
    /* handled */
1591
    return TRUE;
1592
}
1593
1594
/* ------------------------------------------------------------------------ */
1595
/*
1596
 *   property evaluator - mapToByteArray 
1597
 */
1598
int CVmObjString::getp_to_byte_array(VMG_ vm_val_t *retval,
1599
                                     const vm_val_t *self_val,
1600
                                     const char *str, uint *argc)
1601
{
1602
    static CVmNativeCodeDesc desc(1);
1603
    size_t len;
1604
    CCharmapToLocal *mapper;
1605
    vm_val_t *arg;
1606
    size_t byte_len;
1607
    size_t src_bytes_used;
1608
    size_t out_idx;
1609
    CVmObjByteArray *arr;
1610
1611
    /* check arguments */
1612
    if (get_prop_check_argc(retval, argc, &desc))
1613
        return TRUE;
1614
1615
    /* retrieve the CharacterSet object and make sure it's valid */
1616
    arg = G_stk->get(0);
1617
    if (arg->typ != VM_OBJ || !CVmObjCharSet::is_charset(vmg_ arg->val.obj))
1618
        err_throw(VMERR_BAD_TYPE_BIF);
1619
1620
    /* get the to-local mapping from the character set */
1621
    mapper = ((CVmObjCharSet *)vm_objp(vmg_ arg->val.obj))
1622
             ->get_to_local(vmg0_);
1623
1624
    /* get my length and skip the length prefix */
1625
    len = vmb_get_len(str);
1626
    str += VMB_LEN;
1627
1628
    /* 
1629
     *   first, do a mapping with a null output buffer to determine how many
1630
     *   bytes we need for the mapping 
1631
     */
1632
    byte_len = mapper->map_utf8(0, 0, str, len, &src_bytes_used);
1633
1634
    /* allocate a new ByteArray with the required number of bytes */
1635
    retval->set_obj(CVmObjByteArray::create(vmg_ FALSE, byte_len));
1636
    arr = (CVmObjByteArray *)vm_objp(vmg_ retval->val.obj);
1637
1638
    /* convert it again, this time storing the bytes */
1639
    for (out_idx = 1 ; len != 0 ; )
1640
    {
1641
        char buf[128];
1642
1643
        /* convert a buffer-full */
1644
        byte_len = mapper->map_utf8(buf, sizeof(buf), str, len,
1645
                                    &src_bytes_used);
1646
1647
        /* store the bytes in the byte array */
1648
        arr->cons_copy_from_buf((unsigned char *)buf, out_idx, byte_len);
1649
1650
        /* advance past the output bytes we used */
1651
        out_idx += byte_len;
1652
1653
        /* advance past the source bytes we used */
1654
        str += src_bytes_used;
1655
        len -= src_bytes_used;
1656
    }
1657
1658
    /* discard arguments */
1659
    G_stk->discard();
1660
1661
    /* handled */
1662
    return TRUE;
1663
}
1664
1665
/* ------------------------------------------------------------------------ */
1666
/*
1667
 *   Constant-pool string object 
1668
 */
1669
1670
/*
1671
 *   create 
1672
 */
1673
vm_obj_id_t CVmObjStringConst::create(VMG_ const char *const_ptr)
1674
{
1675
    /* create our new ID */
1676
    vm_obj_id_t id = vm_new_id(vmg_ FALSE, FALSE, FALSE);
1677
1678
    /* create our string object, pointing directly to the constant pool */
1679
    new (vmg_ id) CVmObjStringConst(vmg_ const_ptr);
1680
1681
    /* return the new ID */
1682
    return id;
1683
}