cfad47cfa3/tads3/vmcset.cpp

4b825dc642cb6eb9a060e54bf8d69288fbee4904cfad47cfa334b206c65f22086bcc5d63e6f70944
1
/* 
2
 *   Copyright (c) 2001, 2002 Michael J. Roberts.  All Rights Reserved.
3
 *   
4
 *   Please see the accompanying license file, LICENSE.TXT, for information
5
 *   on using and copying this software.  
6
 */
7
/*
8
Name
9
  vmcset.cpp - T3 CharacterSet metaclass
10
Function
11
  
12
Notes
13
  
14
Modified
15
  06/06/01 MJRoberts  - Creation
16
*/
17
18
#include <stdlib.h>
19
#include "vmtype.h"
20
#include "vmobj.h"
21
#include "vmglob.h"
22
#include "vmcset.h"
23
#include "vmbif.h"
24
#include "vmfile.h"
25
#include "vmerrnum.h"
26
#include "vmerr.h"
27
#include "vmstack.h"
28
#include "vmmeta.h"
29
#include "vmrun.h"
30
#include "charmap.h"
31
#include "vmstr.h"
32
#include "vmpredef.h"
33
#include "vmrun.h"
34
#include "vmhost.h"
35
36
37
/* ------------------------------------------------------------------------ */
38
/*
39
 *   statics 
40
 */
41
42
/* metaclass registration object */
43
static CVmMetaclassCharSet metaclass_reg_obj;
44
CVmMetaclass *CVmObjCharSet::metaclass_reg_ = &metaclass_reg_obj;
45
46
/* function table */
47
int (CVmObjCharSet::
48
     *CVmObjCharSet::func_table_[])(VMG_ vm_obj_id_t self,
49
                                    vm_val_t *retval, uint *argc) =
50
{
51
    &CVmObjCharSet::getp_undef,
52
    &CVmObjCharSet::getp_get_name,
53
    &CVmObjCharSet::getp_is_known,
54
    &CVmObjCharSet::getp_is_mappable,
55
    &CVmObjCharSet::getp_is_rt_mappable
56
};
57
58
59
/* ------------------------------------------------------------------------ */
60
/*
61
 *   Create from stack 
62
 */
63
vm_obj_id_t CVmObjCharSet::create_from_stack(VMG_ const uchar **pc_ptr,
64
                                             uint argc)
65
{
66
    vm_obj_id_t id;
67
    vm_val_t *arg1;
68
    const char *charset_name;
69
70
    /* check our arguments */
71
    if (argc != 1)
72
        err_throw(VMERR_WRONG_NUM_OF_ARGS);
73
74
    /* get the name of the character set */
75
    arg1 = G_stk->get(0);
76
    charset_name = arg1->get_as_string(vmg0_);
77
    if (charset_name == 0)
78
        err_throw(VMERR_BAD_TYPE_BIF);
79
80
    /* create the character set object */
81
    id = vm_new_id(vmg_ FALSE, FALSE, FALSE);
82
    new (vmg_ id) CVmObjCharSet(vmg_ charset_name + VMB_LEN,
83
                                vmb_get_len(charset_name));
84
85
    /* discard arguments */
86
    G_stk->discard(argc);
87
88
    /* return the new object */
89
    return id;
90
}
91
92
/* ------------------------------------------------------------------------ */
93
/*
94
 *   Create with no contents 
95
 */
96
vm_obj_id_t CVmObjCharSet::create(VMG_ int in_root_set)
97
{
98
    vm_obj_id_t id = vm_new_id(vmg_ in_root_set, FALSE, FALSE);
99
    new (vmg_ id) CVmObjCharSet();
100
    return id;
101
}
102
103
/*
104
 *   Create with the given character set name
105
 */
106
vm_obj_id_t CVmObjCharSet::create(VMG_ int in_root_set,
107
                                  const char *charset_name,
108
                                  size_t charset_name_len)
109
{
110
    vm_obj_id_t id = vm_new_id(vmg_ in_root_set, FALSE, FALSE);
111
    new (vmg_ id) CVmObjCharSet(vmg_ charset_name, charset_name_len);
112
    return id;
113
}
114
115
/* ------------------------------------------------------------------------ */
116
/*
117
 *   Instantiate 
118
 */
119
CVmObjCharSet::CVmObjCharSet(VMG_ const char *charset_name,
120
                             size_t charset_name_len)
121
{
122
    /* allocate and initialize our extension */
123
    ext_ = 0;
124
    alloc_ext(vmg_ charset_name, charset_name_len);
125
}
126
127
/*
128
 *   Allocate and initialize our extension 
129
 */
130
void CVmObjCharSet::alloc_ext(VMG_ const char *charset_name,
131
                              size_t charset_name_len)
132
{
133
    size_t alloc_size;
134
    vmobj_charset_ext_t *extp;
135
    CResLoader *res_ldr;
136
    
137
    /* if we already have an extension, delete it */
138
    if (ext_ != 0)
139
        G_mem->get_var_heap()->free_mem(ext_);
140
141
    /* 
142
     *   compute the size we need - note that we use the one fixed byte of
143
     *   the structure's name element as the extra byte we need for null
144
     *   termination of the name 
145
     */
146
    alloc_size = sizeof(vmobj_charset_ext_t) + charset_name_len;
147
148
    /* allocate space for our extension structure */
149
    ext_ = (char *)G_mem->get_var_heap()->alloc_mem(alloc_size, this);
150
151
    /* cast the extension to our structure type */
152
    extp = (vmobj_charset_ext_t *)ext_;
153
154
    /* store the character set name and length, null-terminating the name */
155
    extp->charset_name_len = charset_name_len;
156
    memcpy(extp->charset_name, charset_name, charset_name_len);
157
    extp->charset_name[charset_name_len] = '\0';
158
159
    /* get the resource loader */
160
    res_ldr = G_host_ifc->get_cmap_res_loader();
161
162
    /* if we have a resource loader, load the mappings */
163
    if (res_ldr != 0)
164
    {
165
        /* load the unicode-to-local mapping */
166
        extp->to_local = CCharmapToLocal::load(res_ldr, extp->charset_name);
167
168
        /* load the local-to-unicode mapping */
169
        extp->to_uni = CCharmapToUni::load(res_ldr, extp->charset_name);
170
    }
171
}
172
173
/* ------------------------------------------------------------------------ */
174
/*
175
 *   Notify of deletion
176
 */
177
void CVmObjCharSet::notify_delete(VMG_ int /*in_root_set*/)
178
{
179
    /* release our mapper objects */
180
    if (ext_ != 0)
181
    {
182
        /* release the to-local character mapper */
183
        if (get_ext_ptr()->to_local != 0)
184
            get_ext_ptr()->to_local->release_ref();
185
186
        /* release the to-unicode character mapper */
187
        if (get_ext_ptr()->to_uni != 0)
188
            get_ext_ptr()->to_uni->release_ref();
189
190
        /* free our extension */
191
        G_mem->get_var_heap()->free_mem(ext_);
192
    }
193
}
194
195
/* ------------------------------------------------------------------------ */
196
/* 
197
 *   set a property 
198
 */
199
void CVmObjCharSet::set_prop(VMG_ class CVmUndo *,
200
                             vm_obj_id_t, vm_prop_id_t,
201
                             const vm_val_t *)
202
{
203
    err_throw(VMERR_INVALID_SETPROP);
204
}
205
206
/* ------------------------------------------------------------------------ */
207
/* 
208
 *   get a property 
209
 */
210
int CVmObjCharSet::get_prop(VMG_ vm_prop_id_t prop, vm_val_t *retval,
211
                            vm_obj_id_t self, vm_obj_id_t *source_obj,
212
                            uint *argc)
213
{
214
    uint func_idx;
215
216
    /* translate the property index to an index into our function table */
217
    func_idx = G_meta_table
218
               ->prop_to_vector_idx(metaclass_reg_->get_reg_idx(), prop);
219
    
220
    /* call the appropriate function */
221
    if ((this->*func_table_[func_idx])(vmg_ self, retval, argc))
222
    {
223
        *source_obj = metaclass_reg_->get_class_obj(vmg0_);
224
        return TRUE;
225
    }
226
    
227
    /* inherit default handling */
228
    return CVmObject::get_prop(vmg_ prop, retval, self, source_obj, argc);
229
}
230
231
/* ------------------------------------------------------------------------ */
232
/*
233
 *   load from an image file 
234
 */
235
void CVmObjCharSet::load_from_image(VMG_ vm_obj_id_t self,
236
                                    const char *ptr, size_t siz)
237
{
238
    /* initialize with the character set name from the image file */
239
    alloc_ext(vmg_ ptr + VMB_LEN, vmb_get_len(ptr));
240
}
241
242
/* ------------------------------------------------------------------------ */
243
/* 
244
 *   save to a file 
245
 */
246
void CVmObjCharSet::save_to_file(VMG_ class CVmFile *fp)
247
{
248
    /* write the name length */
249
    fp->write_int2(get_ext_ptr()->charset_name_len);
250
251
    /* write the bytes of the name */
252
    fp->write_bytes(get_ext_ptr()->charset_name,
253
                    get_ext_ptr()->charset_name_len);
254
}
255
256
/* 
257
 *   restore from a file 
258
 */
259
void CVmObjCharSet::restore_from_file(VMG_ vm_obj_id_t self,
260
                                      CVmFile *fp, CVmObjFixup *)
261
{
262
    char buf[128];
263
    size_t len;
264
    size_t read_len;
265
266
    /* read the length of the character set name */
267
    len = fp->read_uint2();
268
269
    /* limit the reading to the length of the buffer */
270
    read_len = len;
271
    if (read_len > sizeof(buf))
272
        read_len = sizeof(buf);
273
274
    /* read the name, up to the buffer length */
275
    fp->read_bytes(buf, read_len);
276
277
    /* skip any bytes we couldn't fit in the buffer */
278
    if (len > read_len)
279
        fp->set_pos(fp->get_pos() + len - read_len);
280
281
    /* initialize from the saved data */
282
    alloc_ext(vmg_ buf, read_len);
283
}
284
285
/* ------------------------------------------------------------------------ */
286
/*
287
 *   Compare for equality 
288
 */
289
int CVmObjCharSet::equals(VMG_ vm_obj_id_t self, const vm_val_t *val,
290
                          int /*depth*/) const
291
{
292
    CVmObjCharSet *other;
293
    const vmobj_charset_ext_t *ext;
294
    const vmobj_charset_ext_t *other_ext;
295
296
    /* if it's a self-reference, it's certainly equal */
297
    if (val->typ == VM_OBJ && val->val.obj == self)
298
        return TRUE;
299
300
    /* if it's not another character set, it's not equal */
301
    if (val->typ != VM_OBJ || !is_charset(vmg_ val->val.obj))
302
        return FALSE;
303
304
    /* we know it's another character set - cast it */
305
    other = (CVmObjCharSet *)vm_objp(vmg_ val->val.obj);
306
307
    /* get my extension and the other extension */
308
    ext = get_ext_ptr();
309
    other_ext = other->get_ext_ptr();
310
311
    /* it's equal if it has the same name (ignoring case) */
312
    return (ext->charset_name_len == other_ext->charset_name_len
313
            && memicmp(ext->charset_name, other_ext->charset_name,
314
                       ext->charset_name_len) == 0);
315
}
316
317
/*
318
 *   Calculate a hash value 
319
 */
320
uint CVmObjCharSet::calc_hash(VMG_ vm_obj_id_t self, int /*depth*/) const
321
{
322
    uint hash;
323
    size_t rem;
324
    const char *p;
325
326
    /* add up the bytes in the array */
327
    for (hash = 0, rem = get_ext_ptr()->charset_name_len,
328
         p = get_ext_ptr()->charset_name ;
329
         rem != 0 ;
330
         --rem, ++p)
331
    {
332
        /* add this character into the hash */
333
        hash += *p;
334
    }
335
336
    /* return the result */
337
    return hash;
338
}
339
340
/* ------------------------------------------------------------------------ */
341
/* 
342
 *   property evaluator - get the character set name
343
 */
344
int CVmObjCharSet::getp_get_name(VMG_ vm_obj_id_t self,
345
                                 vm_val_t *retval, uint *argc)
346
{
347
    static CVmNativeCodeDesc desc(0);
348
349
    /* check arguments */
350
    if (get_prop_check_argc(retval, argc, &desc))
351
        return TRUE;
352
353
    /* create a new string for the name */
354
    retval->set_obj(CVmObjString::create(vmg_ FALSE,
355
                                         get_ext_ptr()->charset_name,
356
                                         get_ext_ptr()->charset_name_len));
357
358
    /* handled */
359
    return TRUE;
360
}
361
362
/* 
363
 *   property evaluator - is known
364
 */
365
int CVmObjCharSet::getp_is_known(VMG_ vm_obj_id_t self,
366
                                 vm_val_t *retval, uint *argc)
367
{
368
    static CVmNativeCodeDesc desc(0);
369
    
370
    /* check arguments */
371
    if (get_prop_check_argc(retval, argc, &desc))
372
        return TRUE;
373
374
    /* 
375
     *   it's known if both of our character mappers are non-null; if either
376
     *   is null, the character set is not known on this platform 
377
     */
378
    retval->set_logical(get_ext_ptr()->to_local != 0
379
                        && get_ext_ptr()->to_uni != 0);
380
381
    /* handled */
382
    return TRUE;
383
}
384
385
/* 
386
 *   property evaluator - check a character or a string for mappability 
387
 */
388
int CVmObjCharSet::getp_is_mappable(VMG_ vm_obj_id_t self,
389
                                    vm_val_t *retval, uint *argc)
390
{
391
    static CVmNativeCodeDesc desc(1);
392
    vm_val_t arg;
393
    const char *str;
394
    CCharmapToLocal *to_local;
395
396
    /* check arguments */
397
    if (get_prop_check_argc(retval, argc, &desc))
398
        return TRUE;
399
400
    /* get the local mapping */
401
    to_local = get_to_local(vmg0_);
402
403
    /* get the argument and check what type we have */
404
    G_stk->pop(&arg);
405
    if ((str = arg.get_as_string(vmg0_)) != 0)
406
    {
407
        size_t len;
408
        utf8_ptr p;
409
410
        /* get the length and skip the length prefix */
411
        len = vmb_get_len(str);
412
        str += VMB_LEN;
413
414
        /* presume every character will be mappable */
415
        retval->set_true();
416
417
        /* check each character for mappability */
418
        for (p.set((char *)str) ; len != 0 ; p.inc(&len))
419
        {
420
            /* check to see if this character is mappable */
421
            if (!to_local->is_mappable(p.getch()))
422
            {
423
                /* 
424
                 *   The character isn't mappable - this is an
425
                 *   all-or-nothing check, so if one isn't mappable we
426
                 *   return false.  Set the nil return and stop looking.
427
                 */
428
                retval->set_nil();
429
                break;
430
            }
431
        }
432
    }
433
    else if (arg.typ == VM_INT)
434
    {
435
        /* 
436
         *   Check if the integer character value is mappable.  If it's out
437
         *   of the 16-bit unicode range (0..0xffff), it's not mappable;
438
         *   otherwise, ask the character mapper. 
439
         */
440
        if (arg.val.intval < 0 || arg.val.intval > 0xffff)
441
        {
442
            /* it's out of the valid unicode range, so it's not mappable */
443
            retval->set_nil();
444
        }
445
        else
446
        {
447
            /* ask the character mapper */
448
            retval->set_logical(to_local->is_mappable(
449
                (wchar_t)arg.val.intval));
450
        }
451
    }
452
453
    /* handled */
454
    return TRUE;
455
}
456
457
/* 
458
 *   property evaluator - check a character or a string to see if it has a
459
 *   round-trip mapping.  A round-trip mapping is one where the unicode
460
 *   characters can be mapped to the local character set, then back to
461
 *   unicode, yielding the exact original unicode string.  
462
 */
463
int CVmObjCharSet::getp_is_rt_mappable(VMG_ vm_obj_id_t self,
464
                                       vm_val_t *retval, uint *argc)
465
{
466
    static CVmNativeCodeDesc desc(1);
467
    vm_val_t arg;
468
    const char *str;
469
    CCharmapToLocal *to_local;
470
    CCharmapToUni *to_uni;
471
472
    /* check arguments */
473
    if (get_prop_check_argc(retval, argc, &desc))
474
        return TRUE;
475
476
    /* get the local and unicode mappings */
477
    to_local = get_to_local(vmg0_);
478
    to_uni = get_to_uni(vmg0_);
479
480
    /* get the argument and check what type we have */
481
    G_stk->pop(&arg);
482
    if ((str = arg.get_as_string(vmg0_)) != 0)
483
    {
484
        size_t len;
485
        utf8_ptr p;
486
487
        /* get the length and skip the length prefix */
488
        len = vmb_get_len(str);
489
        str += VMB_LEN;
490
491
        /* presume every character will be mappable */
492
        retval->set_true();
493
494
        /* check each character for mappability */
495
        for (p.set((char *)str) ; len != 0 ; p.inc(&len))
496
        {
497
            /* check for round-trip mappability */
498
            if (!is_rt_mappable(p.getch(), to_local, to_uni))
499
            {
500
                /* nope - return false */
501
                retval->set_nil();
502
                break;
503
            }
504
        }
505
    }
506
    else if (arg.typ == VM_INT)
507
    {
508
        /* check the integer character for mappability */
509
        if (arg.val.intval < 0 || arg.val.intval > 0xffff)
510
        {
511
            /* it's out of the valid unicode range, so it's not mappable */
512
            retval->set_nil();
513
        }
514
        else
515
        {
516
            /* ask the character mapper */
517
            retval->set_logical(is_rt_mappable(
518
                (wchar_t)arg.val.intval, to_local, to_uni));
519
        }
520
    }
521
522
    /* handled */
523
    return TRUE;
524
}
525
526
/*------------------------------------------------------------------------ */
527
/*
528
 *   Determine if a character has a round-trip mapping.  
529
 */
530
int CVmObjCharSet::is_rt_mappable(wchar_t c, CCharmapToLocal *to_local,
531
                                  CCharmapToUni *to_uni)
532
{
533
    char lclbuf[16];
534
    char unibuf[16];
535
    size_t lcllen;
536
    size_t unilen;
537
    char *p;
538
539
    /* if there's no local mapping, it's obviously not mappable */
540
    if (!to_local->is_mappable(c))
541
        return FALSE;
542
543
    /* 
544
     *   If there's an expansion in the mapping to the local set, then there
545
     *   can't be a round-trip mapping.  Expansions are inherently one-way
546
     *   because they produce multiple local characters for a single unicode
547
     *   character, and the reverse mapping has no way to group those
548
     *   multiple local characters back into a single unicode character.  
549
     */
550
    if (to_local->get_expansion(c, &lcllen) != 0)
551
        return FALSE;
552
553
    /* get the local mapping */
554
    lcllen = to_local->map_char(c, lclbuf, sizeof(lclbuf));
555
556
    /* map it back to unicode */
557
    p = unibuf;
558
    unilen = sizeof(unibuf);
559
    unilen = to_uni->map(&p, &unilen, lclbuf, lcllen);
560
561
    /* 
562
     *   if the unicode mapping is one character that exactly matches the
563
     *   original input character, then we have a valid round-trip mapping 
564
     */
565
    return (unilen == utf8_ptr::s_wchar_size(c)
566
            && utf8_ptr::s_getch(unibuf) == c);
567
}
568
569
/*------------------------------------------------------------------------ */
570
/*
571
 *   Get the unicode-to-local character set mapper 
572
 */
573
CCharmapToLocal *CVmObjCharSet::get_to_local(VMG0_) const
574
{
575
    /* if there's no mapper, throw an exception */
576
    if (get_ext_ptr()->to_local == 0)
577
    {
578
        /* throw an UnknownCharacterSetException */
579
        G_interpreter->throw_new_class(vmg_ G_predef->charset_unknown_exc,
580
                                       0, "unknown character set");
581
    }
582
583
    /* return the mapper */
584
    return get_ext_ptr()->to_local;
585
}
586
587
/*
588
 *   Get the local-to-unicode character set mapper 
589
 */
590
CCharmapToUni *CVmObjCharSet::get_to_uni(VMG0_) const
591
{
592
    /* if there's no mapper, throw an exception */
593
    if (get_ext_ptr()->to_uni == 0)
594
    {
595
        /* throw an UnknownCharacterSetException */
596
        G_interpreter->throw_new_class(vmg_ G_predef->charset_unknown_exc,
597
                                       0, "unknown character set");
598
    }
599
600
    /* return the mapper */
601
    return get_ext_ptr()->to_uni;
602
}