| | 1 | /* $Header: d:/cvsroot/tads/tads3/VMSTR.H,v 1.2 1999/05/17 02:52:28 MJRoberts Exp $ */ |
| | 2 | |
| | 3 | /* |
| | 4 | * Copyright (c) 1998, 2002 Michael J. Roberts. All Rights Reserved. |
| | 5 | * |
| | 6 | * Please see the accompanying license file, LICENSE.TXT, for information |
| | 7 | * on using and copying this software. |
| | 8 | */ |
| | 9 | /* |
| | 10 | Name |
| | 11 | vmstr.h - VM dynamic string implementation |
| | 12 | Function |
| | 13 | |
| | 14 | Notes |
| | 15 | |
| | 16 | Modified |
| | 17 | 10/28/98 MJRoberts - Creation |
| | 18 | */ |
| | 19 | |
| | 20 | #ifndef VMSTR_H |
| | 21 | #define VMSTR_H |
| | 22 | |
| | 23 | #include "vmglob.h" |
| | 24 | #include "vmobj.h" |
| | 25 | |
| | 26 | class CVmObjString: public CVmObject |
| | 27 | { |
| | 28 | friend class CVmMetaclassString; |
| | 29 | |
| | 30 | public: |
| | 31 | /* metaclass registration object */ |
| | 32 | static class CVmMetaclass *metaclass_reg_; |
| | 33 | class CVmMetaclass *get_metaclass_reg() const { return metaclass_reg_; } |
| | 34 | |
| | 35 | /* am I of the given metaclass? */ |
| | 36 | virtual int is_of_metaclass(class CVmMetaclass *meta) const |
| | 37 | { |
| | 38 | /* try my own metaclass and my base class */ |
| | 39 | return (meta == metaclass_reg_ |
| | 40 | || CVmObject::is_of_metaclass(meta)); |
| | 41 | } |
| | 42 | |
| | 43 | /* create from stack arguments */ |
| | 44 | static vm_obj_id_t create_from_stack(VMG_ const uchar **pc_ptr, |
| | 45 | uint argc); |
| | 46 | |
| | 47 | /* reserve constant data */ |
| | 48 | virtual void reserve_const_data(VMG_ class CVmConstMapper *mapper, |
| | 49 | vm_obj_id_t self); |
| | 50 | |
| | 51 | /* convert to constant data */ |
| | 52 | virtual void convert_to_const_data(VMG_ class CVmConstMapper *mapper, |
| | 53 | vm_obj_id_t self); |
| | 54 | |
| | 55 | /* get my datatype when converted to constant data */ |
| | 56 | virtual vm_datatype_t get_convert_to_const_data_type() const |
| | 57 | { return VM_SSTRING; } |
| | 58 | |
| | 59 | /* create a string with no initial contents */ |
| | 60 | static vm_obj_id_t create(VMG_ int in_root_set); |
| | 61 | |
| | 62 | /* create a string to hold a string of the given byte length */ |
| | 63 | static vm_obj_id_t create(VMG_ int in_root_set, size_t bytelen); |
| | 64 | |
| | 65 | /* create from a constant UTF-8 string */ |
| | 66 | static vm_obj_id_t create(VMG_ int in_root_set, |
| | 67 | const char *str, size_t bytelen); |
| | 68 | |
| | 69 | /* |
| | 70 | * For construction: get a pointer to the string's underlying |
| | 71 | * buffer. Returns a pointer into which the caller can write. The |
| | 72 | * buffer starts after the length prefix. |
| | 73 | */ |
| | 74 | char *cons_get_buf() const { return ext_ + 2; } |
| | 75 | |
| | 76 | /* |
| | 77 | * For construction: set my length. This can be used if the string |
| | 78 | * stored is smaller than the buffer allocated. This cannot be used |
| | 79 | * to expand the buffer, since this merely writes the length prefix |
| | 80 | * and does not reallocate the buffer. |
| | 81 | */ |
| | 82 | void cons_set_len(size_t len) { vmb_put_len(ext_, len); } |
| | 83 | |
| | 84 | /* notify of deletion */ |
| | 85 | void notify_delete(VMG_ int in_root_set); |
| | 86 | |
| | 87 | /* set a property */ |
| | 88 | void set_prop(VMG_ class CVmUndo *undo, |
| | 89 | vm_obj_id_t self, vm_prop_id_t prop, const vm_val_t *val); |
| | 90 | |
| | 91 | /* |
| | 92 | * call a static property - we don't have any of our own, so simply |
| | 93 | * "inherit" the base class handling |
| | 94 | */ |
| | 95 | static int call_stat_prop(VMG_ vm_val_t *result, |
| | 96 | const uchar **pc_ptr, uint *argc, |
| | 97 | vm_prop_id_t prop) |
| | 98 | { return CVmObject::call_stat_prop(vmg_ result, pc_ptr, argc, prop); } |
| | 99 | |
| | 100 | /* undo operations - strings are immutable and hence keep no undo */ |
| | 101 | void notify_new_savept() { } |
| | 102 | void apply_undo(VMG_ struct CVmUndoRecord *) { }; |
| | 103 | void mark_undo_ref(VMG_ struct CVmUndoRecord *) { } |
| | 104 | void remove_stale_undo_weak_ref(VMG_ struct CVmUndoRecord *) { } |
| | 105 | |
| | 106 | /* reference operations - strings reference no other objects */ |
| | 107 | void mark_refs(VMG_ uint state) { } |
| | 108 | void remove_stale_weak_refs(VMG0_) { } |
| | 109 | |
| | 110 | /* load from an image file */ |
| | 111 | void load_from_image(VMG_ vm_obj_id_t, const char *ptr, size_t) |
| | 112 | { ext_ = (char *)ptr; } |
| | 113 | |
| | 114 | /* rebuild for image file */ |
| | 115 | virtual ulong rebuild_image(VMG_ char *buf, ulong buflen); |
| | 116 | |
| | 117 | /* save to a file */ |
| | 118 | void save_to_file(VMG_ class CVmFile *fp); |
| | 119 | |
| | 120 | /* restore from a file */ |
| | 121 | void restore_from_file(VMG_ vm_obj_id_t self, |
| | 122 | class CVmFile *fp, class CVmObjFixup *fixups); |
| | 123 | |
| | 124 | /* |
| | 125 | * add a value to the string -- this creates a new string by |
| | 126 | * appending the value to this string |
| | 127 | */ |
| | 128 | void add_val(VMG_ vm_val_t *result, |
| | 129 | vm_obj_id_t self, const vm_val_t *val); |
| | 130 | |
| | 131 | /* |
| | 132 | * Get a string representation of the object. This is trivial for a |
| | 133 | * string object - we simply return our extension, which contains |
| | 134 | * the string in the required format. |
| | 135 | */ |
| | 136 | const char *cast_to_string(VMG_ vm_obj_id_t self, |
| | 137 | vm_val_t *new_str) const |
| | 138 | { |
| | 139 | /* we are the string object */ |
| | 140 | new_str->set_obj(self); |
| | 141 | |
| | 142 | /* return our extension directly */ |
| | 143 | return ext_; |
| | 144 | } |
| | 145 | |
| | 146 | /* get the underlying string */ |
| | 147 | const char *get_as_string(VMG0_) const { return ext_; } |
| | 148 | |
| | 149 | /* |
| | 150 | * Static routine to add a value to a string constant. Creates a |
| | 151 | * new string by appending the given value to the given string |
| | 152 | * constant. The string constant must be stored in portable format: |
| | 153 | * the first two bytes are the length prefix, in UINT2 format, |
| | 154 | * giving the length of the string's contents not counting the |
| | 155 | * prefix itself; immediately following the length prefix are the |
| | 156 | * bytes of the string's contents. |
| | 157 | */ |
| | 158 | static void add_to_str(VMG_ vm_val_t *result, |
| | 159 | vm_obj_id_t self, const char *strval, |
| | 160 | const vm_val_t *val); |
| | 161 | |
| | 162 | /* |
| | 163 | * Check a value for equality. We will match any constant string |
| | 164 | * that contains the same text as our string, and any other string |
| | 165 | * object with the same text. |
| | 166 | */ |
| | 167 | int equals(VMG_ vm_obj_id_t self, const vm_val_t *val, int depth) const; |
| | 168 | |
| | 169 | /* |
| | 170 | * Compare the string to another value. If the other value is a |
| | 171 | * constant string or string object, we'll perform a lexical |
| | 172 | * comparison of the string; other types are not comparable to |
| | 173 | * strings, so we'll throw an error for any other type. |
| | 174 | */ |
| | 175 | int compare_to(VMG_ vm_obj_id_t self, const vm_val_t *val) const; |
| | 176 | |
| | 177 | /* calculate a hash */ |
| | 178 | uint calc_hash(VMG_ vm_obj_id_t self, int depth) const; |
| | 179 | |
| | 180 | /* |
| | 181 | * Convert a value to a string. Throws an error if the value is not |
| | 182 | * convertible to a string. |
| | 183 | * |
| | 184 | * The result is stored in the given buffer, if possible, in |
| | 185 | * portable string format (with a portable UINT2 length prefix |
| | 186 | * followed by the string's bytes). If the buffer is not provided |
| | 187 | * or is not large enough to contain the result, we will allocate a |
| | 188 | * new string object and return its contents; since the string |
| | 189 | * object will never be referenced by anyone, it will be deleted in |
| | 190 | * the next garbage collection pass. In any case, we will return a |
| | 191 | * pointer to a buffer containing the result string. |
| | 192 | * |
| | 193 | * We'll fill in *new_obj with the new string object value, or nil |
| | 194 | * if we don't create a new string; this allows the caller to |
| | 195 | * protect the allocated object from garbage collection if |
| | 196 | * necessary. |
| | 197 | */ |
| | 198 | static const char *cvt_to_str(VMG_ vm_val_t *new_obj, |
| | 199 | char *result_buf, size_t result_buf_size, |
| | 200 | const vm_val_t *val, int radix); |
| | 201 | |
| | 202 | /* |
| | 203 | * Convert an integer to a string, storing the result in the given |
| | 204 | * buffer in portable string format (with length prefix). The radix |
| | 205 | * must be 8, 10, or 16. |
| | 206 | * |
| | 207 | * Decimal numbers are treated as signed, and a leading dash is |
| | 208 | * included if the number is negative. Octal and hex numbers are |
| | 209 | * treated as unsigned. |
| | 210 | * |
| | 211 | * For efficiency, we store the number at the end of the buffer |
| | 212 | * (this makes it easy to generate the number, since we need to |
| | 213 | * generate numerals in reverse order). We return a pointer to the |
| | 214 | * result, which may not start at the beginning of the buffer. |
| | 215 | */ |
| | 216 | static char *cvt_int_to_str(char *buf, size_t buflen, |
| | 217 | int32 inval, int radix); |
| | 218 | |
| | 219 | /* |
| | 220 | * Allocate a string buffer large enough to hold a given value. |
| | 221 | * We'll use the provided buffer if possible. |
| | 222 | * |
| | 223 | * If the provided buffer is null or is not large enough, we'll |
| | 224 | * allocate a new string object with a large enough buffer to hold |
| | 225 | * the value, and return the object's extension as the buffer. |
| | 226 | * |
| | 227 | * The buffer size and requested size are in bytes. |
| | 228 | * |
| | 229 | * If we allocate a new object, we'll set new_obj to the object |
| | 230 | * value; otherwise we'll set new_obj to nil. |
| | 231 | */ |
| | 232 | static char *alloc_str_buf(VMG_ vm_val_t *new_obj, |
| | 233 | char *buf, size_t buf_size, |
| | 234 | size_t required_size); |
| | 235 | |
| | 236 | /* |
| | 237 | * Constant string equality test routine. Compares the given |
| | 238 | * constant string (in portable format, with leading UINT2 length |
| | 239 | * prefix followed by the string's text in UTF8 format) to the other |
| | 240 | * value. Returns true if the values are lexically identical, false |
| | 241 | * if not or if the other value is not a string of some kind. |
| | 242 | */ |
| | 243 | static int const_equals(VMG_ const char *str, const vm_val_t *val); |
| | 244 | |
| | 245 | /* |
| | 246 | * Constant string hash value calculation |
| | 247 | */ |
| | 248 | static uint const_calc_hash(const char *str); |
| | 249 | |
| | 250 | /* |
| | 251 | * Constant string magnitude comparison routine. Compares the given |
| | 252 | * constant string (in portable format) to the other value. Returns |
| | 253 | * a positive value if the constant string is lexically greater than |
| | 254 | * the other value, a negative value if the constant string is |
| | 255 | * lexically less than the other value, or zero if the two values |
| | 256 | * are identical. Throws an error for any other type of value. |
| | 257 | */ |
| | 258 | static int const_compare(VMG_ const char *str, const vm_val_t *val); |
| | 259 | |
| | 260 | /* |
| | 261 | * Find a substring within a string. Returns a pointer to to the |
| | 262 | * start of the substring within the string, or null if the |
| | 263 | * substring isn't found. If 'idxp' is non-null, we'll fill in |
| | 264 | * *idxp with the character index, starting at zero for the first |
| | 265 | * character, of the substring within the string. |
| | 266 | * |
| | 267 | * Both strings are in standard constant string format, with UINT2 |
| | 268 | * length prefixes. |
| | 269 | */ |
| | 270 | static const char *find_substr(VMG_ const char *str, int start_idx, |
| | 271 | const char *substr, size_t *idxp); |
| | 272 | |
| | 273 | /* |
| | 274 | * Evaluate a property of a constant string value. Returns true if |
| | 275 | * we successfully evaluated the property, false if the property is |
| | 276 | * not one of the properties that the string class defines. |
| | 277 | */ |
| | 278 | static int const_get_prop(VMG_ vm_val_t *retval, const vm_val_t *self_val, |
| | 279 | const char *str, vm_prop_id_t prop, |
| | 280 | vm_obj_id_t *srcobj, uint *argc); |
| | 281 | |
| | 282 | /* evaluate a property */ |
| | 283 | virtual int get_prop(VMG_ vm_prop_id_t prop, vm_val_t *val, |
| | 284 | vm_obj_id_t self, vm_obj_id_t *source_obj, |
| | 285 | uint *argc); |
| | 286 | |
| | 287 | /* property evaluator - undefined property */ |
| | 288 | static int getp_undef(VMG_ vm_val_t *, const vm_val_t *, |
| | 289 | const char *, uint *) |
| | 290 | { return FALSE; } |
| | 291 | |
| | 292 | /* property evaluator - get the length */ |
| | 293 | static int getp_len(VMG_ vm_val_t *retval, const vm_val_t *self_val, |
| | 294 | const char *str, uint *argc); |
| | 295 | |
| | 296 | /* property evaluator - extract a substring */ |
| | 297 | static int getp_substr(VMG_ vm_val_t *retval, const vm_val_t *self_val, |
| | 298 | const char *str, uint *argc); |
| | 299 | |
| | 300 | /* property evaluator - toUpper */ |
| | 301 | static int getp_upper(VMG_ vm_val_t *retval, const vm_val_t *self_val, |
| | 302 | const char *str, uint *argc); |
| | 303 | |
| | 304 | /* property evaluator - toLower */ |
| | 305 | static int getp_lower(VMG_ vm_val_t *retval, const vm_val_t *self_val, |
| | 306 | const char *str, uint *argc); |
| | 307 | |
| | 308 | /* property evaluator - find substring */ |
| | 309 | static int getp_find(VMG_ vm_val_t *retval, const vm_val_t *self_val, |
| | 310 | const char *str, uint *argc); |
| | 311 | |
| | 312 | /* property evaluator - convert to unicode */ |
| | 313 | static int getp_to_uni(VMG_ vm_val_t *retval, const vm_val_t *self_val, |
| | 314 | const char *str, uint *argc); |
| | 315 | |
| | 316 | /* property evaluator - htmlify */ |
| | 317 | static int getp_htmlify(VMG_ vm_val_t *retval, const vm_val_t *self_val, |
| | 318 | const char *str, uint *argc); |
| | 319 | |
| | 320 | /* property evaluator - startsWith */ |
| | 321 | static int getp_starts_with(VMG_ vm_val_t *retval, |
| | 322 | const vm_val_t *self_val, |
| | 323 | const char *str, uint *argc); |
| | 324 | |
| | 325 | /* property evaluator - endsWith */ |
| | 326 | static int getp_ends_with(VMG_ vm_val_t *retval, |
| | 327 | const vm_val_t *self_val, |
| | 328 | const char *str, uint *argc); |
| | 329 | |
| | 330 | /* property evaluator - mapToByteArray */ |
| | 331 | static int getp_to_byte_array(VMG_ vm_val_t *retval, |
| | 332 | const vm_val_t *self_val, |
| | 333 | const char *str, uint *argc); |
| | 334 | |
| | 335 | /* property evaluator - replace substring */ |
| | 336 | static int getp_replace(VMG_ vm_val_t *retval, const vm_val_t *self_val, |
| | 337 | const char *str, uint *argc); |
| | 338 | |
| | 339 | protected: |
| | 340 | /* create a string with no initial contents */ |
| | 341 | CVmObjString() { ext_ = 0; } |
| | 342 | |
| | 343 | /* create with a given buffer size in bytes */ |
| | 344 | CVmObjString(VMG_ size_t bytelen); |
| | 345 | |
| | 346 | /* create from a constant UTF-8 string */ |
| | 347 | CVmObjString(VMG_ const char *str, size_t bytelen); |
| | 348 | |
| | 349 | /* |
| | 350 | * Set the length of the string. This can be used after a string is |
| | 351 | * constructed to set the size of the actual stored string. |
| | 352 | */ |
| | 353 | void set_length(size_t bytelen) { vmb_put_len(ext_, bytelen); } |
| | 354 | |
| | 355 | /* copy bytes into the string buffer */ |
| | 356 | void copy_into_str(const char *str, size_t bytelen) |
| | 357 | { memcpy(ext_ + VMB_LEN, str, bytelen); } |
| | 358 | |
| | 359 | /* copy bytes into the string buffer starting at the given byte offset */ |
| | 360 | void copy_into_str(size_t ofs, const char *str, size_t bytelen) |
| | 361 | { memcpy(ext_ + VMB_LEN + ofs, str, bytelen); } |
| | 362 | |
| | 363 | /* property evaluation function table */ |
| | 364 | static int (*func_table_[])(VMG_ vm_val_t *retval, |
| | 365 | const vm_val_t *self_val, |
| | 366 | const char *str, uint *argc); |
| | 367 | }; |
| | 368 | |
| | 369 | /* ------------------------------------------------------------------------ */ |
| | 370 | /* |
| | 371 | * A constant string is exactly like an ordinary string, except that our |
| | 372 | * contents come from the constant pool. We store a pointer directly to |
| | 373 | * our constant pool data rather than making a separate copy. The only |
| | 374 | * thing we have to do differently from an ordinary string is that we don't |
| | 375 | * delete our extension when we're deleted, since our extension is really |
| | 376 | * just a pointer into the constant pool. |
| | 377 | */ |
| | 378 | class CVmObjStringConst: public CVmObjString |
| | 379 | { |
| | 380 | public: |
| | 381 | /* notify of deletion */ |
| | 382 | void notify_delete(VMG_ int /*in_root_set*/) |
| | 383 | { |
| | 384 | /* |
| | 385 | * do nothing, since our extension is just a pointer into the |
| | 386 | * constant pool |
| | 387 | */ |
| | 388 | } |
| | 389 | |
| | 390 | /* create from constant pool data */ |
| | 391 | static vm_obj_id_t create(VMG_ const char *const_ptr); |
| | 392 | |
| | 393 | protected: |
| | 394 | /* construct from constant pool data */ |
| | 395 | CVmObjStringConst(VMG_ const char *const_ptr) |
| | 396 | { |
| | 397 | /* point our extension directly to the constant pool data */ |
| | 398 | ext_ = (char *)const_ptr; |
| | 399 | } |
| | 400 | }; |
| | 401 | |
| | 402 | |
| | 403 | /* ------------------------------------------------------------------------ */ |
| | 404 | /* |
| | 405 | * Registration table object |
| | 406 | */ |
| | 407 | class CVmMetaclassString: public CVmMetaclass |
| | 408 | { |
| | 409 | public: |
| | 410 | /* get the global name */ |
| | 411 | const char *get_meta_name() const { return "string/030005"; } |
| | 412 | |
| | 413 | /* create from image file */ |
| | 414 | void create_for_image_load(VMG_ vm_obj_id_t id) |
| | 415 | { |
| | 416 | new (vmg_ id) CVmObjString(); |
| | 417 | G_obj_table->set_obj_gc_characteristics(id, FALSE, FALSE); |
| | 418 | } |
| | 419 | |
| | 420 | /* create from restoring from saved state */ |
| | 421 | void create_for_restore(VMG_ vm_obj_id_t id) |
| | 422 | { |
| | 423 | new (vmg_ id) CVmObjString(); |
| | 424 | G_obj_table->set_obj_gc_characteristics(id, FALSE, FALSE); |
| | 425 | } |
| | 426 | |
| | 427 | /* create dynamically using stack arguments */ |
| | 428 | vm_obj_id_t create_from_stack(VMG_ const uchar **pc_ptr, uint argc) |
| | 429 | { return CVmObjString::create_from_stack(vmg_ pc_ptr, argc); } |
| | 430 | |
| | 431 | /* call a static property */ |
| | 432 | int call_stat_prop(VMG_ vm_val_t *result, |
| | 433 | const uchar **pc_ptr, uint *argc, |
| | 434 | vm_prop_id_t prop) |
| | 435 | { |
| | 436 | return CVmObjString::call_stat_prop(vmg_ result, pc_ptr, argc, prop); |
| | 437 | } |
| | 438 | }; |
| | 439 | |
| | 440 | #endif /* VMSTR_H */ |
| | 441 | |
/*
 *   Register the class.
 *
 *   Note that this invocation comes after the #endif that closes the
 *   VMSTR_H include guard, so it is expanded each time this header is
 *   included, not just the first time.
 *
 *   NOTE(review): presumably this is deliberate, so that the registration
 *   code can re-include this header under different definitions of
 *   VM_REGISTER_METACLASS - confirm against the macro's definition before
 *   moving this inside the guard.
 */
VM_REGISTER_METACLASS(CVmObjString)
| | 446 | |