| | 1 | #charset "us-ascii" |
| | 2 | |
| | 3 | /* |
| | 4 | * Copyright (c) 2001, 2006 Michael J. Roberts |
| | 5 | * |
| | 6 | * This file is part of TADS 3. |
| | 7 | * |
| | 8 | * This header defines the CharacterSet intrinsic class. |
| | 9 | */ |
| | 10 | |
| | 11 | #ifndef _CHARSET_H_ |
| | 12 | #define _CHARSET_H_ |
| | 13 | |
| | 14 | /* include our base class definition */ |
| | 15 | #include "systype.h" |
| | 16 | |
| | 17 | /* |
| | 18 | * The CharacterSet intrinsic class provides information on character set |
| | 19 | * translations and can be used to translate between the Unicode character |
| | 20 | * set that the T3 VM uses internally for string values and the local |
| | 21 | * character set or sets used for display, keyboard input, and file I/O. |
| | 22 | */ |
| | 23 | intrinsic class CharacterSet 'character-set/030001': Object |
| | 24 | { |
| | 25 | /* |
| | 26 | * Constructor: |
| | 27 | * |
| | 28 | * new CharacterSet(charsetName) - creates an object to represent the |
| | 29 | * named local character set. Certain character set names are |
| | 30 | * pre-defined: |
| | 31 | * |
| | 32 | * us-ascii - the plain 7-bit ASCII character set |
| | 33 | *. utf-8 - Unicode UTF-8 (a multi-byte unicode encoding) |
| | 34 | *. utf-16le - little-endian 16-bit Unicode |
| | 35 | *. utf-16be - big-endian 16-bit Unicode |
| | 36 | * |
| | 37 | * In addition, any character set for which the VM has an external |
| | 38 | * mapping file can be used. Check your platform-specific T3 |
| | 39 | * installation notes for information on how character set mapping |
| | 40 | * files are implemented on your version of T3. |
| | 41 | * |
| | 42 | * A CharacterSet can be created for a non-existent mapping, but the |
| | 43 | * object cannot be used to perform any mappings; an |
| | 44 | * UnknownCharacterSetException will be thrown if any mapping is |
| | 45 | * attempted with a CharacterSet object that has non-existent local |
| | 46 | * mappings. You can determine if the local mapping exists with the |
| | 47 | * isMappingKnown method. |
| | 48 | */ |
| | 49 | |
| | 50 | /* |
| | 51 | * Get the name of the character set. This simply returns the name |
| | 52 | * that was given to construct the character set. |
| | 53 | */ |
| | 54 | getName(); |
| | 55 | |
| | 56 | /* |
| | 57 | * Determine if the mapping is known. This returns true if the |
| | 58 | * character set has a known local mapping, nil if not. Note that it |
| | 59 | * doesn't matter whether or not the character set is actually in use |
| | 60 | * on the local platform; all that matters is that a T3 mapping file |
| | 61 | * is available on this machine. |
| | 62 | */ |
| | 63 | isMappingKnown(); |
| | 64 | |
| | 65 | /* |
| | 66 | * Determine if a character or string of characters is mappable to this |
| | 67 | * character set. If the input is an integer, it represents the |
| | 68 | * Unicode character code for a single character; if the input is a |
| | 69 | * string, each character in the string is checked. This returns true |
| | 70 | * if every character given has a valid mapping in the local character |
| | 71 | * set, nil if not. Note that if a string is given, and even one |
| | 72 | * character is not mappable, this returns nil. |
| | 73 | */ |
| | 74 | isMappable(val); |
| | 75 | |
| | 76 | /* |
| | 77 | * Determine if a character or string of characters is "round-trip" |
| | 78 | * mappable to this character set. If the input is an integer, it |
| | 79 | * represents a Unicode character code to be tested; if the input is a |
| | 80 | * string, each character in the string is tested. Returns true if |
| | 81 | * every character given has a valid round-trip mapping, nil if not. |
| | 82 | * |
| | 83 | * A character has a round-trip mapping if it can be mapped to this |
| | 84 | * local character set and then back to Unicode to yield the original |
| | 85 | * character. If a character has a round-trip mapping, then in general |
| | 86 | * the character has an exact representation in the local character set |
| | 87 | * (as opposed to an approximation: if 'a-umlaut' maps to a simple |
| | 88 | * unaccented 'a', or to 'ae', then it has only an approximated |
| | 89 | * representation). |
| | 90 | */ |
| | 91 | isRoundTripMappable(val); |
| | 92 | } |
| | 93 | |
| | 94 | #endif /* _CHARSET_H_ */ |