| | 1 | #ifdef RCSID |
| | 2 | static char RCSid[] = |
| | 3 | "$Header: d:/cvsroot/tads/tads3/utf8.cpp,v 1.2 1999/05/17 02:52:29 MJRoberts Exp $"; |
| | 4 | #endif |
| | 5 | |
| | 6 | /* |
| | 7 | * Copyright (c) 1998, 2002 Michael J. Roberts. All Rights Reserved. |
| | 8 | * |
| | 9 | * Please see the accompanying license file, LICENSE.TXT, for information |
| | 10 | * on using and copying this software. |
| | 11 | */ |
| | 12 | /* |
| | 13 | Name |
| | 14 | utf8.cpp - UTF-8 implementation |
| | 15 | Function |
| | 16 | |
| | 17 | Notes |
| | 18 | |
| | 19 | Modified |
| | 20 | 10/17/98 MJRoberts - Creation |
| | 21 | */ |
| | 22 | |
| | 23 | #include "utf8.h" |
| | 24 | |
| | 25 | /* ------------------------------------------------------------------------ */ |
| | 26 | /* |
| | 27 | * encode a string of wide characters into the buffer |
| | 28 | */ |
| | 29 | size_t utf8_ptr::setwchars(const wchar_t *src, size_t src_count, |
| | 30 | size_t bufsiz) |
| | 31 | { |
| | 32 | size_t outbytes; |
| | 33 | |
| | 34 | /* loop through the source and store the characters */ |
| | 35 | for (outbytes = 0 ; src_count > 0 ; --src_count, ++src) |
| | 36 | { |
| | 37 | size_t curbytes; |
| | 38 | |
| | 39 | /* figure out how many bytes we need for this character */ |
| | 40 | curbytes = s_wchar_size(*src); |
| | 41 | |
| | 42 | /* add it to the total output size */ |
| | 43 | outbytes += curbytes; |
| | 44 | |
| | 45 | /* if we have room, add it to the buffer */ |
| | 46 | if (bufsiz >= curbytes) |
| | 47 | { |
| | 48 | /* store it */ |
| | 49 | setch(*src); |
| | 50 | |
| | 51 | /* deduct this space from the remaining buffer size */ |
| | 52 | bufsiz -= outbytes; |
| | 53 | } |
| | 54 | else |
| | 55 | { |
| | 56 | /* |
| | 57 | * there's no room for this - make sure we don't store |
| | 58 | * anything more (since we might have room for a shorter |
| | 59 | * character later, but that would put a gap in the output |
| | 60 | * string - better to just truncate here) |
| | 61 | */ |
| | 62 | bufsiz = 0; |
| | 63 | } |
| | 64 | } |
| | 65 | |
| | 66 | /* return the total output size used (or needed) */ |
| | 67 | return outbytes; |
| | 68 | } |
| | 69 | |
| | 70 | /* ------------------------------------------------------------------------ */ |
| | 71 | /* |
| | 72 | * encode a null-terminated string of wide characters into the buffer |
| | 73 | */ |
| | 74 | size_t utf8_ptr::setwcharsz(const wchar_t *src, size_t bufsiz) |
| | 75 | { |
| | 76 | size_t outbytes; |
| | 77 | |
| | 78 | /* loop through the source and store the characters */ |
| | 79 | for (outbytes = 0 ; *src != 0 ; ++src) |
| | 80 | { |
| | 81 | size_t curbytes; |
| | 82 | |
| | 83 | /* figure out how many bytes we need for this character */ |
| | 84 | curbytes = s_wchar_size(*src); |
| | 85 | |
| | 86 | /* add it to the total output size */ |
| | 87 | outbytes += curbytes; |
| | 88 | |
| | 89 | /* if we have room, add it to the buffer */ |
| | 90 | if (bufsiz >= curbytes) |
| | 91 | { |
| | 92 | /* store it */ |
| | 93 | setch(*src); |
| | 94 | |
| | 95 | /* deduct this space from the remaining buffer size */ |
| | 96 | bufsiz -= outbytes; |
| | 97 | } |
| | 98 | else |
| | 99 | { |
| | 100 | /* |
| | 101 | * there's no room for this - make sure we don't store |
| | 102 | * anything more (since we might have room for a shorter |
| | 103 | * character later, but that would put a gap in the output |
| | 104 | * string - better to just truncate here) |
| | 105 | */ |
| | 106 | bufsiz = 0; |
| | 107 | } |
| | 108 | } |
| | 109 | |
| | 110 | /* |
| | 111 | * Add the null terminator, if there's room, but do not increment |
| | 112 | * our pointer - we want to leave our pointer pointing at the null |
| | 113 | * terminator. Include the null terminator's size (one byte) in the |
| | 114 | * result length (even if we don't have room to store it). |
| | 115 | */ |
| | 116 | ++outbytes; |
| | 117 | if (bufsiz > 0) |
| | 118 | *p_ = '\0'; |
| | 119 | |
| | 120 | /* return the total output size used (or needed) */ |
| | 121 | return outbytes; |
| | 122 | } |
| | 123 | |
| | 124 | /* ------------------------------------------------------------------------ */ |
| | 125 | /* |
| | 126 | * Compare this string to the given string |
| | 127 | */ |
| | 128 | int utf8_ptr::s_compare_to(const char *p1, size_t bytelen1, |
| | 129 | const char *p2, size_t bytelen2) |
| | 130 | { |
| | 131 | /* keep going until one or the other string runs out of bytes */ |
| | 132 | while (bytelen1 != 0 && bytelen2 != 0) |
| | 133 | { |
| | 134 | wchar_t c1, c2; |
| | 135 | size_t siz1, siz2; |
| | 136 | |
| | 137 | /* get the current character from each string */ |
| | 138 | c1 = s_getch(p1); |
| | 139 | c2 = s_getch(p2); |
| | 140 | |
| | 141 | /* compare them */ |
| | 142 | if (c1 > c2) |
| | 143 | return 1; |
| | 144 | else if (c1 < c2) |
| | 145 | return -1; |
| | 146 | |
| | 147 | /* get the size of each character */ |
| | 148 | siz1 = s_charsize(*p1); |
| | 149 | siz2 = s_charsize(*p2); |
| | 150 | |
| | 151 | /* decrement each counter by the byte size of this character */ |
| | 152 | bytelen1 -= siz1; |
| | 153 | bytelen2 -= siz2; |
| | 154 | |
| | 155 | /* advance to the next character in each string */ |
| | 156 | p1 += siz1; |
| | 157 | p2 += siz2; |
| | 158 | } |
| | 159 | |
| | 160 | /* |
| | 161 | * we didn't find any character differences, but one string is |
| | 162 | * longer than the other -- if they ran out at the same time, |
| | 163 | * they're identical; otherwise, the one that ran out first is the |
| | 164 | * lesser one |
| | 165 | */ |
| | 166 | if (bytelen2 != 0) |
| | 167 | { |
| | 168 | /* the first one ran out first, so the first one sorts earlier */ |
| | 169 | return -1; |
| | 170 | } |
| | 171 | else if (bytelen1 != 0) |
| | 172 | { |
| | 173 | /* the second one ran out first, so the first one sort later */ |
| | 174 | return 1; |
| | 175 | } |
| | 176 | else |
| | 177 | { |
| | 178 | /* they both ran out at the same time */ |
| | 179 | return 0; |
| | 180 | } |
| | 181 | } |
| | 182 | |