cfad47cfa3/tads3/utf8.cpp

4b825dc642cb6eb9a060e54bf8d69288fbee4904cfad47cfa334b206c65f22086bcc5d63e6f70944
1
/*
 *   Revision-control identification string.  The array is defined only
 *   when the RCSID macro is defined at compile time; otherwise this whole
 *   section is skipped by the preprocessor.  Nothing in the visible part
 *   of this file refers to RCSid.
 *   
 *   NOTE(review): the "$Header ... $" text looks like an RCS/CVS keyword
 *   expansion, so it is presumably maintained by the version-control tool
 *   rather than edited by hand - confirm before changing it.
 */
#ifdef RCSID
2
static char RCSid[] =
3
"$Header: d:/cvsroot/tads/tads3/utf8.cpp,v 1.2 1999/05/17 02:52:29 MJRoberts Exp $";
4
#endif
5
6
/* 
7
 *   Copyright (c) 1998, 2002 Michael J. Roberts.  All Rights Reserved.
8
 *   
9
 *   Please see the accompanying license file, LICENSE.TXT, for information
10
 *   on using and copying this software.  
11
 */
12
/*
13
Name
14
  utf8.cpp - UTF-8 implementation
15
Function
16
  
17
Notes
18
  
19
Modified
20
  10/17/98 MJRoberts  - Creation
21
*/
22
23
#include "utf8.h"
24
25
/* ------------------------------------------------------------------------ */
26
/*
27
 *   encode a string of wide characters into the buffer 
28
 */
29
size_t utf8_ptr::setwchars(const wchar_t *src, size_t src_count,
30
                           size_t bufsiz)
31
{
32
    size_t outbytes;
33
34
    /* loop through the source and store the characters */
35
    for (outbytes = 0 ; src_count > 0 ; --src_count, ++src)
36
    {
37
        size_t curbytes;
38
39
        /* figure out how many bytes we need for this character */
40
        curbytes = s_wchar_size(*src);
41
42
        /* add it to the total output size */
43
        outbytes += curbytes;
44
45
        /* if we have room, add it to the buffer */
46
        if (bufsiz >= curbytes)
47
        {
48
            /* store it */
49
            setch(*src);
50
            
51
            /* deduct this space from the remaining buffer size */
52
            bufsiz -= outbytes;
53
        }
54
        else
55
        {
56
            /* 
57
             *   there's no room for this - make sure we don't store
58
             *   anything more (since we might have room for a shorter
59
             *   character later, but that would put a gap in the output
60
             *   string - better to just truncate here) 
61
             */
62
            bufsiz = 0;
63
        }
64
    }
65
66
    /* return the total output size used (or needed) */
67
    return outbytes;
68
}
69
70
/* ------------------------------------------------------------------------ */
71
/*
72
 *   encode a null-terminated string of wide characters into the buffer 
73
 */
74
size_t utf8_ptr::setwcharsz(const wchar_t *src, size_t bufsiz)
75
{
76
    size_t outbytes;
77
78
    /* loop through the source and store the characters */
79
    for (outbytes = 0 ; *src != 0 ; ++src)
80
    {
81
        size_t curbytes;
82
83
        /* figure out how many bytes we need for this character */
84
        curbytes = s_wchar_size(*src);
85
86
        /* add it to the total output size */
87
        outbytes += curbytes;
88
89
        /* if we have room, add it to the buffer */
90
        if (bufsiz >= curbytes)
91
        {
92
            /* store it */
93
            setch(*src);
94
95
            /* deduct this space from the remaining buffer size */
96
            bufsiz -= outbytes;
97
        }
98
        else
99
        {
100
            /* 
101
             *   there's no room for this - make sure we don't store
102
             *   anything more (since we might have room for a shorter
103
             *   character later, but that would put a gap in the output
104
             *   string - better to just truncate here) 
105
             */
106
            bufsiz = 0;
107
        }
108
    }
109
110
    /* 
111
     *   Add the null terminator, if there's room, but do not increment
112
     *   our pointer - we want to leave our pointer pointing at the null
113
     *   terminator.  Include the null terminator's size (one byte) in the
114
     *   result length (even if we don't have room to store it).  
115
     */
116
    ++outbytes;
117
    if (bufsiz > 0)
118
        *p_ = '\0';
119
120
    /* return the total output size used (or needed) */
121
    return outbytes;
122
}
123
124
/* ------------------------------------------------------------------------ */
125
/*
126
 *   Compare this string to the given string 
127
 */
128
int utf8_ptr::s_compare_to(const char *p1, size_t bytelen1,
129
                           const char *p2, size_t bytelen2)
130
{
131
    /* keep going until one or the other string runs out of bytes */
132
    while (bytelen1 != 0 && bytelen2 != 0)
133
    {
134
        wchar_t c1, c2;
135
        size_t siz1, siz2;
136
        
137
        /* get the current character from each string */
138
        c1 = s_getch(p1);
139
        c2 = s_getch(p2);
140
141
        /* compare them */
142
        if (c1 > c2)
143
            return 1;
144
        else if (c1 < c2)
145
            return -1;
146
147
        /* get the size of each character */
148
        siz1 = s_charsize(*p1);
149
        siz2 = s_charsize(*p2);
150
151
        /* decrement each counter by the byte size of this character */
152
        bytelen1 -= siz1;
153
        bytelen2 -= siz2;
154
155
        /* advance to the next character in each string */
156
        p1 += siz1;
157
        p2 += siz2;
158
    }
159
160
    /* 
161
     *   we didn't find any character differences, but one string is
162
     *   longer than the other -- if they ran out at the same time,
163
     *   they're identical; otherwise, the one that ran out first is the
164
     *   lesser one 
165
     */
166
    if (bytelen2 != 0)
167
    {
168
        /* the first one ran out first, so the first one sorts earlier */
169
        return -1;
170
    }
171
    else if (bytelen1 != 0)
172
    {
173
        /* the second one ran out first, so the first one sort later */
174
        return 1;
175
    }
176
    else
177
    {
178
        /* they both ran out at the same time */
179
        return 0;
180
    }
181
}
182