cfad47cfa3/t3compiler/tads3/tcsrc.cpp

4b825dc642cb6eb9a060e54bf8d69288fbee4904cfad47cfa334b206c65f22086bcc5d63e6f70944
1
#ifdef RCSID
/* revision-control identification string; compiled in only for RCSID builds */
static char RCSid[] = "$Header: d:/cvsroot/tads/tads3/TCSRC.CPP,v 1.3 1999/07/11 00:46:55 MJRoberts Exp $";
#endif
5
6
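/* 
 *   Copyright (c) 1999, 2002 Michael J. Roberts.  All Rights Reserved.
 *   
 *   Please see the accompanying license file, LICENSE.TXT, for information
 *   on using and copying this software.  
 */
/*
Name
  tcsrc.cpp - source file reader
Function
  Implements the line-oriented source readers: CTcSrcFile, which reads
  lines from a file through a character-set mapper (selected from a
  byte-order marker, a #charset directive, or a default character set),
  and CTcSrcMemory, which reads lines from an in-memory text buffer.
Notes
  
Modified
  04/13/99 MJRoberts  - Creation
*/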
22
23
#include <string.h>
24
#include <stdlib.h>
25
26
#include "os.h"
27
#include "t3std.h"
28
#include "tcsrc.h"
29
#include "tcglob.h"
30
#include "charmap.h"
31
32
33
/* ------------------------------------------------------------------------ */
34
/*
35
 *   Deletion 
36
 */
37
CTcSrcFile::~CTcSrcFile()
38
{
39
    /* close my source file */
40
    if (fp_ != 0)
41
        osfcls(fp_);
42
43
    /* release my character mapper */
44
    if (mapper_ != 0)
45
        mapper_->release_ref();
46
}
47
48
49
#if 0
// we don't currently need this, but keep the source in case it
// becomes interesting later
//
/* ------------------------------------------------------------------------ */
/*
 *   Open a plain file that has no #charset marker.  The first few bytes
 *   are inspected for a Unicode byte-order marker:
 *   
 *   - EF BB BF selects the UTF-8 mapper
 *   - FF FE selects the UCS-2 little-endian mapper
 *   - FE FF selects the UCS-2 big-endian mapper
 *   
 *   When a marker is found, the returned reader is positioned just past
 *   it.  When no marker is found, the file is read as plain ASCII from
 *   the very first byte.
 *   
 *   Returns a new reader object, or null if the file can't be opened.
 */
CTcSrcFile *CTcSrcFile::open_plain(const char *filename)
{
    osfildef *fp;
    char buf[5];
    size_t siz;

    /* 
     *   open the file in binary mode, since we do all of the newline
     *   interpretation explicitly 
     */
    if ((fp = osfoprb(filename, OSFTTEXT)) == 0)
        return 0;

    /* read the first few bytes of the file so we can look for a marker */
    siz = osfrbc(fp, buf, sizeof(buf));

    /* check for a 3-byte UTF-8 marker */
    if (siz >= 3
        && (uchar)buf[0] == 0xEF
        && (uchar)buf[1] == 0xBB
        && (uchar)buf[2] == 0xBF)
    {
        /* 
         *   seek to the byte after the marker, so that our caller won't see
         *   the marker 
         */
        osfseek(fp, 3, OSFSK_SET);

        /* return a source file reader with a utf-8 mapper */
        return new CTcSrcFile(fp, new CCharmapToUniUTF8());
    }

    /* if we read at least two bytes, try auto-detecting UCS-2 */
    if (siz >= 2)
    {
        /* if the first bytes are 0xFF 0xFE, it's UCS-2 low-byte first */
        if ((unsigned char)buf[0] == 0xFF && (unsigned char)buf[1] == 0xFE)
        {
            /* seek to the byte after the marker */
            osfseek(fp, 2, OSFSK_SET);

            /* return a reader with a little-endian mapper */
            return new CTcSrcFile(fp, new CCharmapToUniUcs2Little());
        }

        /* if the first bytes are 0xFE 0xFF, it's UCS-2 high-byte first */
        if ((unsigned char)buf[0] == 0xFE && (unsigned char)buf[1] == 0xFF)
        {
            /* seek to the byte after the marker */
            osfseek(fp, 2, OSFSK_SET);

            /* return a reader with a big-endian mapper */
            return new CTcSrcFile(fp, new CCharmapToUniUcs2Big());
        }
    }

    /*
     *   There are no Unicode markers, so our only remaining option is plain
     *   ASCII.  The detection read above moved the file position past the
     *   first few bytes, so seek back to the start of the file first;
     *   otherwise the reader would silently lose the beginning of the text.
     */
    osfseek(fp, 0, OSFSK_SET);

    /* return a source file object with a plain ASCII mapper */
    return new CTcSrcFile(fp, new CCharmapToUniASCII());
}
#endif
120
121
/* ------------------------------------------------------------------------ */
122
/*
123
 *   Open a plain ASCII source file.  
124
 */
125
CTcSrcFile *CTcSrcFile::open_ascii(const char *filename)
126
{
127
    osfildef *fp;
128
129
    /* 
130
     *   open the file in binary mode, since we do all of the newline
131
     *   interpretation explicitly 
132
     */
133
    if ((fp = osfoprb(filename, OSFTTEXT)) == 0)
134
        return 0;
135
136
    /* return a source reader with a plain ASCII mapper */
137
    return new CTcSrcFile(fp, new CCharmapToUniASCII());
138
}
139
140
141
/* ------------------------------------------------------------------------ */
142
/*
143
 *   Open a source file 
144
 */
145
CTcSrcFile *CTcSrcFile::open_source(const char *filename,
146
                                    class CResLoader *res_loader,
147
                                    const char *default_charset,
148
                                    int *charset_error,
149
                                    int *default_charset_error)
150
{
151
    char buf[275];
152
    size_t siz;
153
    osfildef *fp;
154
    long startofs;
155
    CCharmapToUni *mapper;
156
157
    /* presume we won't find an invalid #charset directive */
158
    *charset_error = FALSE;
159
160
    /* presume we'll have no problem with the default character set */
161
    *default_charset_error = FALSE;
162
163
    /* 
164
     *   open the file in binary mode, so that we can scan the first few
165
     *   bytes to see if we can detect the character set from information
166
     *   at the beginning of the file 
167
     */
168
    fp = osfoprb(filename, OSFTTEXT);
169
170
    /* if we couldn't open the file, return failure */
171
    if (fp == 0)
172
        return 0;
173
174
    /* note the starting offset in the file */
175
    startofs = osfpos(fp);
176
177
    /* read the first few bytes of the file */
178
    siz = osfrbc(fp, buf, sizeof(buf));
179
180
    /* check for a 3-byte UTF-8 byte-order marker */
181
    if (siz >= 3  && (uchar)buf[0] == 0xEF && (uchar)buf[1] == 0xBB
182
        && (uchar)buf[2] == 0xBF)
183
    {
184
        char *p;
185
        size_t rem;
186
        uint skip;
187
188
        /* skip at least the three-byte marker sequence */
189
        skip = 3;
190
        
191
        /* 
192
         *   check for a #charset marker for utf-8 - this would be redundant,
193
         *   but we'll allow it 
194
         */
195
        p = buf + 3;
196
        rem = siz - 3;
197
        if (rem > 9 && memcmp(p, "#charset ", 9) == 0)
198
        {
199
            /* skip spaces */
200
            for (p += 9, rem -= 9 ; rem != 0 && (*p == ' ' || *p == '\t') ;
201
                 ++p, --rem);
202
203
            /* check for valid character set markers */
204
            if (rem >= 7 && memicmp(p, "\"utf-8\"", 7) == 0)
205
            {
206
                /* skip the whole sequence */
207
                skip = (p + 7) - buf;
208
            }
209
            else if (rem >= 6 && memicmp(p, "\"utf8\"", 6) == 0)
210
            {
211
                /* skip the whole sequence */
212
                skip = (p + 6) - buf;
213
            }
214
        }
215
216
        /* seek past the character set markers */
217
        osfseek(fp, startofs + skip, OSFSK_SET);
218
219
        /* return a new utf-8 decoder */
220
        return new CTcSrcFile(fp, new CCharmapToUniUTF8());
221
    }
222
223
    /* if we read at least two bytes, try auto-detecting unicode */
224
    if (siz >= 2)
225
    {
226
        CTcSrcFile *srcf;
227
        const char *const *cs_names;
228
        int bige;
229
230
        /* presume we won't find a byte-order marker */
231
        srcf = 0;
232
        
233
        /* if the first bytes are 0xFF 0xFE, it's UCS-2 low-byte first */
234
        if ((unsigned char)buf[0] == 0xFF && (unsigned char)buf[1] == 0xFE)
235
        {
236
            static const char *names[] = { "unicodel", "utf-16le", 0 };
237
238
            /* create a UCS-2 little-endian reader */
239
            srcf = new CTcSrcFile(fp, new CCharmapToUniUcs2Little());
240
            bige = FALSE;
241
            cs_names = names;
242
        }
243
244
        /* if the first bytes are 0xFE 0xFF, it's UCS-2 high-byte first */
245
        if ((unsigned char)buf[0] == 0xFE && (unsigned char)buf[1] == 0xFF)
246
        {
247
            static const char *names[] = { "unicodeb", "utf-16be", 0 };
248
249
            /* create a UCS-2 little-endian reader */
250
            srcf = new CTcSrcFile(fp, new CCharmapToUniUcs2Big());
251
            bige = TRUE;
252
            cs_names = names;
253
        }
254
255
        /* if we found the byte-order marker, we know the character set */
256
        if (srcf != 0)
257
        {
258
            uint skip;
259
260
            /* we at least want to skip the byte-order marker */
261
            skip = 2;
262
            
263
            /* check to see if we have a '#charset' directive */
264
            if (ucs_str_starts_with(buf + 2, siz - 2, "#charset ",
265
                                    bige, FALSE))
266
            {
267
                char *p;
268
                size_t rem;
269
                
270
                /* scan past following spaces */
271
                for (p = buf + 2 + 18, rem = siz - 2 - 18 ;
272
                     rem >= 2 && (ucs_char_eq(p, ' ', bige, FALSE)
273
                                  || ucs_char_eq(p, '\t', bige, FALSE)) ;
274
                     p += 2, rem -= 2) ;
275
276
                /* check for a '"' */
277
                if (rem >= 2 && ucs_char_eq(p, '"', bige, FALSE))
278
                {
279
                    const char *const *n;
280
281
                    /* skip the '"' */
282
                    p += 2;
283
                    rem -= 2;
284
                    
285
                    /* 
286
                     *   check for a match to any of the valid names for this
287
                     *   character set 
288
                     */
289
                    for (n = cs_names ; *n != 0 ; ++n)
290
                    {
291
                        /* if it's a match, stop scanning */
292
                        if (ucs_str_starts_with(p, rem, *n, bige, TRUE))
293
                        {
294
                            size_t l;
295
296
                            /* get the length of the name */
297
                            l = strlen(*n) * 2;
298
299
                            /* check for a close quote */
300
                            if (rem >= l + 2
301
                                && ucs_char_eq(p + l, '"', bige, FALSE))
302
                            {
303
                                /* skip the name and the quote */
304
                                p += l + 2;
305
                                rem -= l + 2;
306
307
                                /* skip the source text to this point */
308
                                skip = p - buf;
309
310
                                /* stop scanning */
311
                                break;
312
                            }
313
                        }
314
                    }
315
                }
316
            }
317
318
            /* seek just past the character set indicators */
319
            osfseek(fp, startofs + skip, OSFSK_SET);
320
321
            /* return the file */
322
            return srcf;
323
        }
324
    }
325
326
    /*
327
     *   It doesn't appear to use UCS-2 encoding (at least, the file
328
     *   doesn't start with a byte-order sensing sequence).  Check to see
329
     *   if the file starts with "#charset " in ASCII single-byte
330
     *   characters.  
331
     */
332
    if (siz >= 9 && memcmp(buf, "#charset ", 9) == 0)
333
    {
334
        char *p;
335
        size_t rem;
336
        
337
        /* skip the #charset string and any following spaces */
338
        for (p = buf + 9, rem = siz - 9 ;
339
             rem > 0 && (*p == ' ' || *p == '\t') ; ++p, --rem) ;
340
341
        /* make sure we're looking at a '"' */
342
        if (rem != 0 && *p == '"')
343
        {
344
            char *charset_name;
345
346
            /* skip the open quote */
347
            ++p;
348
            --rem;
349
            
350
            /* remember where the character set name starts */
351
            charset_name = p;
352
353
            /* 
354
             *   find the closing quote, which must occur before a CR or
355
             *   LF character 
356
             */
357
            for ( ; rem > 0 && *p != '"' && *p != 10 && *p != 13 ;
358
                 ++p, --rem) ;
359
360
            /* make sure we found a matching quote */
361
            if (rem != 0 && *p == '"')
362
            {
363
                /* seek just past the #charset string */
364
                osfseek(fp, startofs + (p - buf) + 1, OSFSK_SET);
365
366
                /* 
367
                 *   put a null terminator at the end of the character set
368
                 *   name 
369
                 */
370
                *p = '\0';
371
372
                /* create a mapper */
373
                mapper = CCharmapToUni::load(res_loader, charset_name);
374
375
                /* 
376
                 *   if that succeeded, return a reader for the mapper;
377
                 *   otherwise, simply proceed as though no #charset had
378
                 *   been present, so that we create a default mapper 
379
                 */
380
                if (mapper != 0)
381
                {
382
                    /* success - return a reader */
383
                    return new CTcSrcFile(fp, mapper);
384
                }
385
                else
386
                {
387
                    /* tell the caller the #charset was invalid */
388
                    *charset_error = TRUE;
389
                }
390
            }
391
        }
392
    }
393
394
    /* 
395
     *   we didn't find any sensing codes, so seek back to the start of
396
     *   the file 
397
     */
398
    osfseek(fp, startofs, OSFSK_SET);
399
400
    /*
401
     *   We couldn't identify the file's character set based on anything
402
     *   in the file, so create a mapper for the given default character
403
     *   set.  If there's not even a default character set defined, create
404
     *   a plain ASCII mapper.  
405
     */
406
    if (default_charset != 0)
407
        mapper = CCharmapToUni::load(res_loader, default_charset);
408
    else
409
        mapper = new CCharmapToUniASCII();
410
411
    /* check to see if we created a mapper */
412
    if (mapper != 0)
413
    {
414
        /* return a source file reader based on the mapper */
415
        return new CTcSrcFile(fp, mapper);
416
    }
417
    else
418
    {
419
        /* 
420
         *   we failed to create a mapper for the default character set -
421
         *   flag the problem 
422
         */
423
        *default_charset_error = TRUE;
424
425
        /* close the input file */
426
        osfcls(fp);
427
428
        /* return failure */
429
        return 0;
430
    }
431
}
432
433
/* ------------------------------------------------------------------------ */
434
/*
435
 *   Read a line of text from the file.  
436
 */
437
size_t CTcSrcFile::read_line(char *buf, size_t bufl)
438
{
439
    char *dst;
440
441
    /* start out writing to the start of the caller's buffer */
442
    dst = buf;
443
444
    /*
445
     *   Keep going until we run out of input file, fill up the buffer, or
446
     *   reach the end of a line 
447
     */
448
    for (;;)
449
    {
450
        char *src;
451
        
452
        /* read some more data if our buffer is empty */
453
        if (rem_ == 0)
454
        {
455
            /* load another buffer-full */
456
            rem_ = mapper_->read_file(fp_, buf_, sizeof(buf_), 0);
457
458
            /* 
459
             *   If we didn't read anything, we've reached the end of the
460
             *   file.  If we've already copied anything into the caller's
461
             *   buffer, null-terminate their buffer and return success;
462
             *   otherwise, return failure, since the caller has already
463
             *   read everything available from the file.  
464
             */
465
            if (rem_ == 0)
466
            {
467
                /* 
468
                 *   Remember that we've reached the end of the file.
469
                 *   We're about to return the last of the data, so the
470
                 *   caller will not need to call us again (although it's
471
                 *   legal if they do - we'll just return a zero length on
472
                 *   the next call).  
473
                 */
474
                at_eof_ = TRUE;
475
                
476
                /* check if we've copied anything to the caller's buffer */
477
                if (buf == dst)
478
                {
479
                    /* the caller's buffer is empty - return end of file */
480
                    return 0;
481
                }
482
                else
483
                {
484
                    /* null-terminate the caller's buffer */
485
                    *dst++ = '\0';
486
487
                    /* 
488
                     *   return the number of bytes copied, including the null
489
                     *   terminator 
490
                     */
491
                    return (dst - buf);
492
                }
493
            }
494
495
            /* start over at the beginning of the buffer */
496
            p_ = buf_;
497
        }
498
499
        /*
500
         *   Scan the input buffer one character (not byte) at a time.
501
         *   Keep track of how much many bytes we've skipped.  Stop when
502
         *   we reach a CR or LF character, or when skipping another
503
         *   character would exceed the remaining capacity of the caller's
504
         *   buffer, or when we run out of data in our input buffer.  
505
         */
506
        for (src = p_ ; rem_ > 0 ; )
507
        {
508
            size_t csiz;
509
            
510
            /* get the length of the current character */
511
            csiz = utf8_ptr::s_charsize(*src);
512
513
            /* 
514
             *   if this character plus a null terminator wouldn't fit in
515
             *   the output buffer, stop scanning 
516
             */
517
            if (csiz >= bufl)
518
            {
519
                /* 
520
                 *   There's no more room in the caller's buffer.  Copy
521
                 *   what we've scanned so far to the output buffer and
522
                 *   null-terminate the buffer.  
523
                 */
524
                memcpy(dst, p_, src - p_);
525
526
                /* advance past the copied bytes and write the null byte */
527
                dst += (src - p_);
528
                *dst++ = '\0';
529
530
                /* advance the buffer read pointer over the copied bytes */
531
                p_ = src;
532
533
                /* return success - indicate the number of bytes copied */
534
                return (dst - buf);
535
            }
536
537
            /* 
538
             *   If it's a newline character of some kind, we're done with
539
             *   this line.  Note that we can just check the byte directly,
540
             *   since if it's a multi-byte character, we'll never mistake
541
             *   the first byte for a single-byte newline or carriage return
542
             *   character, since a UTF-8 lead byte always has the high bit
543
             *   set.
544
             *   
545
             *   Also treat the Unicode character 0x2028 (line separator) as
546
             *   a newline.  
547
             */
548
            if (*src == '\n' || *src == '\r'
549
                || utf8_ptr::s_getch(src) == 0x2028)
550
            {
551
                char nl;
552
                
553
                /* copy what we've scanned so far to the caller's buffer */
554
                memcpy(dst, p_, src - p_);
555
556
                /* advance past the copied bytes */
557
                dst += src - p_;
558
559
                /* 
560
                 *   add a newline to the caller's buffer -- always add a
561
                 *   '\n' newline, regardless of what kind of newline
562
                 *   sequence we found in the input; also add a null
563
                 *   terminator 
564
                 */
565
                *dst++ = '\n';
566
                *dst++ = '\0';
567
568
                /* remember which type of newline we found */
569
                nl = *src;
570
571
                /* advance past the newline */
572
                p_ = src + csiz;
573
                rem_ -= csiz;
574
575
                /* 
576
                 *   If the input buffer is empty, read more, so that we
577
                 *   can check the next character after the newline
578
                 *   character. 
579
                 */
580
                if (rem_ == 0)
581
                {
582
                    /* read more data */
583
                    rem_ = mapper_->read_file(fp_, buf_, sizeof(buf_), 0);
584
585
                    /* start over at the start of the buffer */
586
                    p_ = buf_;
587
                }
588
589
                /* 
590
                 *   Check for a paired newline character.  If we found a
591
                 *   CR, check for an LF; if we found an LF, check for a
592
                 *   CR.  This will ensure that we will recognize
593
                 *   essentially any newline character sequence for any
594
                 *   platform - this will accept CR, LF, CR-LF, or LF-CR
595
                 *   sequences. 
596
                 */
597
                if (rem_ != 0
598
                    && ((nl == '\n' && *p_ == '\r')
599
                        || (nl == '\r' && *p_ == '\n')))
600
                {
601
                    /* it's a paired newline - skip the second character */
602
                    ++p_;
603
                    --rem_;
604
                }
605
606
                /* we've finished this line - return success */
607
                return dst - buf;
608
            }
609
            
610
            /* skip this character in the input and proceed */
611
            src += csiz;
612
            rem_ -= csiz;
613
614
            /* consider this character consumed in the caller's buffer */
615
            bufl -= csiz;
616
        }
617
618
        /*
619
         *   We've exhausted the current input buffer, without filling the
620
         *   caller's buffer.  Copy what we've skipped so far into the
621
         *   caller's buffer.  
622
         */
623
        memcpy(dst, p_, src - p_);
624
625
        /* 
626
         *   Advance the output pointer past the data we just copied, then
627
         *   continue looping to read more data from the input file. 
628
         */
629
        dst += src - p_;
630
    }
631
}
632
633
/* ------------------------------------------------------------------------ */
/*
 *   Buffer reader source object - CTcSrcMemory reads source lines from an
 *   in-memory, null-terminated text buffer rather than from a file. 
 */
637
638
/*
639
 *   allocate 
640
 */
641
CTcSrcMemory::CTcSrcMemory(const char *buf, CCharmapToUni *mapper)
642
{
643
    size_t len;
644
    size_t alo_len;
645
    char *p;
646
647
    /* get the length of the null-terminated source string */
648
    len = strlen(buf);
649
650
    /* 
651
     *   Allocate a buffer for a UTF8-encoded copy of the buffer -
652
     *   allocate three bytes per byte of the original, since this is the
653
     *   worst case for expansion of the encoding.  Allocate one extra
654
     *   byte to ensure we have space for a null terminator.  
655
     */
656
    alo_len = len*3;
657
    buf_alo_ = (char *)t3malloc(alo_len + 1);
658
659
    /* map the buffer */
660
    p = buf_alo_;
661
    mapper->map(&p, &alo_len, buf, len);
662
663
    /* null-terminate the translated buffer */
664
    *p = '\0';
665
666
    /* start reading at the start of the translated buffer */
667
    buf_ = buf_alo_;
668
}
669
670
/* 
671
 *   delete 
672
 */
673
CTcSrcMemory::~CTcSrcMemory()
674
{
675
    /* free our buffer */
676
    t3free(buf_alo_);
677
}
678
679
/*
680
 *   read next line 
681
 */
682
size_t CTcSrcMemory::read_line(char *buf, size_t bufl)
683
{
684
    char *dst;
685
    const char *src;
686
687
    /* if there's nothing left in our buffer, return EOF */
688
    if (*buf_ == '\0')
689
        return 0;
690
691
    /* start out writing to the start of the caller's buffer */
692
    dst = buf;
693
694
    /*
695
     *   Scan the input buffer one character (not byte) at a time.  Keep
696
     *   track of how much many bytes we've skipped.  Stop when we reach a
697
     *   CR or LF character, or when skipping another character would
698
     *   exceed the remaining capacity of the caller's buffer, or when we
699
     *   run out of data in our input buffer.  
700
     */
701
    for (src = buf_ ; *src != '\0' ; )
702
    {
703
        size_t csiz;
704
705
        /* get the length of the current character */
706
        csiz = utf8_ptr::s_charsize(*src);
707
708
        /* 
709
         *   if this character plus a null terminator wouldn't fit in the
710
         *   output buffer, stop scanning 
711
         */
712
        if (csiz >= bufl)
713
        {
714
            /* 
715
             *   There's no more room in the caller's buffer.  Copy what
716
             *   we've scanned so far to the output buffer and
717
             *   null-terminate the buffer.  
718
             */
719
            memcpy(dst, buf_, src - buf_);
720
            
721
            /* advance past the copied bytes and write the null byte */
722
            dst += (src - buf_);
723
            *dst++ = '\0';
724
            
725
            /* advance the buffer read pointer over the copied bytes */
726
            buf_ = src;
727
            
728
            /* return success - indicate the number of bytes copied */
729
            return (dst - buf);
730
        }
731
732
        /* 
733
         *   If it's a newline character of some kind, we're done with this
734
         *   line.  Note that we can just check the byte directly, since if
735
         *   it's a multi-byte character, we'll never mistake the first byte
736
         *   for a single-byte newline or carriage return character, since a
737
         *   UTF-8 lead byte always has the high bit set.  Allow Unicode
738
         *   character 0x2028 (line separator) as a newline as well.  
739
         */
740
        if (*src == '\n' || *src == '\r' || utf8_ptr::s_getch(src) == 0x2028)
741
        {
742
            char nl;
743
            
744
            /* copy what we've scanned so far to the caller's buffer */
745
            memcpy(dst, buf_, src - buf_);
746
            
747
            /* advance past the copied bytes */
748
            dst += src - buf_;
749
            
750
            /* 
751
             *   add a newline to the caller's buffer -- always add a '\n'
752
             *   newline, regardless of what kind of newline sequence we
753
             *   found in the input; also add a null terminator 
754
             */
755
            *dst++ = '\n';
756
            *dst++ = '\0';
757
758
            /* remember which type of newline we found */
759
            nl = *src;
760
761
            /* advance past the newline */
762
            buf_ = src + csiz;
763
764
            /* 
765
             *   Check for a paired newline character.  If we found a CR,
766
             *   check for an LF; if we found an LF, check for a CR.  This
767
             *   will ensure that we will recognize essentially any
768
             *   newline character sequence for any platform - this will
769
             *   accept CR, LF, CR-LF, or LF-CR sequences.  
770
             */
771
            if ((nl == '\n' && *buf_ == '\r')
772
                || (nl == '\r' && *buf_ == '\n'))
773
            {
774
                /* it's a paired newline - skip the second character */
775
                ++buf_;
776
            }
777
            
778
            /* we've finished this line - return its length */
779
            return dst - buf;
780
        }
781
        
782
        /* skip this character in the input and proceed */
783
        src += csiz;
784
785
        /* consider this space consumed in the caller's buffer */
786
        bufl -= csiz;
787
    }
788
789
    /*
790
     *   We've exhausted the input buffer, without filling the caller's
791
     *   buffer.  Copy what we've skipped so far into the caller's buffer.
792
     */
793
    memcpy(dst, buf_, src - buf_);
794
    dst += src - buf_;
795
796
    /* null-terminate the result buffer */
797
    *dst++ = '\0';
798
799
    /* advance our input pointer to the new (EOF) position */
800
    buf_ = src;
801
802
    /* return the buffer length */
803
    return dst - buf;
804
}
805