cfad47cfa3/tads2/regex.h

4b825dc642cb6eb9a060e54bf8d69288fbee4904cfad47cfa334b206c65f22086bcc5d63e6f70944
1
/* $Header: d:/cvsroot/tads/TADS2/regex.h,v 1.3 1999/07/11 00:46:34 MJRoberts Exp $ */

/* 
 *   Copyright (c) 1998, 2002 Michael J. Roberts.  All Rights Reserved.
 *   
 *   Please see the accompanying license file, LICENSE.TXT, for information
 *   on using and copying this software.  
 */
/*
Name
  regex.h - regular expression parser for TADS
Function
  Declares the data structures and entry points used to compile a
  regular expression and to match or search a string against it.
Notes
  
Modified
  04/11/99 CNebel     - Fix warnings.
  10/07/98 MJRoberts  - Creation
*/
20
21
#ifndef REGEX_H
22
#define REGEX_H
23
24
#include <stdlib.h>
25
26
27
/*
 *   State ID.  A state is identified by its index in the array of state
 *   transition tuples.
 */
typedef int re_state_id;

/* invalid state ID - used to mark null machines */
#define RE_STATE_INVALID   ((re_state_id)-1)

/* first valid state ID */
#define RE_STATE_FIRST_VALID  ((re_state_id)0)
35
36
37
/* ------------------------------------------------------------------------ */
/*
 *   Group register structure.  Each register keeps track of the starting
 *   and ending offset of the group's text.  Note that, despite the "ofs"
 *   naming, both members are stored as character pointers.
 */
typedef struct
{
    /* start of the group's text */
    const char *start_ofs;

    /* end of the group's text */
    const char *end_ofs;
} re_group_register;
47
48
/* number of group registers we keep */
#define RE_GROUP_REG_CNT  10
50
51
52
/* ------------------------------------------------------------------------ */
53
/* 
54
 *   Denormalized state transition tuple.  Each tuple represents the
55
 *   complete set of transitions out of a particular state.  A particular
56
 *   state can have one character transition, or two epsilon transitions.
57
 *   Note that we don't need to store the state ID in the tuple, because
58
 *   the state ID is the index of the tuple in an array of state tuples.  
59
 */
60
typedef struct
61
{
62
    /* the character we must match to transition to the target state */
63
    char ch;
64
65
    /* the target states */
66
    re_state_id next_state_1;
67
    re_state_id next_state_2;
68
69
    /* character range match table, if used */
70
    unsigned char *char_range;
71
72
    /* flags */
73
    unsigned char flags;
74
} re_tuple;
75
76
77
/*
 *   Tuple flags - bit values stored in the 'flags' member of a state
 *   transition tuple
 */

/* this state is the start of a group - the 'ch' value is the group ID */
#define RE_STATE_GROUP_BEGIN  0x02

/* this state is the end of a group - 'ch' is the group ID */
#define RE_STATE_GROUP_END    0x04
86
87
88
/* ------------------------------------------------------------------------ */
89
/*
90
 *   Regular expression compilation context structure.  This tracks the
91
 *   state of the compilation and stores the resources associated with the
92
 *   compiled expression.  
93
 */
94
typedef struct
95
{
96
    /* error context */
97
    errcxdef *errctx;
98
99
    /* next available state ID */
100
    re_state_id next_state;
101
102
    /*
103
     *   The array of transition tuples.  We'll allocate this array and
104
     *   expand it as necessary.  
105
     */
106
    re_tuple *tuple_arr;
107
108
    /* number of transition tuples allocated in the array */
109
    int tuples_alloc;
110
111
    /* current group ID */
112
    int cur_group;
113
114
    /* group registers */
115
    re_group_register regs[RE_GROUP_REG_CNT];
116
117
    /* 
118
     *   Buffer for retaining a copy of the last string we scanned.  We
119
     *   retain our own copy of each string, and point the group registers
120
     *   into this copy rather than the caller's original string -- this
121
     *   ensures that the group registers remain valid even after the
122
     *   caller has deallocated the original string.  
123
     */
124
    char *strbuf;
125
126
    /* length of the string currently in the buffer */
127
    size_t curlen;
128
129
    /* size of the buffer allocated to strbuf */
130
    size_t strbufsiz;
131
} re_context;
132
133
134
/* ------------------------------------------------------------------------ */
/*
 *   Status codes
 */
typedef enum
{
    /* success */
    RE_STATUS_SUCCESS = 0,

    /* compilation error - group nesting too deep */
    RE_STATUS_GROUP_NESTING_TOO_DEEP
} re_status_t;
146
147
148
/* ------------------------------------------------------------------------ */
149
/*
150
 *   Initialize the context.  The memory for the context structure itself
151
 *   must be allocated and maintained by the caller. 
152
 */
153
void re_init(re_context *ctx, errcxdef *errctx);
154
155
/*
156
 *   Delete the context - frees structures associated with the context.
157
 *   Does NOT free the memory used by the context structure itself.  
158
 */
159
void re_delete(re_context *ctx);
160
161
/*
162
 *   Compile an expression and search for a match within the given string.
163
 *   Returns the offset of the match, or -1 if no match was found.  
164
 */
165
int re_compile_and_search(re_context *ctx,
166
                          const char *pattern, size_t patlen,
167
                          const char *searchstr, size_t searchlen,
168
                          int *result_len);
169
170
/*
171
 *   Compile an expression and check for a match.  Returns the length of
172
 *   the match if we found a match, -1 if we found no match.  This is not
173
 *   a search function; we merely match the leading substring of the given
174
 *   string to the given pattern.  
175
 */
176
int re_compile_and_match(re_context *ctx,
177
                         const char *pattern, size_t patlen,
178
                         const char *searchstr, size_t searchlen);
179
180
#endif /* REGEX_H */
181