Process Hacker
pcre_get.c
Go to the documentation of this file.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8  Written by Philip Hazel
9  Copyright (c) 1997-2008 University of Cambridge
10 
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14 
15  * Redistributions of source code must retain the above copyright notice,
16  this list of conditions and the following disclaimer.
17 
18  * Redistributions in binary form must reproduce the above copyright
19  notice, this list of conditions and the following disclaimer in the
20  documentation and/or other materials provided with the distribution.
21 
22  * Neither the name of the University of Cambridge nor the names of its
23  contributors may be used to endorse or promote products derived from
24  this software without specific prior written permission.
25 
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39 
40 
41 /* This module contains some convenience functions for extracting substrings
42 from the subject string after a regex match has succeeded. The original idea
43 for these functions came from Scott Wimer. */
44 
45 
46 #define HAVE_CONFIG_H
47 #ifdef HAVE_CONFIG_H
48 #include "config.h"
49 #endif
50 
51 #include "pcre_internal.h"
52 
53 
54 /*************************************************
55 * Find number for named string *
56 *************************************************/
57 
58 /* This function is used by the get_first_set() function below, as well
59 as being generally available. It assumes that names are unique.
60 
61 Arguments:
62  code the compiled regex
63  stringname the name whose number is required
64 
65 Returns: the number of the named parentheses, or a negative number
66  (PCRE_ERROR_NOSUBSTRING) if not found
67 */
68 
70 pcre_get_stringnumber(const pcre *code, const char *stringname)
71 {
72 int rc;
73 int entrysize;
74 int top, bot;
75 uschar *nametable;
76 
77 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
78  return rc;
79 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
80 
81 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
82  return rc;
83 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
84  return rc;
85 
86 bot = 0;
87 while (top > bot)
88  {
89  int mid = (top + bot) / 2;
90  uschar *entry = nametable + entrysize*mid;
91  int c = strcmp(stringname, (char *)(entry + 2));
92  if (c == 0) return (entry[0] << 8) + entry[1];
93  if (c > 0) bot = mid + 1; else top = mid;
94  }
95 
97 }
98 
99 
100 
101 /*************************************************
102 * Find (multiple) entries for named string *
103 *************************************************/
104 
105 /* This is used by the get_first_set() function below, as well as being
106 generally available. It is used when duplicated names are permitted.
107 
108 Arguments:
109  code the compiled regex
110  stringname the name whose entries required
111  firstptr where to put the pointer to the first entry
112  lastptr where to put the pointer to the last entry
113 
114 Returns: the length of each entry, or a negative number
115  (PCRE_ERROR_NOSUBSTRING) if not found
116 */
117 
119 pcre_get_stringtable_entries(const pcre *code, const char *stringname,
120  char **firstptr, char **lastptr)
121 {
122 int rc;
123 int entrysize;
124 int top, bot;
125 uschar *nametable, *lastentry;
126 
127 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
128  return rc;
129 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
130 
131 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
132  return rc;
133 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
134  return rc;
135 
136 lastentry = nametable + entrysize * (top - 1);
137 bot = 0;
138 while (top > bot)
139  {
140  int mid = (top + bot) / 2;
141  uschar *entry = nametable + entrysize*mid;
142  int c = strcmp(stringname, (char *)(entry + 2));
143  if (c == 0)
144  {
145  uschar *first = entry;
146  uschar *last = entry;
147  while (first > nametable)
148  {
149  if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
150  first -= entrysize;
151  }
152  while (last < lastentry)
153  {
154  if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
155  last += entrysize;
156  }
157  *firstptr = (char *)first;
158  *lastptr = (char *)last;
159  return entrysize;
160  }
161  if (c > 0) bot = mid + 1; else top = mid;
162  }
163 
165 }
166 
167 
168 
169 /*************************************************
170 * Find first set of multiple named strings *
171 *************************************************/
172 
173 /* This function allows for duplicate names in the table of named substrings.
174 It returns the number of the first one that was set in a pattern match.
175 
176 Arguments:
177  code the compiled regex
178  stringname the name of the capturing substring
179  ovector the vector of matched substrings
180 
181 Returns: the number of the first that is set,
182  or the number of the last one if none are set,
183  or a negative number on error
184 */
185 
186 static int
187 get_first_set(const pcre *code, const char *stringname, int *ovector)
188 {
189 const real_pcre *re = (const real_pcre *)code;
190 int entrysize;
191 char *first, *last;
192 uschar *entry;
193 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
194  return pcre_get_stringnumber(code, stringname);
195 entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
196 if (entrysize <= 0) return entrysize;
197 for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
198  {
199  int n = (entry[0] << 8) + entry[1];
200  if (ovector[n*2] >= 0) return n;
201  }
202 return (first[0] << 8) + first[1];
203 }
204 
205 
206 
207 
208 /*************************************************
209 * Copy captured string to given buffer *
210 *************************************************/
211 
212 /* This function copies a single captured substring into a given buffer.
213 Note that we use memcpy() rather than strncpy() in case there are binary zeros
214 in the string.
215 
216 Arguments:
217  subject the subject string that was matched
218  ovector pointer to the offsets table
219  stringcount the number of substrings that were captured
220  (i.e. the yield of the pcre_exec call, unless
221  that was zero, in which case it should be 1/3
222  of the offset table size)
223  stringnumber the number of the required substring
224  buffer where to put the substring
225  size the size of the buffer
226 
227 Returns: if successful:
228  the length of the copied string, not including the zero
229  that is put on the end; can be zero
230  if not successful:
231  PCRE_ERROR_NOMEMORY (-6) buffer too small
232  PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
233 */
234 
236 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
237  int stringnumber, char *buffer, int size)
238 {
239 int yield;
240 if (stringnumber < 0 || stringnumber >= stringcount)
241  return PCRE_ERROR_NOSUBSTRING;
242 stringnumber *= 2;
243 yield = ovector[stringnumber+1] - ovector[stringnumber];
244 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
245 memcpy(buffer, subject + ovector[stringnumber], yield);
246 buffer[yield] = 0;
247 return yield;
248 }
249 
250 
251 
252 /*************************************************
253 * Copy named captured string to given buffer *
254 *************************************************/
255 
256 /* This function copies a single captured substring into a given buffer,
257 identifying it by name. If the regex permits duplicate names, the first
258 substring that is set is chosen.
259 
260 Arguments:
261  code the compiled regex
262  subject the subject string that was matched
263  ovector pointer to the offsets table
264  stringcount the number of substrings that were captured
265  (i.e. the yield of the pcre_exec call, unless
266  that was zero, in which case it should be 1/3
267  of the offset table size)
268  stringname the name of the required substring
269  buffer where to put the substring
270  size the size of the buffer
271 
272 Returns: if successful:
273  the length of the copied string, not including the zero
274  that is put on the end; can be zero
275  if not successful:
276  PCRE_ERROR_NOMEMORY (-6) buffer too small
277  PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
278 */
279 
281 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
282  int stringcount, const char *stringname, char *buffer, int size)
283 {
284 int n = get_first_set(code, stringname, ovector);
285 if (n <= 0) return n;
286 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
287 }
288 
289 
290 
291 /*************************************************
292 * Copy all captured strings to new store *
293 *************************************************/
294 
295 /* This function gets one chunk of store and builds a list of pointers and all
296 of the captured substrings in it. A NULL pointer is put on the end of the list.
297 
298 Arguments:
299  subject the subject string that was matched
300  ovector pointer to the offsets table
301  stringcount the number of substrings that were captured
302  (i.e. the yield of the pcre_exec call, unless
303  that was zero, in which case it should be 1/3
304  of the offset table size)
305  listptr set to point to the list of pointers
306 
307 Returns: if successful: 0
308  if not successful:
309  PCRE_ERROR_NOMEMORY (-6) failed to get store
310 */
311 
313 pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
314  const char ***listptr)
315 {
316 int i;
317 int size = sizeof(char *);
318 int double_count = stringcount * 2;
319 char **stringlist;
320 char *p;
321 
322 for (i = 0; i < double_count; i += 2)
323  size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
324 
325 stringlist = (char **)(pcre_malloc)(size);
326 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
327 
328 *listptr = (const char **)stringlist;
329 p = (char *)(stringlist + stringcount + 1);
330 
331 for (i = 0; i < double_count; i += 2)
332  {
333  int len = ovector[i+1] - ovector[i];
334  memcpy(p, subject + ovector[i], len);
335  *stringlist++ = p;
336  p += len;
337  *p++ = 0;
338  }
339 
340 *stringlist = NULL;
341 return 0;
342 }
343 
344 
345 
346 /*************************************************
347 * Free store obtained by get_substring_list *
348 *************************************************/
349 
350 /* This function exists for the benefit of people calling PCRE from non-C
351 programs that can call its functions, but not free() or (pcre_free)() directly.
352 
353 Argument: the result of a previous pcre_get_substring_list()
354 Returns: nothing
355 */
356 
358 pcre_free_substring_list(const char **pointer)
359 {
360 (pcre_free)((void *)pointer);
361 }
362 
363 
364 
365 /*************************************************
366 * Copy captured string to new store *
367 *************************************************/
368 
369 /* This function copies a single captured substring into a piece of new
370 store
371 
372 Arguments:
373  subject the subject string that was matched
374  ovector pointer to the offsets table
375  stringcount the number of substrings that were captured
376  (i.e. the yield of the pcre_exec call, unless
377  that was zero, in which case it should be 1/3
378  of the offset table size)
379  stringnumber the number of the required substring
380  stringptr where to put a pointer to the substring
381 
382 Returns: if successful:
383  the length of the string, not including the zero that
384  is put on the end; can be zero
385  if not successful:
386  PCRE_ERROR_NOMEMORY (-6) failed to get store
387  PCRE_ERROR_NOSUBSTRING (-7) substring not present
388 */
389 
391 pcre_get_substring(const char *subject, int *ovector, int stringcount,
392  int stringnumber, const char **stringptr)
393 {
394 int yield;
395 char *substring;
396 if (stringnumber < 0 || stringnumber >= stringcount)
397  return PCRE_ERROR_NOSUBSTRING;
398 stringnumber *= 2;
399 yield = ovector[stringnumber+1] - ovector[stringnumber];
400 substring = (char *)(pcre_malloc)(yield + 1);
401 if (substring == NULL) return PCRE_ERROR_NOMEMORY;
402 memcpy(substring, subject + ovector[stringnumber], yield);
403 substring[yield] = 0;
404 *stringptr = substring;
405 return yield;
406 }
407 
408 
409 
410 /*************************************************
411 * Copy named captured string to new store *
412 *************************************************/
413 
414 /* This function copies a single captured substring, identified by name, into
415 new store. If the regex permits duplicate names, the first substring that is
416 set is chosen.
417 
418 Arguments:
419  code the compiled regex
420  subject the subject string that was matched
421  ovector pointer to the offsets table
422  stringcount the number of substrings that were captured
423  (i.e. the yield of the pcre_exec call, unless
424  that was zero, in which case it should be 1/3
425  of the offset table size)
426  stringname the name of the required substring
427  stringptr where to put the pointer
428 
429 Returns: if successful:
430  the length of the copied string, not including the zero
431  that is put on the end; can be zero
432  if not successful:
433  PCRE_ERROR_NOMEMORY (-6) couldn't get memory
434  PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
435 */
436 
438 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
439  int stringcount, const char *stringname, const char **stringptr)
440 {
441 int n = get_first_set(code, stringname, ovector);
442 if (n <= 0) return n;
443 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
444 }
445 
446 
447 
448 
449 /*************************************************
450 * Free store obtained by get_substring *
451 *************************************************/
452 
453 /* This function exists for the benefit of people calling PCRE from non-C
454 programs that can call its functions, but not free() or (pcre_free)() directly.
455 
456 Argument: the result of a previous pcre_get_substring()
457 Returns: nothing
458 */
459 
461 pcre_free_substring(const char *pointer)
462 {
463 (pcre_free)((void *)pointer);
464 }
465 
466 /* End of pcre_get.c */