/* Short aliases: PSSTART expands to the identifier start_pattern and PSEND
expands to the identifier end_pattern.
NOTE(review): presumably these exist for the pcre_printint.src include that
follows -- confirm against that file. */
51 #define PSSTART start_pattern
52 #define PSEND end_pattern
62 #include "pcre_printint.src"
/* Set bit number b in the byte-array bitmap a: byte b/8, bit b%8.

Both parameters and the whole expansion are parenthesized so that the macro
is safe with compound arguments such as SETBIT(map, base + 5) or
SETBIT(p + 2, 3), and so that it can be used as a statement or inside a
larger expression.

Note that b is still evaluated twice, so do not pass an argument with side
effects (for example i++). */
#define SETBIT(a,b) ((a)[(b)/8] |= (1 << ((b)%8)))
/* Ceiling for the accumulated compiled length. compile_branch compares
(OFLOW_MAX - *lengthptr) with the number of bytes it is about to add and
reports ERR20 when they do not fit, so the running total stays 20 below
INT_MAX. */
75 #define OFLOW_MAX (INT_MAX - 20)
/* Size constant for the compile-time workspace.
NOTE(review): the unit (bytes vs. entries) is not visible here -- confirm at
the point of use. */
94 #define COMPILE_WORK_SIZE (4096)
/* COMPILE_WORK_SIZE less a margin of 100.
NOTE(review): presumably the threshold at which workspace use is considered
too close to the end -- confirm at the point of use. */
99 #define WORK_SIZE_CHECK (COMPILE_WORK_SIZE - 100)
112 static const short int escapes[] = {
157 static const short int escapes[] = {
158 0, 0, 0,
'.',
'<',
'(',
'+',
'|',
159 '&', 0, 0, 0, 0, 0, 0, 0,
160 0, 0,
'!',
'$',
'*',
')',
';',
'~',
161 '-',
'/', 0, 0, 0, 0, 0, 0,
162 0, 0,
'|',
',',
'%',
'_',
'>',
'?',
163 0, 0, 0, 0, 0, 0, 0, 0,
164 0,
'`',
':',
'#',
'@',
'\'',
'=',
'"',
166 -
ESC_h, 0, 0,
'{', 0, 0, 0, 0,
168 0,
ESC_r, 0,
'}', 0, 0, 0, 0,
170 0,-
ESC_z, 0, 0, 0,
'[', 0, 0,
171 0, 0, 0, 0, 0, 0, 0, 0,
172 0, 0, 0, 0, 0,
']',
'=',
'-',
174 -
ESC_H, 0, 0, 0, 0, 0, 0, 0,
178 0,-
ESC_Z, 0, 0, 0, 0, 0, 0,
179 0, 0, 0, 0, 0, 0, 0, 0,
180 0, 0, 0, 0, 0, 0, 0, 0
197 static const char verbnames[] =
/* Number of entries in the verbs[] table, computed at compile time as the
total size of verbs divided by the size of one verbitem. Used as the loop
bound when a (*VERB) name is searched for in compile_branch. */
220 static const int verbcount =
sizeof(verbs)/
sizeof(
verbitem);
229 static const char posix_names[] =
/* Lengths of the POSIX class names, one entry per name, ending with a 0
sentinel. check_posix_name walks this table in step with posix_names: it
compares the candidate length against each entry, advances through
posix_names by (length + 1) per entry, and stops when it reaches the 0. */
235 static const uschar posix_name_lengths[] = {
236 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
248 static const int posix_class_maps[] = {
270 static const uschar *substitutes[] = {
279 static const uschar *posix_substitutes[] = {
/* Number of entries in posix_substitutes[]. compile_branch adds
POSIX_SUBSIZE/2 to the class index when the class is locally negated, i.e.
it indexes the second half of the table for negated classes. */
310 #define POSIX_SUBSIZE (sizeof(posix_substitutes)/sizeof(uschar *))
/* Two-level stringification. STRING(a) turns its argument, exactly as
written, into a string literal. XSTRING(s) passes s through one more macro
level first, so if s is itself a macro it is expanded before being turned
into a string. */
313 #define STRING(a) # a
314 #define XSTRING(s) STRING(s)
330 static const char error_texts[] =
332 "\\ at end of pattern\0"
333 "\\c at end of pattern\0"
334 "unrecognized character follows \\\0"
335 "numbers out of order in {} quantifier\0"
337 "number too big in {} quantifier\0"
338 "missing terminating ] for character class\0"
339 "invalid escape sequence in character class\0"
340 "range out of order in character class\0"
341 "nothing to repeat\0"
343 "operand of unlimited repeat could match the empty string\0"
344 "internal error: unexpected repeat\0"
345 "unrecognized character after (? or (?-\0"
346 "POSIX named classes are supported only within a class\0"
349 "reference to non-existent subpattern\0"
350 "erroffset passed as NULL\0"
351 "unknown option bit(s) set\0"
352 "missing ) after comment\0"
353 "parentheses nested too deeply\0"
355 "regular expression is too large\0"
356 "failed to get memory\0"
357 "unmatched parentheses\0"
358 "internal error: code overflow\0"
359 "unrecognized character after (?<\0"
361 "lookbehind assertion is not fixed length\0"
362 "malformed number or name after (?(\0"
363 "conditional group contains more than two branches\0"
364 "assertion expected after (?(\0"
365 "(?R or (?[+-]digits must be followed by )\0"
367 "unknown POSIX class name\0"
368 "POSIX collating elements are not supported\0"
369 "this version of PCRE is not compiled with PCRE_UTF8 support\0"
371 "character value in \\x{...} sequence is too large\0"
373 "invalid condition (?(0)\0"
374 "\\C not allowed in lookbehind assertion\0"
375 "PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0"
376 "number after (?C is > 255\0"
377 "closing ) for (?C expected\0"
379 "recursive call could loop indefinitely\0"
380 "unrecognized character after (?P\0"
381 "syntax error in subpattern name (missing terminator)\0"
382 "two named subpatterns have the same name\0"
383 "invalid UTF-8 string\0"
385 "support for \\P, \\p, and \\X has not been compiled\0"
386 "malformed \\P or \\p sequence\0"
387 "unknown property name after \\P or \\p\0"
391 "repeated subpattern is too long\0"
392 "octal value is greater than \\377 (not in UTF-8 mode)\0"
393 "internal error: overran compiling workspace\0"
394 "internal error: previously-checked referenced subpattern not found\0"
395 "DEFINE group contains more than one branch\0"
397 "repeating a DEFINE group is not allowed\0"
398 "inconsistent NEWLINE options\0"
399 "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
400 "a numbered reference must not be zero\0"
401 "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
403 "(*VERB) not recognized\0"
404 "number is too big\0"
405 "subpattern name expected\0"
406 "digit expected after (?+\0"
407 "] is an invalid data character in JavaScript compatibility mode\0"
409 "different names for subpatterns of the same number are not allowed\0"
410 "(*MARK) must have an argument\0"
411 "this version of PCRE is not compiled with PCRE_UCP support\0"
412 "\\c must be followed by an ASCII character\0"
/* Per-byte digit classification table with 256 entries, indexed by byte
value. Callers test entries against ctype_digit and ctype_xdigit, e.g.
(digitab[*p] & ctype_digit) != 0.

In this variant the non-zero entries are 0x0c at indices 48-57 and 0x08 at
indices 65-70 and 97-102, which are the ASCII positions of '0'-'9', 'A'-'F'
and 'a'-'f'.
NOTE(review): presumably 0x0c is ctype_digit|ctype_xdigit and 0x08 is
ctype_xdigit alone -- confirm against the ctype_* definitions. A second
definition of digitab follows; the preprocessor lines that select between
the two are not visible here. */
436 static const unsigned char digitab[] =
438 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
439 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
440 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
441 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
442 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
443 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
444 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,
445 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00,
446 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00,
447 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
448 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
449 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
450 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00,
451 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
452 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
453 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
454 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
455 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
456 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
457 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
458 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
459 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
460 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
461 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
462 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
463 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
464 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
465 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
466 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
467 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
468 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
469 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
/* Alternative definition of the 256-entry digit classification table,
indexed by byte value and tested against ctype_digit / ctype_xdigit in the
same way as the other variant.

Here the non-zero entries are 0x08 at indices 129-134 and 193-198, and 0x0c
at indices 240-249. That layout is consistent with an EBCDIC code page
(a-f, A-F and 0-9 respectively).
NOTE(review): the conditional-compilation directives that choose between
the two digitab definitions are not visible here -- confirm which build
uses this one. */
475 static const unsigned char digitab[] =
477 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
478 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
479 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
480 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
481 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
482 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
483 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
484 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
485 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
486 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
487 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
488 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
489 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
490 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
491 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
492 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
493 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00,
494 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
495 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
496 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
497 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
498 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
499 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
500 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
501 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00,
502 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
503 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
504 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
505 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
506 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
507 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,
508 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};
/* Per-byte character-type flags with 256 entries, indexed by byte value.
check_escape masks an entry with 0x0E; when none of those bits is set
(or the character is below 'a') the escaped character is left as it is
instead of being looked up in escapes[].
NOTE(review): the meaning of the individual flag bits (0x01, 0x02, 0x04,
0x08, 0x10, 0x80) is not established by the code visible here -- confirm
against the ctype_* definitions before relying on them. */
510 static const unsigned char ebcdic_chartab[] = {
511 0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00,
512 0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00,
513 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,
514 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
515 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,
516 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
517 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
518 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
519 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
520 0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80,
521 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
522 0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00,
523 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
524 0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80,
525 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
526 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
527 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12,
528 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
529 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
530 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
531 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12,
532 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
533 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
534 0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00,
535 0x80,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12,
536 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
537 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
538 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
539 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12,
540 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00,
541 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,
542 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};
568 find_error_text(
int n)
570 const char *s = error_texts;
573 while (*s++ != 0) {};
574 if (*s == 0)
return "Error text not found (please report)";
605 check_escape(
const uschar **ptrptr,
int *errorcodeptr,
int bracount,
606 int options,
BOOL isclass)
609 const uschar *ptr = *ptrptr + 1;
617 if (c == 0) *errorcodeptr =
ERR1;
624 else if (c < CHAR_0 || c >
CHAR_z) {}
625 else if ((i = escapes[c -
CHAR_0]) != 0) c = i;
628 else if (c <
'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}
629 else if ((i = escapes[c - 0x48]) != 0) c = i;
637 BOOL braced, negated;
648 *errorcodeptr =
ERR37;
696 else negated =
FALSE;
699 while ((digitab[ptr[1]] & ctype_digit) != 0)
700 c = c * 10 + *(++ptr) -
CHAR_0;
704 *errorcodeptr =
ERR61;
710 *errorcodeptr =
ERR57;
716 *errorcodeptr =
ERR58;
724 *errorcodeptr =
ERR15;
727 c = bracount - (c - 1);
752 while ((digitab[ptr[1]] & ctype_digit) != 0)
753 c = c * 10 + *(++ptr) -
CHAR_0;
756 *errorcodeptr =
ERR61;
759 if (c < 10 || c <= bracount)
787 c = c * 8 + *(++ptr) -
CHAR_0;
788 if (!utf8 && c > 255) *errorcodeptr =
ERR51;
798 const uschar *pt = ptr + 2;
804 register int cc = *pt++;
805 if (c == 0 && cc ==
CHAR_0)
continue;
809 if (cc >=
CHAR_a) cc -= 32;
812 if (cc >=
CHAR_a && cc <= CHAR_z) cc += 64;
819 if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr =
ERR34;
831 while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)
836 if (cc >=
CHAR_a) cc -= 32;
839 if (cc <= CHAR_z) cc += 64;
854 *errorcodeptr =
ERR2;
860 *errorcodeptr =
ERR68;
863 if (c >=
CHAR_a && c <= CHAR_z) c -= 32;
866 if (c >=
CHAR_a && c <= CHAR_z) c += 64;
881 *errorcodeptr =
ERR3;
892 *errorcodeptr =
ERR37;
927 get_ucp(
const uschar **ptrptr,
BOOL *negptr,
int *dptr,
int *errorcodeptr)
930 const uschar *ptr = *ptrptr;
934 if (c == 0)
goto ERROR_RETURN;
948 for (i = 0; i < (int)
sizeof(name) - 1; i++)
951 if (c == 0)
goto ERROR_RETURN;
976 i = (bot + top) >> 1;
977 c = strcmp(name, _pcre_utt_names + _pcre_utt[i].name_offset);
983 if (c > 0) bot = i + 1;
else top = i;
986 *errorcodeptr =
ERR47;
991 *errorcodeptr =
ERR46;
1016 is_counted_repeat(
const uschar *p)
1018 if ((digitab[*p++] & ctype_digit) == 0)
return FALSE;
1019 while ((digitab[*p] & ctype_digit) != 0) p++;
1025 if ((digitab[*p++] & ctype_digit) == 0)
return FALSE;
1026 while ((digitab[*p] & ctype_digit) != 0) p++;
1053 read_repeat_counts(
const uschar *p,
int *minp,
int *maxp,
int *errorcodeptr)
1061 while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ -
CHAR_0;
1062 if (min < 0 || min > 65535)
1064 *errorcodeptr =
ERR5;
1076 while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ -
CHAR_0;
1077 if (max < 0 || max > 65535)
1079 *errorcodeptr =
ERR5;
1084 *errorcodeptr =
ERR4;
1142 int start_count = *count;
1143 int hwm_count = start_count;
1160 if (name == NULL && *count == lorn)
return *count;
1191 if (*ptr != 0) ptr++;
1200 if (*ptr ==
CHAR_P) ptr++;
1210 if (name == NULL && *count == lorn)
return *count;
1214 while (*ptr != term) ptr++;
1215 if (name != NULL && lorn == ptr - thisname &&
1216 strncmp((
const char *)name, (
const char *)thisname, lorn) == 0)
1234 if (*(++ptr) == 0)
goto FAIL_EXIT;
1235 if (*ptr ==
CHAR_Q)
for (;;)
1238 if (*ptr == 0)
goto FAIL_EXIT;
1239 if (*(++ptr) ==
CHAR_E)
break;
1259 else if (strncmp((
const char *)ptr+2,
1267 negate_class =
TRUE;
1282 if (*ptr == 0)
return -1;
1285 if (*(++ptr) == 0)
goto FAIL_EXIT;
1286 if (*ptr ==
CHAR_Q)
for (;;)
1289 if (*ptr == 0)
goto FAIL_EXIT;
1290 if (*(++ptr) ==
CHAR_E)
break;
1308 if (utf8)
while ((*ptr & 0xc0) == 0x80) ptr++;
1311 if (*ptr == 0)
goto FAIL_EXIT;
1319 int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf8, count);
1320 if (rc > 0)
return rc;
1321 if (*ptr == 0)
goto FAIL_EXIT;
1326 if (dup_parens && *count < hwm_count) *count = hwm_count;
1332 if (*count > hwm_count) hwm_count = *count;
1333 *count = start_count;
1385 rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf8, &count);
1386 if (rc > 0 || *ptr++ == 0)
break;
1416 first_significant_code(
const uschar *code,
int *options,
int optbit,
1424 if (optbit > 0 && ((
int)code[1] & optbit) != (*options & optbit))
1425 *options = (int)code[1];
1432 if (!skipassert)
return code;
1433 do code += GET(code, 1);
while (*code ==
OP_ALT);
1439 if (!skipassert)
return code;
1493 register int branchlength = 0;
1503 register int op = *cc;
1510 d = find_fixedlength(cc + ((op ==
OP_CBRA)? 2:0), options, atend, cd);
1511 if (d < 0)
return d;
1513 do cc += GET(cc, 1);
while (*cc ==
OP_ALT);
1526 if (length < 0) length = branchlength;
1527 else if (length != branchlength)
return -1;
1528 if (*cc !=
OP_ALT)
return length;
1538 if (!atend)
return -3;
1540 do ce += GET(ce, 1);
while (*ce ==
OP_ALT);
1541 if (cc > cs && cc < ce)
return -1;
1542 d = find_fixedlength(cs + 2, options, atend, cd);
1543 if (d < 0)
return d;
1554 do cc += GET(cc, 1);
while (*cc ==
OP_ALT);
1587 if ((options &
PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
1596 branchlength +=
GET2(cc,1);
1599 if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
1605 branchlength +=
GET2(cc,1);
1638 cc += GET(cc, 1) - 33;
1656 if (
GET2(cc,1) !=
GET2(cc,3))
return -1;
1657 branchlength +=
GET2(cc,1);
1701 register int c = *code;
1702 if (c ==
OP_END)
return NULL;
1708 if (c ==
OP_XCLASS) code += GET(code, 1);
1714 if (number < 0)
return (
uschar *)code;
1722 int n =
GET2(code, 1+LINK_SIZE);
1723 if (n == number)
return (
uschar *)code;
1823 register int c = *code;
1824 if (c ==
OP_END)
return NULL;
1831 if (c ==
OP_XCLASS) code += GET(code, 1);
1932 could_be_empty_branch(
const uschar *code,
const uschar *endcode,
BOOL utf8,
1949 do code += GET(code, 1);
while (*code ==
OP_ALT);
1959 do code += GET(code, 1);
while (*code ==
OP_ALT);
1971 if (GET(scode, 1) == 0)
return TRUE;
1974 if (could_be_empty_branch(scode, endcode, utf8, cd))
1976 empty_branch =
TRUE;
1979 scode += GET(scode, 1);
1981 while (*scode ==
OP_ALT);
1982 if (!empty_branch)
return FALSE;
1991 if (GET(code, 1) == 0)
return TRUE;
1998 code += GET(code, 1);
2001 empty_branch =
FALSE;
2004 if (!empty_branch && could_be_empty_branch(code, endcode, utf8, cd))
2005 empty_branch =
TRUE;
2006 code += GET(code, 1);
2009 if (!empty_branch)
return FALSE;
2028 ccode = code += GET(code, 1);
2029 goto CHECK_CLASS_REPEAT;
2189 if (!could_be_empty_branch(bcptr->
current_branch, endcode, utf8, cd))
2191 bcptr = bcptr->
outer;
2230 check_posix_syntax(
const uschar *ptr,
const uschar **endptr)
2233 terminator = *(++ptr);
2234 for (++ptr; *ptr != 0; ptr++)
2267 check_posix_name(
const uschar *ptr,
int len)
2269 const char *pn = posix_names;
2270 register int yield = 0;
2271 while (posix_name_lengths[yield] != 0)
2273 if (len == posix_name_lengths[yield] &&
2274 strncmp((
const char *)ptr, pn, len) == 0)
return yield;
2275 pn += posix_name_lengths[yield] + 1;
2319 while ((ptr = (
uschar *)find_recurse(ptr, utf8)) != NULL)
2329 offset = GET(hc, 0);
2332 PUT(hc, 0, offset + adjust);
2342 offset = GET(ptr, 1);
2343 if (cd->
start_code + offset >= group) PUT(ptr, 1, offset + adjust);
2373 PUT(code, LINK_SIZE, 0);
2398 int length = (int)(ptr - cd->
start_pattern - GET(previous_callout, 2));
2399 PUT(previous_callout, 2 + LINK_SIZE, length);
2424 get_othercase_range(
unsigned int *cptr,
unsigned int d,
unsigned int *ocptr,
2425 unsigned int *odptr)
2427 unsigned int c, othercase, next;
2429 for (c = *cptr; c <= d; c++)
2432 if (c > d)
return FALSE;
2435 next = othercase + 1;
2437 for (++c; c <= d; c++)
2468 check_char_prop(
int c,
int ptype,
int pdata,
BOOL negated)
2479 return (pdata == _pcre_ucp_gentype[prop->
chartype]) == negated;
2482 return (pdata == prop->
chartype) == negated;
2485 return (pdata == prop->
script) == negated;
2534 check_auto_possessive(
const uschar *previous,
BOOL utf8,
const uschar *ptr,
2538 int op_code = *previous++;
2555 if (utf8)
while ((*ptr & 0xc0) == 0x80) ptr++;
2568 int temperrorcode = 0;
2569 next = check_escape(&ptr, &temperrorcode, cd->
bracount, options,
FALSE);
2570 if (temperrorcode != 0)
return FALSE;
2586 if ((options & PCRE_EXTENDED) != 0)
2590 while ((cd->
ctypes[*ptr] & ctype_space) != 0) ptr++;
2599 if (utf8)
while ((*ptr & 0xc0) == 0x80) ptr++;
2616 if (next >= 0)
switch(op_code)
2636 if (c == next)
return FALSE;
2640 unsigned int othercase;
2641 if (next < 128) othercase = cd->
fcc[next];
else
2647 return (
unsigned int)c != othercase;
2651 return (c != cd->
fcc[next]);
2656 if ((c = *previous) == next)
return TRUE;
2661 unsigned int othercase;
2662 if (next < 128) othercase = cd->
fcc[next];
else
2668 return (
unsigned int)c == othercase;
2672 return (c == cd->
fcc[next]);
2742 return check_char_prop(next, previous[0], previous[1],
FALSE);
2745 return check_char_prop(next, previous[0], previous[1],
TRUE);
2811 return -next !=
ESC_h;
2813 return -next ==
ESC_h;
2827 return -next !=
ESC_v;
2829 return -next ==
ESC_v;
2844 int temperrorcode = 0;
2845 ptr = substitutes[-next -
ESC_DU];
2846 next = check_escape(&ptr, &temperrorcode, 0, options,
FALSE);
2847 if (temperrorcode != 0)
return FALSE;
2855 int ptype, pdata, errorcodeptr;
2859 ptype = get_ucp(&ptr, &negated, &pdata, &errorcodeptr);
2860 if (ptype < 0)
return FALSE;
2873 return check_char_prop(c, ptype, pdata, (next == -
ESC_P) != negated);
2892 return next == -
ESC_d;
2905 return next == -
ESC_h;
2958 compile_branch(
int *optionsptr,
uschar **codeptr,
const uschar **ptrptr,
2959 int *errorcodeptr,
int *firstbyteptr,
int *reqbyteptr,
branch_chain *bcptr,
2962 int repeat_type, op_type;
2963 int repeat_min = 0, repeat_max = 0;
2965 int greedy_default, greedy_non_default;
2966 int firstbyte, reqbyte;
2967 int zeroreqbyte, zerofirstbyte;
2968 int req_caseopt, reqvary, tempreqvary;
2969 int options = *optionsptr;
2970 int after_manual_callout = 0;
2971 int length_prevgroup = 0;
2973 register uschar *code = *codeptr;
2974 uschar *last_code = code;
2975 uschar *orig_code = code;
2979 const uschar *ptr = *ptrptr;
2981 const uschar *nestptr = NULL;
2983 uschar *previous_callout = NULL;
2991 uschar *class_utf8data_base;
2995 uschar *utf8_char = NULL;
2999 if (lengthptr != NULL)
DPRINTF((
">> start branch\n"));
3005 greedy_non_default = greedy_default ^ 1;
3017 firstbyte = reqbyte = zerofirstbyte = zeroreqbyte =
REQ_UNSET;
3031 BOOL should_flip_negation;
3032 BOOL possessive_quantifier;
3035 BOOL reset_bracount;
3036 int class_charcount;
3055 if (c == 0 && nestptr != NULL)
3065 if (lengthptr != NULL)
3068 if (code > cd->
hwm) cd->
hwm = code;
3072 *errorcodeptr =
ERR52;
3082 if (code < last_code) code = last_code;
3086 if (
OFLOW_MAX - *lengthptr < code - last_code)
3088 *errorcodeptr =
ERR20;
3092 *lengthptr += (int)(code - last_code);
3093 DPRINTF((
"length=%d added %d c=%c\n", *lengthptr, code - last_code, c));
3099 if (previous != NULL)
3101 if (previous > orig_code)
3103 memmove(orig_code, previous, code - previous);
3104 code -= previous - orig_code;
3105 previous = orig_code;
3108 else code = orig_code;
3121 *errorcodeptr =
ERR52;
3127 if (inescq && c != 0)
3137 if (previous_callout != NULL)
3139 if (lengthptr == NULL)
3140 complete_callout(previous_callout, ptr, cd);
3141 previous_callout = NULL;
3145 previous_callout = code;
3146 code = auto_callout(code, ptr, cd);
3159 if (!is_quantifier && previous_callout != NULL &&
3160 after_manual_callout-- <= 0)
3162 if (lengthptr == NULL)
3163 complete_callout(previous_callout, ptr, cd);
3164 previous_callout = NULL;
3169 if ((options & PCRE_EXTENDED) != 0)
3171 if ((cd->
ctypes[c] & ctype_space) != 0)
continue;
3180 if (utf8)
while ((*ptr & 0xc0) == 0x80) ptr++;
3183 if (*ptr != 0)
continue;
3194 previous_callout = code;
3195 code = auto_callout(code, ptr, cd);
3204 *firstbyteptr = firstbyte;
3205 *reqbyteptr = reqbyte;
3208 if (lengthptr != NULL)
3210 if (
OFLOW_MAX - *lengthptr < code - last_code)
3212 *errorcodeptr =
ERR20;
3215 *lengthptr += (int)(code - last_code);
3244 zerofirstbyte = firstbyte;
3245 zeroreqbyte = reqbyte;
3269 *errorcodeptr =
ERR64;
3282 check_posix_syntax(ptr, &tempptr))
3292 negate_class =
FALSE;
3300 else if (strncmp((
const char *)ptr+1,
3307 negate_class =
TRUE;
3321 zerofirstbyte = firstbyte;
3329 should_flip_negation =
FALSE;
3335 class_charcount = 0;
3336 class_lastchar = -1;
3343 memset(classbits, 0, 32 *
sizeof(
uschar));
3347 class_utf8data = code + LINK_SIZE + 2;
3348 class_utf8data_base = class_utf8data;
3360 if (utf8 && c > 127)
3370 if (lengthptr != NULL)
3372 *lengthptr += class_utf8data - class_utf8data_base;
3373 class_utf8data = class_utf8data_base;
3402 int posix_class, taboffset, tabopt;
3408 *errorcodeptr =
ERR31;
3415 local_negate =
TRUE;
3416 should_flip_negation =
TRUE;
3420 posix_class = check_posix_name(ptr, (
int)(tempptr - ptr));
3421 if (posix_class < 0)
3423 *errorcodeptr =
ERR30;
3431 if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)
3438 if ((options & PCRE_UCP) != 0)
3440 int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0);
3441 if (posix_substitutes[pc] != NULL)
3443 nestptr = tempptr + 1;
3444 ptr = posix_substitutes[pc] - 1;
3458 memcpy(pbits, cbits + posix_class_maps[posix_class],
3463 taboffset = posix_class_maps[posix_class + 1];
3464 tabopt = posix_class_maps[posix_class + 2];
3469 for (c = 0; c < 32; c++) pbits[c] |= cbits[c + taboffset];
3471 for (c = 0; c < 32; c++) pbits[c] &= ~cbits[c + taboffset];
3477 if (tabopt < 0) tabopt = -tabopt;
3478 if (tabopt == 1) pbits[1] &= ~0x3c;
3479 else if (tabopt == 2) pbits[11] &= 0x7f;
3485 for (c = 0; c < 32; c++) classbits[c] |= ~pbits[c];
3487 for (c = 0; c < 32; c++) classbits[c] |= pbits[c];
3490 class_charcount = 10;
3504 c = check_escape(&ptr, errorcodeptr, cd->
bracount, options,
TRUE);
3505 if (*errorcodeptr != 0)
goto FAILED;
3508 else if (-c ==
ESC_Q)
3517 else if (-c ==
ESC_E)
continue;
3522 class_charcount += 2;
3534 ptr = substitutes[-c -
ESC_DU] - 1;
3535 class_charcount -= 2;
3539 for (c = 0; c < 32; c++) classbits[c] |= cbits[c+
cbit_digit];
3543 should_flip_negation =
TRUE;
3544 for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+
cbit_digit];
3548 for (c = 0; c < 32; c++) classbits[c] |= cbits[c+
cbit_word];
3552 should_flip_negation =
TRUE;
3553 for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+
cbit_word];
3563 for (c = 2; c < 32; c++) classbits[c] |= cbits[c+
cbit_space];
3567 should_flip_negation =
TRUE;
3568 for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+
cbit_space];
3569 classbits[1] |= 0x08;
3598 for (c = 0; c < 32; c++)
3603 case 0x09/8: x ^= 1 << (0x09%8);
break;
3604 case 0x20/8: x ^= 1 << (0x20%8);
break;
3605 case 0xa0/8: x ^= 1 << (0xa0%8);
break;
3658 for (c = 0; c < 32; c++)
3663 case 0x0a/8: x ^= 1 << (0x0a%8);
3668 case 0x85/8: x ^= 1 << (0x85%8);
break;
3694 int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);
3695 if (ptype < 0)
goto FAILED;
3697 *class_utf8data++ = ((-c ==
ESC_p) != negated)?
3699 *class_utf8data++ = ptype;
3700 *class_utf8data++ = pdata;
3701 class_charcount -= 2;
3712 *errorcodeptr =
ERR7;
3715 class_charcount -= 2;
3759 { ptr += 2;
continue; }
3767 goto LONE_SINGLE_CHARACTER;
3785 d = check_escape(&ptr, errorcodeptr, cd->
bracount, options,
TRUE);
3786 if (*errorcodeptr != 0)
goto FAILED;
3795 goto LONE_SINGLE_CHARACTER;
3805 *errorcodeptr =
ERR8;
3809 if (d == c)
goto LONE_SINGLE_CHARACTER;
3821 if (utf8 && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))
3830 if ((options & PCRE_CASELESS) != 0)
3832 unsigned int occ, ocd;
3833 unsigned int cc = c;
3834 unsigned int origd = d;
3835 while (get_othercase_range(&cc, origd, &occ, &ocd))
3837 if (occ >= (
unsigned int)c &&
3838 ocd <= (
unsigned int)d)
3841 if (occ < (
unsigned int)c &&
3842 ocd >= (
unsigned int)c - 1)
3847 if (ocd > (
unsigned int)d &&
3848 occ <= (
unsigned int)d + 1)
3882 if ((options & PCRE_CASELESS) == 0 || c > 127)
continue;
3896 class_charcount += d - c + 1;
3901 if (lengthptr == NULL)
for (; c <= d; c++)
3903 classbits[c/8] |= (1 << (c&7));
3904 if ((options & PCRE_CASELESS) != 0)
3906 int uc = cd->
fcc[c];
3907 classbits[uc/8] |= (1 << (uc&7));
3918 LONE_SINGLE_CHARACTER:
3923 if (utf8 && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))
3930 if ((options & PCRE_CASELESS) != 0)
3932 unsigned int othercase;
3947 classbits[c/8] |= (1 << (c&7));
3948 if ((options & PCRE_CASELESS) != 0)
3951 classbits[c/8] |= (1 << (c&7));
3962 while (((c = *(++ptr)) != 0 ||
3964 (ptr = nestptr, nestptr = NULL, c = *(++ptr)) != 0)) &&
3971 *errorcodeptr =
ERR6;
3993 if (class_charcount == 1 && !class_utf8 &&
3994 (!utf8 || !negate_class || class_lastchar < 128))
3996 if (class_charcount == 1)
3999 zeroreqbyte = reqbyte;
4006 zerofirstbyte = firstbyte;
4008 *code++ = class_lastchar;
4016 if (utf8 && class_lastchar > 127)
4021 mcbuffer[0] = class_lastchar;
4033 zerofirstbyte = firstbyte;
4034 zeroreqbyte = reqbyte;
4045 if (class_utf8 && (!should_flip_negation || (options & PCRE_UCP) != 0))
4050 *code = negate_class?
XCL_NOT : 0;
4055 if (class_charcount > 0)
4058 memmove(code + 32, code, class_utf8data - code);
4059 memcpy(code, classbits, 32);
4060 code = class_utf8data + 32;
4062 else code = class_utf8data;
4066 PUT(previous, 1, code - previous);
4080 if (lengthptr == NULL)
4081 for (c = 0; c < 32; c++) code[c] = ~classbits[c];
4085 memcpy(code, classbits, 32);
4096 if (!is_quantifier)
goto NORMAL_CHAR;
4097 ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);
4098 if (*errorcodeptr != 0)
goto FAILED;
4116 if (previous == NULL)
4118 *errorcodeptr =
ERR9;
4122 if (repeat_min == 0)
4124 firstbyte = zerofirstbyte;
4125 reqbyte = zeroreqbyte;
4130 reqvary = (repeat_min == repeat_max)? 0 :
REQ_VARY;
4133 possessive_quantifier =
FALSE;
4138 tempcode = previous;
4149 possessive_quantifier =
TRUE;
4154 repeat_type = greedy_non_default;
4157 else repeat_type = greedy_default;
4173 if (utf8 && (code[-1] & 0x80) != 0)
4175 uschar *lastchar = code - 1;
4176 while((*lastchar & 0xc0) == 0x80) lastchar--;
4177 c = code - lastchar;
4178 memcpy(utf8_char, lastchar, c);
4189 if (repeat_min > 1) reqbyte = c | req_caseopt | cd->
req_varyopt;
4197 if (!possessive_quantifier &&
4199 check_auto_possessive(previous, utf8, ptr + 1, options, cd))
4202 possessive_quantifier =
TRUE;
4205 goto OUTPUT_SINGLE_REPEAT;
4214 else if (*previous ==
OP_NOT)
4218 if (!possessive_quantifier &&
4220 check_auto_possessive(previous, utf8, ptr + 1, options, cd))
4223 possessive_quantifier =
TRUE;
4225 goto OUTPUT_SINGLE_REPEAT;
4238 int prop_type, prop_value;
4242 if (!possessive_quantifier &&
4244 check_auto_possessive(previous, utf8, ptr + 1, options, cd))
4247 possessive_quantifier =
TRUE;
4250 OUTPUT_SINGLE_REPEAT:
4253 prop_type = previous[1];
4254 prop_value = previous[2];
4256 else prop_type = prop_value = -1;
4264 if (repeat_max == 0)
goto END_REPEAT;
4278 repeat_type += op_type;
4283 if (repeat_min == 0)
4285 if (repeat_max == -1) *code++ = OP_STAR + repeat_type;
4286 else if (repeat_max == 1) *code++ =
OP_QUERY + repeat_type;
4289 *code++ =
OP_UPTO + repeat_type;
4299 else if (repeat_min == 1)
4301 if (repeat_max == -1)
4302 *code++ =
OP_PLUS + repeat_type;
4306 if (repeat_max == 1)
goto END_REPEAT;
4307 *code++ =
OP_UPTO + repeat_type;
4308 PUT2INC(code, 0, repeat_max - 1);
4329 if (utf8 && c >= 128)
4331 memcpy(code, utf8_char, c & 7);
4340 *code++ = prop_type;
4341 *code++ = prop_value;
4344 *code++ = OP_STAR + repeat_type;
4351 else if (repeat_max != repeat_min)
4354 if (utf8 && c >= 128)
4356 memcpy(code, utf8_char, c & 7);
4364 *code++ = prop_type;
4365 *code++ = prop_value;
4367 repeat_max -= repeat_min;
4369 if (repeat_max == 1)
4375 *code++ =
OP_UPTO + repeat_type;
4384 if (utf8 && c >= 128)
4386 memcpy(code, utf8_char, c & 7);
4399 *code++ = prop_type;
4400 *code++ = prop_value;
4415 if (repeat_max == 0)
4431 if (repeat_min == 0 && repeat_max == -1)
4433 else if (repeat_min == 1 && repeat_max == -1)
4435 else if (repeat_min == 0 && repeat_max == 1)
4441 if (repeat_max == -1) repeat_max = 0;
4454 int len = (int)(code - previous);
4459 if (*previous ==
OP_COND && previous[LINK_SIZE+1] ==
OP_DEF)
4461 *errorcodeptr =
ERR55;
4471 if (repeat_max == -1)
4473 register uschar *ket = previous;
4474 do ket += GET(ket, 1);
while (*ket !=
OP_KET);
4475 ketoffset = (int)(code - ket);
4485 if (repeat_min == 0)
4508 if (repeat_max <= 1)
4511 adjust_recurse(previous, 1, utf8, cd, save_hwm);
4512 memmove(previous+1, previous, len);
4514 if (repeat_max == 0)
4534 adjust_recurse(previous, 2 + LINK_SIZE, utf8, cd, save_hwm);
4535 memmove(previous + 2 + LINK_SIZE, previous, len);
4543 offset = (bralink == NULL)? 0 : (
int)(previous - bralink);
4545 PUTINC(previous, 0, offset);
4567 if (lengthptr != NULL)
4569 int delta = (repeat_min - 1)*length_prevgroup;
4575 *errorcodeptr =
ERR20;
4578 *lengthptr += delta;
4585 if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;
4586 for (i = 1; i < repeat_min; i++)
4590 memcpy(code, previous, len);
4591 for (hc = save_hwm; hc < this_hwm; hc +=
LINK_SIZE)
4593 PUT(cd->
hwm, 0, GET(hc, 0) + len);
4596 save_hwm = this_hwm;
4602 if (repeat_max > 0) repeat_max -= repeat_min;
4612 if (repeat_max >= 0)
4621 if (lengthptr != NULL && repeat_max > 0)
4623 int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*
LINK_SIZE) -
4630 *errorcodeptr =
ERR20;
4633 *lengthptr += delta;
4638 else for (i = repeat_max - 1; i >= 0; i--)
4652 offset = (bralink == NULL)? 0 : (
int)(code - bralink);
4657 memcpy(code, previous, len);
4658 for (hc = save_hwm; hc < this_hwm; hc +=
LINK_SIZE)
4660 PUT(cd->
hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
4663 save_hwm = this_hwm;
4670 while (bralink != NULL)
4673 int offset = (int)(code - bralink + 1);
4674 uschar *bra = code - offset;
4675 oldlinkoffset = GET(bra, 1);
4676 bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
4679 PUT(bra, 1, offset);
4696 uschar *ketcode = code - ketoffset;
4697 uschar *bracode = ketcode - GET(ketcode, 1);
4699 if (lengthptr == NULL && *bracode !=
OP_ONCE)
4704 if (could_be_empty_branch(scode, ketcode, utf8, cd))
4709 scode += GET(scode, 1);
4711 while (*scode ==
OP_ALT);
4721 else if (*previous ==
OP_FAIL)
goto END_REPEAT;
4727 *errorcodeptr =
ERR11;
4744 if (possessive_quantifier)
4756 if (utf8 && tempcode[-1] >= 0xc0)
4761 len = (int)(code - tempcode);
4762 if (len > 0)
switch (*tempcode)
4784 adjust_recurse(tempcode, 1 + LINK_SIZE, utf8, cd, save_hwm);
4785 memmove(tempcode + 1+LINK_SIZE, tempcode, len);
4791 PUT(tempcode, 1, len);
4812 newoptions = options;
4816 reset_bracount =
FALSE;
4825 const char *vn = verbnames;
4826 const uschar *name = ptr + 1;
4827 const uschar *arg = NULL;
4829 while ((cd->
ctypes[*++ptr] & ctype_letter) != 0) {};
4830 namelen = (int)(ptr - name);
4835 while ((cd->
ctypes[*ptr] & (ctype_letter|ctype_digit)) != 0
4836 || *ptr ==
'_') ptr++;
4837 arglen = (int)(ptr - arg);
4842 *errorcodeptr =
ERR60;
4848 for (i = 0; i < verbcount; i++)
4850 if (namelen == verbs[i].len &&
4851 strncmp((
char *)name, vn, namelen) == 0)
4870 if (verbs[i].op < 0)
4872 *errorcodeptr =
ERR66;
4875 *code = verbs[i].op;
4885 if (verbs[i].op_arg < 0)
4887 *errorcodeptr =
ERR59;
4890 *code = verbs[i].op_arg;
4897 memcpy(code, arg, arglen);
4905 vn += verbs[i].len + 1;
4908 if (i < verbcount)
continue;
4909 *errorcodeptr =
ERR60;
4918 int i,
set, unset, namelen;
4930 *errorcodeptr =
ERR18;
4938 reset_bracount =
TRUE;
5013 *errorcodeptr =
ERR28;
5021 while ((cd->
ctypes[*ptr] & ctype_word) != 0)
5025 recno * 10 + *ptr - CHAR_0 : -1;
5028 namelen = (int)(ptr - name);
5030 if ((terminator > 0 && *ptr++ != terminator) ||
5034 *errorcodeptr =
ERR26;
5040 if (lengthptr != NULL)
break;
5050 *errorcodeptr =
ERR58;
5057 *errorcodeptr =
ERR15;
5060 PUT2(code, 2+LINK_SIZE, recno);
5073 if (strncmp((
char *)name, (
char *)slot+2, namelen) == 0)
break;
5079 if (i < cd->names_found)
5081 recno =
GET2(slot, 0);
5082 PUT2(code, 2+LINK_SIZE, recno);
5088 else if ((i = find_parens(cd, name, namelen,
5089 (options & PCRE_EXTENDED) != 0, utf8)) > 0)
5091 PUT2(code, 2+LINK_SIZE, i);
5101 else if (terminator != 0)
5103 *errorcodeptr =
ERR15;
5110 else if (*name ==
CHAR_R)
5113 for (i = 1; i < namelen; i++)
5115 if ((digitab[name[i]] & ctype_digit) == 0)
5117 *errorcodeptr =
ERR15;
5120 recno = recno * 10 + name[i] -
CHAR_0;
5124 PUT2(code, 2+LINK_SIZE, recno);
5130 else if (namelen == 6 && strncmp((
char *)name,
STRING_DEFINE, 6) == 0)
5139 else if (recno > 0 && recno <= cd->final_bracount)
5141 PUT2(code, 2+LINK_SIZE, recno);
5189 if ((cd->
ctypes[ptr[1]] & ctype_word) != 0)
goto DEFINE_NAME;
5191 *errorcodeptr =
ERR24;
5206 previous_callout = code;
5207 after_manual_callout = 1;
5211 while ((digitab[*(++ptr)] & ctype_digit) != 0)
5212 n = n * 10 + *ptr - CHAR_0;
5215 *errorcodeptr =
ERR39;
5220 *errorcodeptr =
ERR38;
5225 PUT(code, LINK_SIZE, 0);
5239 goto NAMED_REF_OR_RECURSE;
5243 *errorcodeptr =
ERR41;
5257 while ((cd->
ctypes[*ptr] & ctype_word) != 0) ptr++;
5258 namelen = (int)(ptr - name);
5262 if (lengthptr != NULL)
5264 if (*ptr != terminator)
5266 *errorcodeptr =
ERR42;
5271 *errorcodeptr =
ERR49;
5279 *errorcodeptr =
ERR48;
5305 int crc = memcmp(name, slot+2, namelen);
5308 if (slot[2+namelen] == 0)
5313 *errorcodeptr =
ERR43;
5316 else dupname =
TRUE;
5350 *errorcodeptr =
ERR65;
5360 memcpy(slot + 2, name, namelen);
5361 slot[2+namelen] = 0;
5370 goto NUMBERED_GROUP;
5385 NAMED_REF_OR_RECURSE:
5387 while ((cd->
ctypes[*ptr] & ctype_word) != 0) ptr++;
5388 namelen = (int)(ptr - name);
5397 if (lengthptr != NULL)
5403 *errorcodeptr =
ERR62;
5406 if (*ptr != terminator)
5408 *errorcodeptr =
ERR42;
5413 *errorcodeptr =
ERR48;
5428 recno = find_parens(cd, name, namelen,
5429 (options & PCRE_EXTENDED) != 0, utf8);
5431 if (recno < 0) recno = 0;
5444 if (strncmp((
char *)name, (
char *)slot+2, namelen) == 0 &&
5445 slot[2+namelen] == 0)
5450 if (i < cd->names_found)
5452 recno =
GET2(slot, 0);
5455 find_parens(cd, name, namelen,
5456 (options & PCRE_EXTENDED) != 0, utf8)) <= 0)
5458 *errorcodeptr =
ERR15;
5466 if (is_recurse)
goto HANDLE_RECURSION;
5467 else goto HANDLE_REFERENCE;
5490 HANDLE_NUMERICAL_RECURSION:
5495 if ((digitab[*ptr] & ctype_digit) == 0)
5497 *errorcodeptr =
ERR63;
5503 if ((digitab[ptr[1]] & ctype_digit) == 0)
5504 goto OTHER_CHAR_AFTER_QUERY;
5509 while((digitab[*ptr] & ctype_digit) != 0)
5510 recno = recno * 10 + *ptr++ -
CHAR_0;
5512 if (*ptr != terminator)
5514 *errorcodeptr =
ERR29;
5522 *errorcodeptr =
ERR58;
5528 *errorcodeptr =
ERR15;
5536 *errorcodeptr =
ERR58;
5556 if (lengthptr == NULL)
5566 if (find_parens(cd, NULL, recno,
5567 (options & PCRE_EXTENDED) != 0, utf8) < 0)
5569 *errorcodeptr =
ERR15;
5585 else if (GET(called, 1) == 0 &&
5586 could_be_empty(called, code, bcptr, utf8, cd))
5588 *errorcodeptr =
ERR40;
5598 PUT(code, 1, 2 + 2*LINK_SIZE);
5602 PUT(code, 1, (
int)(called - cd->
start_code));
5606 PUT(code, 1, 2 + 2*LINK_SIZE);
5620 OTHER_CHAR_AFTER_QUERY:
5642 default: *errorcodeptr =
ERR12;
5650 newoptions = (options |
set) & (~unset);
5676 if (code == cd->
start_code + 1 + LINK_SIZE &&
5677 (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
5689 greedy_non_default = greedy_default ^ 1;
5698 *optionsptr = options = newoptions;
5738 previous = (bravalue >=
OP_ONCE)? code : NULL;
5742 length_prevgroup = 0;
5758 (lengthptr == NULL)? NULL :
5773 if (bravalue ==
OP_COND && lengthptr == NULL)
5787 if (code[LINK_SIZE+1] ==
OP_DEF)
5791 *errorcodeptr =
ERR54;
5805 *errorcodeptr =
ERR27;
5808 if (condcount == 1) subfirstbyte = subreqbyte =
REQ_NONE;
5816 *errorcodeptr =
ERR14;
5825 if (lengthptr != NULL)
5827 if (
OFLOW_MAX - *lengthptr < length_prevgroup - 2 - 2*LINK_SIZE)
5829 *errorcodeptr =
ERR20;
5832 *lengthptr += length_prevgroup - 2 - 2*
LINK_SIZE;
5834 PUTINC(code, 0, 1 + LINK_SIZE);
5836 PUTINC(code, 0, 1 + LINK_SIZE);
5847 if (bravalue ==
OP_DEF)
break;
5856 zeroreqbyte = reqbyte;
5857 zerofirstbyte = firstbyte;
5858 groupsetfirstbyte =
FALSE;
5870 if (subfirstbyte >= 0)
5872 firstbyte = subfirstbyte;
5873 groupsetfirstbyte =
TRUE;
5883 else if (subfirstbyte >= 0 && subreqbyte < 0)
5884 subreqbyte = subfirstbyte | tempreqvary;
5889 if (subreqbyte >= 0) reqbyte = subreqbyte;
5900 else if (bravalue ==
OP_ASSERT && subreqbyte >= 0) reqbyte = subreqbyte;
5915 c = check_escape(&ptr, errorcodeptr, cd->
bracount, options,
FALSE);
5916 if (*errorcodeptr != 0)
goto FAILED;
5928 if (-c ==
ESC_E)
continue;
5938 zerofirstbyte = firstbyte;
5939 zeroreqbyte = reqbyte;
5961 reset_bracount =
FALSE;
5968 for (p = ptr + 1; *p != 0 && *p != terminator; p++)
5970 if ((cd->
ctypes[*p] & ctype_digit) == 0) isnumber =
FALSE;
5971 if ((cd->
ctypes[*p] & ctype_word) == 0)
break;
5973 if (*p != terminator)
5975 *errorcodeptr =
ERR57;
5981 goto HANDLE_NUMERICAL_RECURSION;
5984 goto NAMED_REF_OR_RECURSE;
5990 while ((digitab[*p] & ctype_digit) != 0) p++;
5991 if (*p != terminator)
5993 *errorcodeptr =
ERR57;
5997 goto HANDLE_NUMERICAL_RECURSION;
6010 goto NAMED_REF_OR_RECURSE;
6027 cd->
backref_map |= (recno < 32)? (1 << recno) : 1;
6051 int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);
6052 if (ptype < 0)
goto FAILED;
6065 *errorcodeptr =
ERR45;
6081 ptr = substitutes[-c -
ESC_DU] - 1;
6086 previous = (-c >
ESC_b && -c <
ESC_Z)? code : NULL;
6098 if (utf8 && c > 127)
6121 if (utf8 && c >= 0xc0)
6123 while ((ptr[1] & 0xc0) == 0x80)
6124 mcbuffer[mclength++] = *(++ptr);
6134 for (c = 0; c < mclength; c++) *code++ = mcbuffer[c];
6149 zeroreqbyte = reqbyte;
6154 if (mclength == 1 || req_caseopt == 0)
6156 firstbyte = mcbuffer[0] | req_caseopt;
6157 if (mclength != 1) reqbyte = code[-1] | cd->
req_varyopt;
6159 else firstbyte = reqbyte =
REQ_NONE;
6167 zerofirstbyte = firstbyte;
6168 zeroreqbyte = reqbyte;
6169 if (mclength == 1 || req_caseopt == 0)
6170 reqbyte = code[-1] | req_caseopt | cd->
req_varyopt;
6226 compile_regex(
int options,
int oldims,
uschar **codeptr,
const uschar **ptrptr,
6227 int *errorcodeptr,
BOOL lookbehind,
BOOL reset_bracount,
int skipbytes,
6231 const uschar *ptr = *ptrptr;
6233 uschar *last_branch = code;
6234 uschar *start_bracket = code;
6235 uschar *reverse_count = NULL;
6238 int firstbyte, reqbyte;
6239 int branchfirstbyte, branchreqbyte;
6258 length = 2 + 2*LINK_SIZE + skipbytes;
6271 capnumber =
GET2(code, 1 + LINK_SIZE);
6272 capitem.
number = capnumber;
6281 code += 1 + LINK_SIZE + skipbytes;
6285 orig_bracount = max_bracount = cd->
bracount;
6291 if (reset_bracount) cd->
bracount = orig_bracount;
6295 if ((options & PCRE_IMS) != oldims)
6307 reverse_count = code;
6315 if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstbyte,
6316 &branchreqbyte, &bc, cd, (lengthptr == NULL)? NULL : &length))
6338 if (lengthptr == NULL)
6343 if (*last_branch !=
OP_ALT)
6345 firstbyte = branchfirstbyte;
6346 reqbyte = branchreqbyte;
6360 if (firstbyte >= 0 && firstbyte != branchfirstbyte)
6362 if (reqbyte < 0) reqbyte = firstbyte;
6369 if (firstbyte < 0 && branchfirstbyte >= 0 && branchreqbyte < 0)
6370 branchreqbyte = branchfirstbyte;
6376 else reqbyte |= branchreqbyte;
6391 fixed_length = find_fixedlength(last_branch, options,
FALSE, cd);
6392 DPRINTF((
"fixed length = %d\n", fixed_length));
6393 if (fixed_length == -3)
6397 else if (fixed_length < 0)
6399 *errorcodeptr = (fixed_length == -2)?
ERR36 :
ERR25;
6403 else { PUT(reverse_count, 0, fixed_length); }
6418 if (lengthptr == NULL)
6420 int branch_length = (int)(code - last_branch);
6423 int prev_length = GET(last_branch, 1);
6424 PUT(last_branch, 1, branch_length);
6425 branch_length = prev_length;
6426 last_branch -= branch_length;
6428 while (branch_length > 0);
6434 PUT(code, 1, (
int)(code - start_bracket));
6445 memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
6446 code - start_bracket);
6449 PUT(start_bracket, 1, (
int)(code - start_bracket));
6451 PUT(code, 1, (
int)(code - start_bracket));
6475 *firstbyteptr = firstbyte;
6476 *reqbyteptr = reqbyte;
6477 if (lengthptr != NULL)
6481 *errorcodeptr =
ERR20;
6484 *lengthptr += length;
6498 if (lengthptr != NULL)
6500 code = *codeptr + 1 + LINK_SIZE + skipbytes;
6506 PUT(code, 1, (
int)(code - last_branch));
6558 is_anchored(
register const uschar *code,
int *options,
unsigned int bracket_map,
6559 unsigned int backref_map)
6563 options, PCRE_MULTILINE,
FALSE);
6564 register int op = *scode;
6570 if (!is_anchored(scode, options, bracket_map, backref_map))
return FALSE;
6577 int n =
GET2(scode, 1+LINK_SIZE);
6578 int new_map = bracket_map | ((n < 32)? (1 << n) : 1);
6579 if (!is_anchored(scode, options, new_map, backref_map))
return FALSE;
6586 if (!is_anchored(scode, options, bracket_map, backref_map))
return FALSE;
6595 if (scode[1] !=
OP_ALLANY || (bracket_map & backref_map) != 0)
6602 ((*options & PCRE_MULTILINE) != 0 || op !=
OP_CIRC))
6604 code += GET(code, 1);
6634 is_startline(
const uschar *code,
unsigned int bracket_map,
6635 unsigned int backref_map)
6640 register int op = *scode;
6661 if (!is_startline(scode, bracket_map, backref_map))
return FALSE;
6662 do scode += GET(scode, 1);
while (*scode ==
OP_ALT);
6666 scode = first_significant_code(scode, NULL, 0,
FALSE);
6674 if (!is_startline(scode, bracket_map, backref_map))
return FALSE;
6681 int n =
GET2(scode, 1+LINK_SIZE);
6682 int new_map = bracket_map | ((n < 32)? (1 << n) : 1);
6683 if (!is_startline(scode, new_map, backref_map))
return FALSE;
6690 if (!is_startline(scode, bracket_map, backref_map))
return FALSE;
6698 if (scode[1] !=
OP_ANY || (bracket_map & backref_map) != 0)
return FALSE;
6707 code += GET(code, 1);
6736 find_firstassertedchar(
const uschar *code,
int *options,
BOOL inassert)
6738 register int c = -1;
6742 first_significant_code(code + 1+LINK_SIZE, options, PCRE_CASELESS,
TRUE);
6743 register int op = *scode;
6755 if ((d = find_firstassertedchar(scode, options, op ==
OP_ASSERT)) < 0)
6757 if (c < 0) c = d;
else if (c != d)
return -1;
6768 if (!inassert)
return -1;
6772 if ((*options & PCRE_CASELESS) != 0) c |=
REQ_CASELESS;
6774 else if (c != scode[1])
return -1;
6778 code += GET(code, 1);
6810 int *erroroffset,
const unsigned char *tables)
6812 return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
6818 const char **errorptr,
int *erroroffset,
const unsigned char *tables)
6822 int firstbyte, reqbyte, newline;
6824 int skipatstart = 0;
6843 ptr = (
const uschar *)pattern;
6849 if (errorptr == NULL)
6851 if (errorcodeptr != NULL) *errorcodeptr = 99;
6856 if (errorcodeptr != NULL) *errorcodeptr =
ERR0;
6860 if (erroroffset == NULL)
6863 goto PCRE_EARLY_ERROR_RETURN2;
6881 goto PCRE_EARLY_ERROR_RETURN;
6894 { skipatstart += 7; options |=
PCRE_UTF8;
continue; }
6896 { skipatstart += 6; options |=
PCRE_UCP;
continue; }
6918 else if (newbsr != 0)
6932 goto PCRE_EARLY_ERROR_RETURN2;
6938 goto PCRE_EARLY_ERROR_RETURN;
6945 if ((options & PCRE_UCP) != 0)
6948 goto PCRE_EARLY_ERROR_RETURN;
6960 default: errorcode =
ERR56;
goto PCRE_EARLY_ERROR_RETURN;
6969 case 0: newline =
NEWLINE;
break;
6976 default: errorcode =
ERR56;
goto PCRE_EARLY_ERROR_RETURN;
6983 else if (newline < 0)
6993 cd->
nl[0] = (newline >> 8) & 255;
6994 cd->
nl[1] = newline & 255;
6999 cd->
nl[0] = newline;
7012 DPRINTF((
"------------------------------------------------------------------\n"));
7028 cd->
hwm = cworkspace;
7046 &code, &ptr, &errorcode,
FALSE,
FALSE, 0, &firstbyte, &reqbyte, NULL, cd,
7048 if (errorcode != 0)
goto PCRE_EARLY_ERROR_RETURN;
7050 DPRINTF((
"end pre-compile: length=%d workspace=%d\n", length,
7051 cd->
hwm - cworkspace));
7053 if (length > MAX_PATTERN_SIZE)
7056 goto PCRE_EARLY_ERROR_RETURN;
7070 goto PCRE_EARLY_ERROR_RETURN;
7080 re->
size = (int)size;
7106 cd->
hwm = cworkspace;
7116 ptr = (
const uschar *)pattern + skipatstart;
7117 code = (
uschar *)codestart;
7119 (void)compile_regex(re->
options, re->
options & PCRE_IMS, &code, &ptr,
7120 &errorcode,
FALSE,
FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL);
7129 if (errorcode == 0 && *ptr != 0) errorcode =
ERR22;
7137 if (code - codestart > length) errorcode =
ERR23;
7142 while (errorcode == 0 && cd->
hwm > cworkspace)
7147 offset = GET(cd->
hwm, 0);
7148 recno = GET(codestart, offset);
7150 if (groupptr == NULL) errorcode =
ERR53;
7151 else PUT(((
uschar *)codestart), offset, (
int)(groupptr - codestart));
7180 if (GET(cc, 1) == 0)
7183 uschar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
7186 fixed_length = find_fixedlength(cc, re->
options,
TRUE, cd);
7188 DPRINTF((
"fixed length = %d\n", fixed_length));
7189 if (fixed_length < 0)
7191 errorcode = (fixed_length == -2)?
ERR36 :
ERR25;
7194 PUT(cc, 1, fixed_length);
7205 PCRE_EARLY_ERROR_RETURN:
7206 *erroroffset = (int)(ptr - (
const uschar *)pattern);
7207 PCRE_EARLY_ERROR_RETURN2:
7208 *errorptr = find_error_text(errorcode);
7209 if (errorcodeptr != NULL) *errorcodeptr = errorcode;
7225 int temp_options = re->
options;
7226 if (is_anchored(codestart, &temp_options, 0, cd->
backref_map))
7231 firstbyte = find_firstassertedchar(codestart, &temp_options,
FALSE);
7234 int ch = firstbyte & 255;
7236 cd->
fcc[ch] == ch)? ch : firstbyte;
7239 else if (is_startline(codestart, 0, cd->
backref_map))
7251 int ch = reqbyte & 255;
7261 printf(
"Length = %d top_bracket = %d top_backref = %d\n",
7264 printf(
"Options=%08x\n", re->
options);
7271 if (isprint(ch)) printf(
"First char = %c%s\n", ch, caseless);
7272 else printf(
"First char = \\x%02x%s\n", ch, caseless);
7280 if (isprint(ch)) printf(
"Req char = %c%s\n", ch, caseless);
7281 else printf(
"Req char = \\x%02x%s\n", ch, caseless);
7284 pcre_printint(re, stdout,
TRUE);
7289 if (code - codestart > length)
7292 *errorptr = find_error_text(
ERR23);
7293 *erroroffset = ptr - (
uschar *)pattern;
7294 if (errorcodeptr != NULL) *errorcodeptr =
ERR23;