diff --git a/include/regex.h b/include/regex.h index dce2177..449e606 100644 --- a/include/regex.h +++ b/include/regex.h @@ -31,6 +31,7 @@ typedef struct { #define REG_NOTBOL 1 #define REG_NOTEOL 2 +#define REG_STARTEND 4 #define REG_OK 0 #define REG_NOMATCH 1 @@ -46,6 +47,7 @@ typedef struct { #define REG_ERANGE 11 #define REG_ESPACE 12 #define REG_BADRPT 13 +#define REG_INVARG 14 #define REG_ENOSYS -1 diff --git a/src/regex/regexec.c b/src/regex/regexec.c index 16c5d0a..ae65726 100644 --- a/src/regex/regexec.c +++ b/src/regex/regexec.c @@ -29,6 +29,7 @@ */ +#include #include #include #include @@ -51,11 +52,15 @@ tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, #define GET_NEXT_WCHAR() do { \ prev_c = next_c; pos += pos_add_next; \ - if ((pos_add_next = mbtowc(&next_c, str_byte, MB_LEN_MAX)) <= 0) { \ - if (pos_add_next < 0) { ret = REG_NOMATCH; goto error_exit; } \ - else pos_add_next++; \ + if (len >= 0 && pos >= len) \ + next_c = L'\0'; \ + else { \ + if ((pos_add_next = mbtowc(&next_c, str_byte, MB_LEN_MAX)) <= 0) { \ + if (pos_add_next < 0) { ret = REG_NOMATCH; goto error_exit; } \ + else pos_add_next++; \ + } \ + str_byte += pos_add_next; \ } \ - str_byte += pos_add_next; \ } while (0) #define IS_WORD_CHAR(c) ((c) == L'_' || tre_isalnum(c)) @@ -166,7 +171,7 @@ typedef struct { static reg_errcode_t -tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, +tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, ssize_t len, int *match_tags, int eflags, int *match_end_ofs) { @@ -306,7 +311,9 @@ tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, } /* Check for end of string. */ - if (!next_c) break; + if (len < 0) { + if (!next_c) break; + } else if (pos >= len) break; GET_NEXT_WCHAR(); @@ -577,7 +584,7 @@ typedef struct tre_backtrack_struct { #define MIN(a, b) ((a) <= (b) ? (a) : (b)) static reg_errcode_t -tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string, +tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string, ssize_t len, int *match_tags, int eflags, int *match_end_ofs) { /* State variables required by GET_NEXT_WCHAR. */ @@ -767,8 +774,14 @@ tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string, eo = pmatch[bt].rm_eo; bt_len = eo - so; - result = strncmp((const char*)string + so, str_byte - 1, - (size_t)bt_len); + if (so < 0) + result = 1; /* Back reference of nomatch doesn't match */ + else if (len < 0) + result = strncmp((const char *)string + so, str_byte - 1, (size_t)bt_len); + else if (len - pos < bt_len) + result = 1; + else + result = memcmp((const char *)string + so, str_byte - 1, (size_t)bt_len); if (result == 0) { @@ -796,8 +809,9 @@ tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string, else { /* Check for end of string. */ - if (next_c == L'\0') - goto backtrack; + if (len < 0) { + if (!next_c) goto backtrack; + } else if (pos >= len) goto backtrack; /* Read the next character. */ GET_NEXT_WCHAR(); @@ -870,10 +884,10 @@ tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string, { /* Try starting from a later position in the input string. */ /* Check for end of string. */ - if (next_c == L'\0') - { - break; - } + if (len < 0) { + if (!next_c) break; + } else if (pos >= len) break; + next_c = next_c_start; #ifdef TRE_MBSTATE mbstate = mbstate_start; @@ -984,6 +998,18 @@ regexec(const regex_t *restrict preg, const char *restrict string, tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; reg_errcode_t status; int *tags = NULL, eo; + size_t offset = 0; + ssize_t len = -1; + + if ((eflags & REG_STARTEND) && pmatch) { + if (pmatch->rm_so < 0 || pmatch->rm_eo < 0 || + pmatch->rm_so > pmatch->rm_eo) + return REG_INVARG; + + offset = pmatch->rm_so; + len = pmatch->rm_eo - pmatch->rm_so; + } + if (tnfa->cflags & REG_NOSUB) nmatch = 0; if (tnfa->num_tags > 0 && nmatch > 0) { @@ -996,17 +1022,34 @@ regexec(const regex_t *restrict preg, const char *restrict string, if (tnfa->have_backrefs) { /* The regex has back references, use the backtracking matcher. */ - status = tre_tnfa_run_backtrack(tnfa, string, tags, eflags, &eo); + status = tre_tnfa_run_backtrack(tnfa, string+offset, len, tags, eflags, &eo); } else { /* Exact matching, no back references, use the parallel matcher. */ - status = tre_tnfa_run_parallel(tnfa, string, tags, eflags, &eo); + status = tre_tnfa_run_parallel(tnfa, string+offset, len, tags, eflags, &eo); } - if (status == REG_OK) + if (status == REG_OK) { /* A match was found, so fill the submatch registers. */ tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo); + + /* + * If doing REG_STARTEND, adjust the pmatch array (we can't build + * this into tre_fill_pmatch, because tre_tnfa_run_backtrack calls + * tre_fill_pmatch itself). + */ + if (!(tnfa->cflags & REG_NOSUB) && (eflags & REG_STARTEND) + && pmatch && nmatch > 0) { + size_t i; + regmatch_t *p; + for (i = nmatch, p = pmatch; i > 0; p++, i--) { + if (p->rm_so >= 0) p->rm_so += offset; + if (p->rm_eo >= 0) p->rm_eo += offset; + } + } + } + if (tags) xfree(tags); return status;