From d73884a4510ca5d2d2e9f4ee2ad345b304f67342 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Mon, 23 Mar 2026 17:33:20 +0000 Subject: [PATCH] regex: reject invalid \digit back reference in BRE In BRE, \n matches the nth subexpression, but regcomp did not check whether the nth subexpression was complete, only that there were more subexpressions overall than the largest backref. Fix regcomp to return an error if the referenced subexpression is incomplete. The bug could cause an infinite loop in regexec: regcomp(&re, "\\(^a*\\1\\)*", 0); regexec(&re, "aa", 0, 0, 0); so this is a DoS vulnerability if the pattern comes from an untrusted source (ERE is not affected). Note: the regexec backtracking code is not supposed to loop forever; I didn't check how the loop happened in this case. Note: this is likely a conformance bug too, but POSIX is badly worded: The expression is invalid if less than n subexpressions precede the '\n'. Subexpressions are numbered by their (, and interpreting "precede" by counting either ( or ) is wrong; the expectation is: \(\(a\)\)\2 ok, matches aa \(\(a\)\2\) ok, matches aa \(\(a\2\)\) invalid \(\2\(a\)\) invalid Reported-by: Simon Resch --- src/regex/regcomp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/regex/regcomp.c b/src/regex/regcomp.c index fb24556e..b4b81968 100644 --- a/src/regex/regcomp.c +++ b/src/regex/regcomp.c @@ -409,6 +409,8 @@ typedef struct { int position; /* The highest back reference or -1 if none seen so far. */ int max_backref; + /* Bit mask of submatch IDs that can be back referenced. */ + int backref_ok; /* Compilation flags. */ int cflags; } tre_parse_ctx_t; @@ -769,6 +771,8 @@ static reg_errcode_t marksub(tre_parse_ctx_t *ctx, tre_ast_node_t *node, int sub node->submatch_id = subid; node->num_submatches++; ctx->n = node; + if (subid < 10) + ctx->backref_ok |= 1<<subid; [...] + if (!(ctx->backref_ok & 1<<val)) + return REG_ESUBREG; node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position++); ctx->max_backref = MAX(val, ctx->max_backref); } else { -- 2.52.0