[openssh-commits] [openssh] 01/08: upstream: Replace the old recursive match_pattern() with an

git+noreply at mindrot.org git+noreply at mindrot.org
Sun May 31 15:04:04 AEST 2026


This is an automated email from the git hooks/post-receive script.

djm pushed a commit to branch master
in repository openssh.

commit 9d4c0b31f172782def72ccc2fb2dc217d3135e6f
Author: djm at openbsd.org <djm at openbsd.org>
AuthorDate: Sun May 31 04:19:16 2026 +0000

    upstream: Replace the old recursive match_pattern() with an
    
    implementation that uses an NFA for matching. This avoids the exponential
    worst-case behaviour of the old implementation.
    
    ok markus@
    
    OpenBSD-Commit-ID: fc6b75a52f4c0acb52b7900658c8d25ff873cbae
---
 match.c | 141 +++++++++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 90 insertions(+), 51 deletions(-)

diff --git a/match.c b/match.c
index 3ef536931..b8f350df1 100644
--- a/match.c
+++ b/match.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: match.c,v 1.45 2024/09/06 02:30:44 djm Exp $ */
+/* $OpenBSD: match.c,v 1.46 2026/05/31 04:19:16 djm Exp $ */
 /*
  * Author: Tatu Ylonen <ylo at cs.hut.fi>
  * Copyright (c) 1995 Tatu Ylonen <ylo at cs.hut.fi>, Espoo, Finland
@@ -13,6 +13,7 @@
  */
 /*
  * Copyright (c) 2000 Markus Friedl.  All rights reserved.
+ * Copyright (c) 2026 Damien Miller.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -49,67 +50,105 @@
 #include "match.h"
 #include "misc.h"
 
+/*
+ * Computes the epsilon closure of an NFA set.
+ * In our wildcard grammar, epsilon transitions only exist for '*' wildcards,
+ * allowing us to transition from state i to i+1 without consuming input.
+ *
+ * This function modifies 'states' in place.
+ */
+static void
+epsilon_closure(char *states, const char *pattern, size_t M)
+{
+	size_t i;
+
+	/* only need a forward pass as there are no back jumps in our grammar */
+	for (i = 0; i < M; i++) {
+		if (!states[i] || pattern[i] != '*')
+			continue;
+		/*
+		 * State i is active, and pattern[i] is '*', so we can
+		 * epsilon-transition to i+1.
+		 */
+		states[i + 1] = 1;
+	}
+}
+
 /*
  * Returns true if the given string matches the pattern (which may contain ?
- * and * as wildcards), and zero if it does not match.
+ * and * as wildcards), and zero if it does not match. Uses an NFA internally.
  */
 int
 match_pattern(const char *s, const char *pattern)
 {
-	for (;;) {
-		/* If at end of pattern, accept if also at end of string. */
-		if (!*pattern)
-			return !*s;
-
-		if (*pattern == '*') {
-			/* Skip this and any consecutive asterisks. */
-			while (*pattern == '*')
-				pattern++;
-
-			/* If at end of pattern, accept immediately. */
-			if (!*pattern)
-				return 1;
-
-			/* If next character in pattern is known, optimize. */
-			if (*pattern != '?' && *pattern != '*') {
+	size_t M;
+	size_t i;
+	char *states, *next_states, *tmp;
+	int active, matched = 0;
+
+	/* trivial case: empty pattern vs empty input */
+	if ((M = strlen(pattern)) == 0)
+		return *s == '\0';
+
+	/* A state for each pattern character, plus one final accepting state */
+	states = xcalloc(M + 1, sizeof(*states));
+	next_states = xcalloc(M + 1, sizeof(*next_states));
+
+	/* Initial state: state 0 is active */
+	states[0] = 1;
+	/* Other states might be reachable now if the pattern starts with '*' */
+	epsilon_closure(states, pattern, M);
+
+	for (; *s; s++) {
+		memset(next_states, 0, M + 1);
+
+		/* Calculate the reachable next states given the input char */
+		for (i = 0; i < M; i++) {
+			if (!states[i])
+				continue;
+			if (pattern[i] == '*') {
+				/*
+				 * '*' matches any character, so we can
+				 * stay in state i
+				 */
+				next_states[i] = 1;
+			} else if (pattern[i] == '?' || pattern[i] == *s) {
 				/*
-				 * Look instances of the next character in
-				 * pattern, and try to match starting from
-				 * those.
+				 * '?' matches any character, or we have
+				 * a literal match.
 				 */
-				for (; *s; s++)
-					if (*s == *pattern &&
-					    match_pattern(s + 1, pattern + 1))
-						return 1;
-				/* Failed. */
-				return 0;
+				next_states[i + 1] = 1;
+			}
+		}
+
+		/* Expand the reachable next states with epsilon transitions */
+		epsilon_closure(next_states, pattern, M);
+
+		/* Swap states and next_states */
+		tmp = states;
+		states = next_states;
+		next_states = tmp;
+
+		/* Check if we have any active pattern states left */
+		active = 0;
+		for (i = 0; i <= M; i++) {
+			if (states[i]) {
+				active = 1;
+				break;
 			}
-			/*
-			 * Move ahead one character at a time and try to
-			 * match at each position.
-			 */
-			for (; *s; s++)
-				if (match_pattern(s, pattern))
-					return 1;
-			/* Failed. */
-			return 0;
 		}
-		/*
-		 * There must be at least one more character in the string.
-		 * If we are at the end, fail.
-		 */
-		if (!*s)
-			return 0;
-
-		/* Check if the next character of the string is acceptable. */
-		if (*pattern != '?' && *pattern != *s)
-			return 0;
-
-		/* Move to the next character, both in string and in pattern. */
-		s++;
-		pattern++;
+		if (!active)
+			goto out; /* No active states, fail early */
 	}
-	/* NOTREACHED */
+	/*
+	 * We matched only if we ended up in the final, accepting state
+	 * after consuming all the input.
+	 */
+	matched = states[M];
+ out:
+	free(states);
+	free(next_states);
+	return matched;
 }
 
 /*

-- 
To stop receiving notification emails like this one, please contact
djm at mindrot.org.


More information about the openssh-commits mailing list