commit 1263ab60d5bf5612d273b80e87a430b9e0a75afd
parent da63ba04988e68bca1395eff60cc362ea14e3c34
Author: Bert Münnich <ber.t@posteo.de>
Date: Fri, 5 May 2017 06:43:07 +0200
Glob matching in linear time
As described by Russ Cox: https://research.swtch.com/glob
The exponential complexity of the old implementation was not a real problem,
though, as no glob triggering excessive backtracking has ever been spotted in
the wild.
Diffstat:
M | match.c | | | 74 | +++++++++++++++++++++++++++++++++++++------------------------------------- |
1 file changed, 37 insertions(+), 37 deletions(-)
diff --git a/match.c b/match.c
@@ -4,56 +4,56 @@
static int rangematch(char *, char);
-enum { RANGE_FAIL = -1, RANGE_ERROR = -2 };
-
/* match() matches a single pattern against a single string. */
extern bool match(char *p, char *m, char *s) {
- int i, j;
+ struct { char *p, *m, *s; } next;
if (m == NULL)
return streq(p, s);
- i = 0;
- while (1) {
- if (p[i] == '\0')
- return *s == '\0';
- else if (m[i]) {
- switch (p[i++]) {
+ next.s = NULL;
+ while (*p || *s) {
+ if (*p) {
+ if (!*m) {
+ /* ordinary character */
+ if (*s == *p) {
+ p++, m++, s++;
+ continue;
+ }
+ } else switch (*p) {
case '?':
- if (*s++ == '\0')
- return FALSE;
+ if (*s) {
+ p++, m++, s++;
+ continue;
+ }
break;
- case '*':
- while (p[i] == '*' && m[i] == 1) /* collapse multiple stars */
- i++;
- if (p[i] == '\0') /* star at end of pattern? */
- return TRUE;
- while (*s != '\0')
- if (match(p + i, m + i, s++))
- return TRUE;
- return FALSE;
- case '[':
- if (*s == '\0')
- return FALSE;
- switch (j = rangematch(p + i, *s)) {
- default:
- i += j;
- break;
- case RANGE_FAIL:
- return FALSE;
- case RANGE_ERROR:
- if (*s != '[')
- return FALSE;
+ case '[': {
+ int r = 1 + rangematch(p+1, *s);
+ if (r > 0) {
+ p += r, m += r, s++;
+ continue;
}
- s++;
break;
+ }
+ case '*':
+ next.p = p++;
+ next.m = m++;
+ next.s = *s ? s+1 : NULL;
+ continue;
default:
panic("bad metacharacter in match");
/* NOTREACHED */
return FALSE; /* hush up gcc -Wall */
}
- } else if (p[i++] != *s++)
- return FALSE;
+ }
+ if (next.s != NULL) {
+ p = next.p;
+ m = next.m;
+ s = next.s;
+ continue;
+ }
+ return FALSE;
}
+ return TRUE;
}
/*
@@ -83,7 +83,7 @@ static int rangematch(char *p, char c) {
}
for (; *p != ']'; p++) {
if (*p == '\0')
- return RANGE_ERROR; /* bad syntax */
+ return c == '[' ? 0 : -1; /* no right-bracket */
if (p[1] == '-' && p[2] != ']') { /* check for [..-..] but ignore [..-] */
if (c >= *p)
matched |= (c <= p[2]);
@@ -95,5 +95,5 @@ static int rangematch(char *p, char c) {
if (matched ^ neg)
return p - orig + 1; /* skip the right-bracket */
else
- return RANGE_FAIL;
+ return -1;
}