477 lines
18 KiB
Diff
477 lines
18 KiB
Diff
diff -up unzip60/match.c.recmatch unzip60/match.c
|
|
--- unzip60/match.c.recmatch 2005-08-14 13:00:36.000000000 -0400
|
|
+++ unzip60/match.c 2013-05-28 10:29:57.949077543 -0400
|
|
@@ -27,16 +27,14 @@
|
|
|
|
---------------------------------------------------------------------------
|
|
|
|
- Copyright on recmatch() from Zip's util.c (although recmatch() was almost
|
|
- certainly written by Mark Adler...ask me how I can tell :-) ):
|
|
+ Copyright on recmatch() from Zip's util.c
|
|
+ Copyright (c) 1990-2005 Info-ZIP. All rights reserved.
|
|
|
|
- Copyright (C) 1990-1992 Mark Adler, Richard B. Wales, Jean-loup Gailly,
|
|
- Kai Uwe Rommel and Igor Mandrichenko.
|
|
+ See the accompanying file LICENSE, version 2004-May-22 or later
|
|
+ for terms of use.
|
|
+ If, for some reason, both of these files are missing, the Info-ZIP license
|
|
+ also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html
|
|
|
|
- Permission is granted to any individual or institution to use, copy,
|
|
- or redistribute this software so long as all of the original files are
|
|
- included unmodified, that it is not sold for profit, and that this copy-
|
|
- right notice is retained.
|
|
|
|
---------------------------------------------------------------------------
|
|
|
|
@@ -53,7 +51,7 @@
|
|
|
|
A set is composed of characters or ranges; a range looks like ``character
|
|
hyphen character'' (as in 0-9 or A-Z). [0-9a-zA-Z_] is the minimal set of
|
|
- characters allowed in the [..] pattern construct. Other characters are
|
|
+ characters allowed in the [..] pattern construct. Other characters are
|
|
allowed (i.e., 8-bit characters) if your system will support them.
|
|
|
|
To suppress the special syntactic significance of any of ``[]*?!^-\'', in-
|
|
@@ -101,8 +99,32 @@
|
|
# define WILDCHAR '?'
|
|
# define BEG_RANGE '['
|
|
# define END_RANGE ']'
|
|
+# define WILDCHR_SINGLE '?'
|
|
+# define DIRSEP_CHR '/'
|
|
+# define WILDCHR_MULTI '*'
|
|
#endif
|
|
|
|
+#ifdef WILD_STOP_AT_DIR
|
|
+ int wild_stop_at_dir = 1; /* default wildcards do not include / in matches */
|
|
+#else
|
|
+ int wild_stop_at_dir = 0; /* default wildcards do include / in matches */
|
|
+#endif
|
|
+
|
|
+
|
|
+
|
|
+/*
|
|
+ * case mapping functions. case_map is used to ignore case in comparisons,
|
|
+ * to_up is used to force upper case even on Unix (for dosify option).
|
|
+ */
|
|
+#ifdef USE_CASE_MAP
|
|
+# define case_map(c) upper[(c) & 0xff]
|
|
+# define to_up(c) upper[(c) & 0xff]
|
|
+#else
|
|
+# define case_map(c) (c)
|
|
+# define to_up(c) ((c) >= 'a' && (c) <= 'z' ? (c)-'a'+'A' : (c))
|
|
+#endif /* USE_CASE_MAP */
|
|
+
|
|
+
|
|
#if 0 /* GRR: add this to unzip.h someday... */
|
|
#if !(defined(MSDOS) && defined(DOSWILD))
|
|
#ifdef WILD_STOP_AT_DIR
|
|
@@ -114,8 +136,8 @@ int recmatch OF((ZCONST uch *pattern, ZC
|
|
int ignore_case __WDLPRO));
|
|
#endif
|
|
#endif /* 0 */
|
|
-static int recmatch OF((ZCONST uch *pattern, ZCONST uch *string,
|
|
- int ignore_case __WDLPRO));
|
|
+static int recmatch OF((ZCONST char *, ZCONST char *,
|
|
+ int));
|
|
static char *isshexp OF((ZCONST char *p));
|
|
static int namecmp OF((ZCONST char *s1, ZCONST char *s2));
|
|
|
|
@@ -154,192 +176,240 @@ int match(string, pattern, ignore_case _
|
|
}
|
|
dospattern[j-1] = '\0'; /* nuke the end "." */
|
|
}
|
|
- j = recmatch((uch *)dospattern, (uch *)string, ignore_case __WDL);
|
|
+ j = recmatch(dospattern, string, ignore_case);
|
|
free(dospattern);
|
|
return j == 1;
|
|
} else
|
|
#endif /* MSDOS && DOSWILD */
|
|
- return recmatch((uch *)pattern, (uch *)string, ignore_case __WDL) == 1;
|
|
+ return recmatch(pattern, string, ignore_case) == 1;
|
|
}
|
|
|
|
+#ifdef _MBCS
|
|
+
|
|
+char *___tmp_ptr;
|
|
|
|
+#endif
|
|
|
|
-static int recmatch(p, s, ic __WDL)
|
|
- ZCONST uch *p; /* sh pattern to match */
|
|
- ZCONST uch *s; /* string to which to match it */
|
|
- int ic; /* true for case insensitivity */
|
|
- __WDLDEF /* directory sepchar for WildStopAtDir mode, or 0 */
|
|
+static int recmatch(p, s, cs)
|
|
+ZCONST char *p; /* sh pattern to match */
|
|
+ZCONST char *s; /* string to match it to */
|
|
+int cs; /* flag: nonzero = case-sensitive matching. NOTE(review): match() passes ignore_case here, so the polarity looks inverted -- confirm */
|
|
/* Recursively compare the sh pattern p with the string s and return 1 if
|
|
- * they match, and 0 or 2 if they don't or if there is a syntax error in the
|
|
- * pattern. This routine recurses on itself no more deeply than the number
|
|
- * of characters in the pattern. */
|
|
+ they match, and 0 or 2 if they don't or if there is a syntax error in the
|
|
+ pattern. This routine recurses on itself no deeper than the number of
|
|
+ characters in the pattern. */
|
|
{
|
|
- unsigned int c; /* pattern char or start of range in [-] loop */
|
|
+ int c; /* pattern char or start of range in [-] loop */
|
|
+ /* Get first character, the pattern for new recmatch calls follows */
|
|
+ /* borrowed from Zip's global.c */
|
|
+ int no_wild = 0;
|
|
+ int allow_regex=1;
|
|
+ /* This fix provided by akt@m5.dion.ne.jp for Japanese.
|
|
+ See 21 July 2006 mail.
|
|
+ It only applies when p is pointing to a doublebyte character and
|
|
+ things like / and wildcards are not doublebyte. This probably
|
|
+ should not be needed. */
|
|
|
|
- /* Get first character, the pattern for new recmatch calls follows */
|
|
- c = *p; INCSTR(p);
|
|
+#ifdef _MBCS
|
|
+ if (CLEN(p) == 2) {
|
|
+ if (CLEN(s) == 2) {
|
|
+ return (*p == *s && *(p+1) == *(s+1)) ?
|
|
+ recmatch(p + 2, s + 2, cs) : 0;
|
|
+ } else {
|
|
+ return 0;
|
|
+ }
|
|
+ }
|
|
+#endif /* ?_MBCS */
|
|
|
|
- /* If that was the end of the pattern, match if string empty too */
|
|
- if (c == 0)
|
|
- return *s == 0;
|
|
+ c = *POSTINCSTR(p);
|
|
|
|
- /* '?' (or '%') matches any character (but not an empty string). */
|
|
- if (c == WILDCHAR)
|
|
-#ifdef WILD_STOP_AT_DIR
|
|
- /* If uO.W_flag is non-zero, it won't match '/' */
|
|
- return (*s && (!sepc || *s != (uch)sepc))
|
|
- ? recmatch(p, s + CLEN(s), ic, sepc) : 0;
|
|
-#else
|
|
- return *s ? recmatch(p, s + CLEN(s), ic) : 0;
|
|
-#endif
|
|
+ /* If that was the end of the pattern, match if string empty too */
|
|
+ if (c == 0)
|
|
+ return *s == 0;
|
|
+
|
|
+ /* '?' (or '%' or '#') matches any character (but not an empty string) */
|
|
+ if (c == WILDCHR_SINGLE) {
|
|
+ if (wild_stop_at_dir)
|
|
+ return (*s && *s != DIRSEP_CHR) ? recmatch(p, s + CLEN(s), cs) : 0;
|
|
+ else
|
|
+ return *s ? recmatch(p, s + CLEN(s), cs) : 0;
|
|
+ }
|
|
|
|
- /* '*' matches any number of characters, including zero */
|
|
+ /* WILDCHR_MULTI ('*') matches any number of characters, including zero */
|
|
#ifdef AMIGA
|
|
- if (c == '#' && *p == '?') /* "#?" is Amiga-ese for "*" */
|
|
- c = '*', p++;
|
|
+ if (!no_wild && c == '#' && *p == '?') /* "#?" is Amiga-ese for "*" */
|
|
+ c = WILDCHR_MULTI, p++;
|
|
#endif /* AMIGA */
|
|
- if (c == '*') {
|
|
-#ifdef WILD_STOP_AT_DIR
|
|
- if (sepc) {
|
|
- /* check for single "*" or double "**" */
|
|
-# ifdef AMIGA
|
|
- if ((c = p[0]) == '#' && p[1] == '?') /* "#?" is Amiga-ese for "*" */
|
|
- c = '*', p++;
|
|
- if (c != '*') {
|
|
-# else /* !AMIGA */
|
|
- if (*p != '*') {
|
|
-# endif /* ?AMIGA */
|
|
- /* single "*": this doesn't match the dirsep character */
|
|
- for (; *s && *s != (uch)sepc; INCSTR(s))
|
|
- if ((c = recmatch(p, s, ic, sepc)) != 0)
|
|
- return (int)c;
|
|
- /* end of pattern: matched if at end of string, else continue */
|
|
- if (*p == '\0')
|
|
- return (*s == 0);
|
|
- /* continue to match if at sepc in pattern, else give up */
|
|
- return (*p == (uch)sepc || (*p == '\\' && p[1] == (uch)sepc))
|
|
- ? recmatch(p, s, ic, sepc) : 2;
|
|
- }
|
|
- /* "**": this matches slashes */
|
|
- ++p; /* move p behind the second '*' */
|
|
- /* and continue with the non-W_flag code variant */
|
|
- }
|
|
-#endif /* WILD_STOP_AT_DIR */
|
|
+ if (!no_wild && c == WILDCHR_MULTI)
|
|
+ {
|
|
+ if (wild_stop_at_dir) {
|
|
+ /* Check for an immediately following WILDCHR_MULTI */
|
|
+# ifdef AMIGA
|
|
+ if ((c = p[0]) == '#' && p[1] == '?') /* "#?" is Amiga-ese for "*" */
|
|
+ c = WILDCHR_MULTI, p++;
|
|
+ if (c != WILDCHR_MULTI) {
|
|
+# else /* !AMIGA */
|
|
+ if (*p != WILDCHR_MULTI) {
|
|
+# endif /* ?AMIGA */
|
|
+ /* Single WILDCHR_MULTI ('*'): this doesn't match slashes */
|
|
+ for (; *s && *s != DIRSEP_CHR; INCSTR(s))
|
|
+ if ((c = recmatch(p, s, cs)) != 0)
|
|
+ return c;
|
|
+ /* end of pattern: matched if at end of string, else continue */
|
|
if (*p == 0)
|
|
- return 1;
|
|
- if (isshexp((ZCONST char *)p) == NULL) {
|
|
- /* Optimization for rest of pattern being a literal string:
|
|
- * If there are no other shell expression chars in the rest
|
|
- * of the pattern behind the multi-char wildcard, then just
|
|
- * compare the literal string tail.
|
|
- */
|
|
- ZCONST uch *srest;
|
|
-
|
|
- srest = s + (strlen((ZCONST char *)s) - strlen((ZCONST char *)p));
|
|
- if (srest - s < 0)
|
|
- /* remaining literal string from pattern is longer than rest
|
|
- * of test string, there can't be a match
|
|
- */
|
|
- return 0;
|
|
- else
|
|
- /* compare the remaining literal pattern string with the last
|
|
- * bytes of the test string to check for a match
|
|
- */
|
|
+ return (*s == 0);
|
|
+ /* continue to match if at DIRSEP_CHR in pattern, else give up */
|
|
+ return (*p == DIRSEP_CHR || (*p == '\\' && p[1] == DIRSEP_CHR))
|
|
+ ? recmatch(p, s, cs) : 2;
|
|
+ }
|
|
+ /* Two consecutive WILDCHR_MULTI ("**"): this matches DIRSEP_CHR ('/') */
|
|
+ p++; /* move p past the second WILDCHR_MULTI */
|
|
+ /* continue with the normal non-WILD_STOP_AT_DIR code */
|
|
+ } /* wild_stop_at_dir */
|
|
+
|
|
+ /* Not wild_stop_at_dir */
|
|
+ if (*p == 0)
|
|
+ return 1;
|
|
+ if (!isshexp((char *)p))
|
|
+ {
|
|
+ /* optimization for rest of pattern being a literal string */
|
|
+
|
|
+ /* optimization to handle patterns like *.txt */
|
|
+ /* if the first char in the pattern is '*' and there */
|
|
+ /* are no other shell expression chars, i.e. a literal string */
|
|
+ /* then just compare the literal string at the end */
|
|
+
|
|
+ ZCONST char *srest;
|
|
+
|
|
+ srest = s + (strlen(s) - strlen(p));
|
|
+ if (srest - s < 0)
|
|
+ /* remaining literal string from pattern is longer than rest of
|
|
+ test string, there can't be a match
|
|
+ */
|
|
+ return 0;
|
|
+ else
|
|
+ /* compare the remaining literal pattern string with the last bytes
|
|
+ of the test string to check for a match */
|
|
#ifdef _MBCS
|
|
- {
|
|
- ZCONST uch *q = s;
|
|
+ {
|
|
+ ZCONST char *q = s;
|
|
|
|
- /* MBCS-aware code must not scan backwards into a string from
|
|
- * the end.
|
|
- * So, we have to move forward by character from our well-known
|
|
- * character position s in the test string until we have
|
|
- * advanced to the srest position.
|
|
- */
|
|
- while (q < srest)
|
|
- INCSTR(q);
|
|
- /* In case the byte *srest is a trailing byte of a multibyte
|
|
- * character in the test string s, we have actually advanced
|
|
- * past the position (srest).
|
|
- * For this case, the match has failed!
|
|
- */
|
|
- if (q != srest)
|
|
- return 0;
|
|
- return ((ic
|
|
- ? namecmp((ZCONST char *)p, (ZCONST char *)q)
|
|
- : strcmp((ZCONST char *)p, (ZCONST char *)q)
|
|
- ) == 0);
|
|
- }
|
|
+ /* MBCS-aware code must not scan backwards into a string from
|
|
+ * the end.
|
|
+ * So, we have to move forward by character from our well-known
|
|
+ * character position s in the test string until we have advanced
|
|
+ * to the srest position.
|
|
+ */
|
|
+ while (q < srest)
|
|
+ INCSTR(q);
|
|
+ /* In case the byte *srest is a trailing byte of a multibyte
|
|
+ * character, we have actually advanced past the position (srest).
|
|
+ * For this case, the match has failed!
|
|
+ */
|
|
+ if (q != srest)
|
|
+ return 0;
|
|
+ return ((cs ? strcmp(p, q) : namecmp(p, q)) == 0);
|
|
+ }
|
|
#else /* !_MBCS */
|
|
- return ((ic
|
|
- ? namecmp((ZCONST char *)p, (ZCONST char *)srest)
|
|
- : strcmp((ZCONST char *)p, (ZCONST char *)srest)
|
|
- ) == 0);
|
|
+ return ((cs ? strcmp(p, srest) : namecmp(p, srest)) == 0);
|
|
#endif /* ?_MBCS */
|
|
- } else {
|
|
- /* pattern contains more wildcards, continue with recursion... */
|
|
- for (; *s; INCSTR(s))
|
|
- if ((c = recmatch(p, s, ic __WDL)) != 0)
|
|
- return (int)c;
|
|
- return 2; /* 2 means give up--match will return false */
|
|
- }
|
|
}
|
|
-
|
|
- /* Parse and process the list of characters and ranges in brackets */
|
|
- if (c == BEG_RANGE) {
|
|
- int e; /* flag true if next char to be taken literally */
|
|
- ZCONST uch *q; /* pointer to end of [-] group */
|
|
- int r; /* flag true to match anything but the range */
|
|
-
|
|
- if (*s == 0) /* need a character to match */
|
|
- return 0;
|
|
- p += (r = (*p == '!' || *p == '^')); /* see if reverse */
|
|
- for (q = p, e = 0; *q; INCSTR(q)) /* find closing bracket */
|
|
- if (e)
|
|
- e = 0;
|
|
- else
|
|
- if (*q == '\\') /* GRR: change to ^ for MS-DOS, OS/2? */
|
|
- e = 1;
|
|
- else if (*q == END_RANGE)
|
|
- break;
|
|
- if (*q != END_RANGE) /* nothing matches if bad syntax */
|
|
- return 0;
|
|
- for (c = 0, e = (*p == '-'); p < q; INCSTR(p)) {
|
|
- /* go through the list */
|
|
- if (!e && *p == '\\') /* set escape flag if \ */
|
|
- e = 1;
|
|
- else if (!e && *p == '-') /* set start of range if - */
|
|
- c = *(p-1);
|
|
- else {
|
|
- unsigned int cc = Case(*s);
|
|
-
|
|
- if (*(p+1) != '-')
|
|
- for (c = c ? c : *p; c <= *p; c++) /* compare range */
|
|
- if ((unsigned)Case(c) == cc) /* typecast for MSC bug */
|
|
- return r ? 0 : recmatch(q + 1, s + 1, ic __WDL);
|
|
- c = e = 0; /* clear range, escape flags */
|
|
- }
|
|
- }
|
|
- return r ? recmatch(q + CLEN(q), s + CLEN(s), ic __WDL) : 0;
|
|
- /* bracket match failed */
|
|
+ else
|
|
+ {
|
|
+ /* pattern contains more wildcards, continue with recursion... */
|
|
+ for (; *s; INCSTR(s))
|
|
+ if ((c = recmatch(p, s, cs)) != 0)
|
|
+ return c;
|
|
+ return 2; /* 2 means give up--match() will return false */
|
|
}
|
|
+ }
|
|
|
|
- /* if escape ('\\'), just compare next character */
|
|
- if (c == '\\' && (c = *p++) == 0) /* if \ at end, then syntax error */
|
|
- return 0;
|
|
+#ifndef VMS /* No bracket matching in VMS */
|
|
+ /* Parse and process the list of characters and ranges in brackets */
|
|
+ if (!no_wild && allow_regex && c == '[')
|
|
+ {
|
|
+ int e; /* flag true if next char to be taken literally */
|
|
+ ZCONST char *q; /* pointer to end of [-] group */
|
|
+ int r; /* flag true to match anything but the range */
|
|
+
|
|
+ if (*s == 0) /* need a character to match */
|
|
+ return 0;
|
|
+ p += (r = (*p == '!' || *p == '^')); /* see if reverse */
|
|
+ for (q = p, e = 0; *q; q++) /* find closing bracket */
|
|
+ if (e)
|
|
+ e = 0;
|
|
+ else
|
|
+ if (*q == '\\')
|
|
+ e = 1;
|
|
+ else if (*q == ']')
|
|
+ break;
|
|
+ if (*q != ']') /* nothing matches if bad syntax */
|
|
+ return 0;
|
|
+ for (c = 0, e = *p == '-'; p < q; p++) /* go through the list */
|
|
+ {
|
|
+ if (e == 0 && *p == '\\') /* set escape flag if \ */
|
|
+ e = 1;
|
|
+ else if (e == 0 && *p == '-') /* set start of range if - */
|
|
+ c = *(p-1);
|
|
+ else
|
|
+ {
|
|
+ uch cc = (cs ? (uch)*s : case_map((uch)*s));
|
|
+ uch uc = (uch) c;
|
|
+ if (*(p+1) != '-')
|
|
+ for (uc = uc ? uc : (uch)*p; uc <= (uch)*p; uc++)
|
|
+ /* compare range */
|
|
+ if ((cs ? uc : case_map(uc)) == cc)
|
|
+ return r ? 0 : recmatch(q + CLEN(q), s + CLEN(s), cs);
|
|
+ c = e = 0; /* clear range, escape flags */
|
|
+ }
|
|
+ }
|
|
+ return r ? recmatch(q + CLEN(q), s + CLEN(s), cs) : 0;
|
|
+ /* bracket match failed */
|
|
+ }
|
|
+#endif /* !VMS */
|
|
|
|
- /* just a character--compare it */
|
|
-#ifdef QDOS
|
|
- return QMatch(Case((uch)c), Case(*s)) ?
|
|
- recmatch(p, s + CLEN(s), ic __WDL) : 0;
|
|
-#else
|
|
- return Case((uch)c) == Case(*s) ?
|
|
- recmatch(p, s + CLEN(s), ic __WDL) : 0;
|
|
-#endif
|
|
+ /* If escape ('\'), just compare next character */
|
|
+ if (!no_wild && c == '\\')
|
|
+ if ((c = *p++) == '\0') /* if \ at end, then syntax error */
|
|
+ return 0;
|
|
+
|
|
+#ifdef VMS
|
|
+ /* 2005-11-06 SMS.
|
|
+ Handle "..." wildcard in p with "." or "]" in s.
|
|
+ */
|
|
+ if ((c == '.') && (*p == '.') && (*(p+ CLEN( p)) == '.') &&
|
|
+ ((*s == '.') || (*s == ']')))
|
|
+ {
|
|
+ /* Match "...]" with "]". Continue after "]" in both. */
|
|
+ if ((*(p+ 2* CLEN( p)) == ']') && (*s == ']'))
|
|
+ return recmatch( (p+ 3* CLEN( p)), (s+ CLEN( s)), cs);
|
|
+
|
|
+ /* Else, look for a reduced match in s, until "]" in s or the end of s. */
|
|
+ for (; *s && (*s != ']'); INCSTR(s))
|
|
+ if (*s == '.')
|
|
+ /* If reduced match, then continue after "..." in p, "." in s. */
|
|
+ if ((c = recmatch( (p+ CLEN( p)), s, cs)) != 0)
|
|
+ return (int)c;
|
|
+
|
|
+ /* Match "...]" with "]". Continue after "]" in both. */
|
|
+ if ((*(p+ 2* CLEN( p)) == ']') && (*s == ']'))
|
|
+ return recmatch( (p+ 3* CLEN( p)), (s+ CLEN( s)), cs);
|
|
+
|
|
+ /* No reduced match. Quit. */
|
|
+ return 2;
|
|
+ }
|
|
+
|
|
+#endif /* def VMS */
|
|
+
|
|
+ /* Just a character--compare it */
|
|
+ return (cs ? c == *s : case_map((uch)c) == case_map((uch)*s)) ?
|
|
+ recmatch(p, s + CLEN(s), cs) : 0;
|
|
+}
|
|
|
|
-} /* end function recmatch() */
|
|
|
|
|
|
|
|
+/*************************************************************************************************/
|
|
static char *isshexp(p)
|
|
ZCONST char *p;
|
|
/* If p is a sh expression, a pointer to the first special character is
|