⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fnmatch.c

📁 UNIX下SH的实现源码
💻 C
📖 第 1 页 / 共 2 页
字号:
/* fnmatch.c -- ksh-like extended pattern matching for the shell and filename
		globbing. */

/* Copyright (C) 1991, 1997 Free Software Foundation, Inc.

   This file is part of GNU Bash, the Bourne Again SHell.
   
   Bash is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 2, or (at your option) any later
   version.
              
   Bash is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.
                         
   You should have received a copy of the GNU General Public License along
   with Bash; see the file COPYING.  If not, write to the Free Software
   Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */

#include <config.h>

#include <stdio.h>	/* for debugging */
                                
#include "fnmatch.h"
#include "collsyms.h"
#include <ctype.h>

#if defined (HAVE_STRING_H)
#  include <string.h>
#else
#  include <strings.h>
#endif /* HAVE_STRING_H */

/* Old-style (parameterless) forward declarations for static helpers that
   are called before the point where they are defined. */
static int gmatch ();
static char *brackmatch ();
#ifdef EXTENDED_GLOB
/* Used by gmatch () for extended glob patterns; only declared when
   EXTENDED_GLOB is defined. */
static int extmatch ();
static char *patscan ();
#endif
  
/* Fallback character classification macros, each supplied only when
   <ctype.h> has not already provided a macro of the same name.  Every one
   of them may evaluate its argument C more than once. */

#ifndef isascii
#  define isascii(c)	((unsigned int)(c) < 0200)
#endif

#if !defined (isblank)
#  define isblank(c)	((c) == '\t' || (c) == ' ')
#endif

#if !defined (isgraph)
#  define isgraph(c)	((c) == ' ' ? 0 : isprint ((c)) != 0)
#endif

#if !defined (isxdigit)
#  define isxdigit(c) \
	(((c) >= '0' && (c) <= '9') \
	 || ((c) >= 'A' && (c) <= 'F') \
	 || ((c) >= 'a' && (c) <= 'f'))
#endif

/* Fold C to lower case when FNM_CASEFOLD is set in `flags', a variable
   that must be in scope wherever the macro is used; otherwise yield C
   unchanged.  The value always lies in the range of `unsigned char'.
   C is parenthesized so that the cast applies to the whole argument, and
   it may be evaluated more than once. */
# define FOLD(c) ((flags & FNM_CASEFOLD) && isupper ((unsigned char)(c)) \
	? tolower ((unsigned char)(c)) \
	: ((unsigned char)(c)))

#ifndef STREQ
/* String equality tests that compare the leading characters inline before
   falling back to strcmp ()/strncmp ().  A and B are evaluated more than
   once. */
#define STREQ(a, b) (*(a) == *(b) && !strcmp ((a), (b)))
#define STREQN(a, b, n) (*(a) == *(b) && !strncmp ((a), (b), (n)))
#endif

/* Range endpoints in bracket expressions are ordered by plain character
   value instead of strcoll(3), even where strcoll() is available, because
   locale collation can have unwanted side effects outside the POSIX or US
   locales: in the de locale, for instance, [A-Z] matches all characters.
   strcoll() is used only for collating symbol equivalence.  Both operands
   are converted to int so that tests using unsigned chars are handled. */

#define rangecmp(c1, c2)	(((int) (c1)) - ((int) (c2)))

#if defined (HAVE_STRCOLL)
/* Order C1 and C2 for collating symbol equivalence.  Only the low eight
   bits of each argument are considered.  Returns 0 when both are the same
   character; otherwise the result of strcoll () on the two one-character
   strings or, if strcoll () reports them equal, the difference of the
   character values. */
static int rangecmp2 (c1, c2)
     int c1, c2;
{
  char lhs[2], rhs[2];
  int order;

  /* Eight bits only. */
  c1 &= 0xFF;
  c2 &= 0xFF;

  if (c1 != c2)
    {
      lhs[0] = c1;
      lhs[1] = '\0';
      rhs[0] = c2;
      rhs[1] = '\0';

      order = strcoll (lhs, rhs);
      return (order != 0) ? order : (c1 - c2);
    }

  return (0);
}
#else /* !HAVE_STRCOLL */
#  define rangecmp2(c1, c2)	((int)(c1) - (int)(c2))
#endif /* !HAVE_STRCOLL */

#if defined (HAVE_STRCOLL)
/* Return 1 if rangecmp2 () reports C1 and C2 as equal, 0 otherwise. */
static int collequiv (c1, c2)
     int c1, c2;
{
  if (rangecmp2 (c1, c2) != 0)
    return (0);
  return (1);
}
#else
#  define collequiv(c1, c2)	((c1) == (c2))
#endif

/* Look up the collating symbol named by the first LEN characters of S in
   the posix_collsyms table and return its code.  A name that is not in the
   table stands for itself when it is a single character; any other
   unknown name yields -1. */
static int
collsym (s, len)
     char *s;
     int len;
{
  register struct _collsym *entry;

  entry = posix_collsyms;
  while (entry->name)
    {
      /* The table name must match all LEN characters and end there. */
      if (STREQN(entry->name, s, len) && entry->name[len] == '\0')
        return (entry->code);
      entry++;
    }

  return (len == 1) ? s[0] : -1;
}

/* Match STRING against PATTERN, with FLAGS passed through to gmatch ().
   Returns FNM_NOMATCH if either pointer is null; otherwise returns the
   result of gmatch () applied to the complete string and pattern. */
int
fnmatch (pattern, string, flags)
     char *pattern;
     char *string;
     int flags;
{
  char *string_end, *pattern_end;

  if (pattern == 0 || string == 0)
    return FNM_NOMATCH;

  /* gmatch () works on explicit [start, end) ranges. */
  pattern_end = pattern + strlen (pattern);
  string_end = string + strlen (string);

  return (gmatch (string, string_end, pattern, pattern_end, flags));
}

/* Match the string that starts at STRING and ends just before SE against
   the filename pattern that starts at PATTERN and ends just before PE.
   FLAGS is tested against the FNM_* bits.  Returns zero if the string
   matches, FNM_NOMATCH if not (or if STRING or PATTERN is null).  */
static int
gmatch (string, se, pattern, pe, flags)
     char *string, *se;
     char *pattern, *pe;
     int flags;
{
  register char *p, *n;		/* pattern, string */
  register char c;		/* current pattern character */
  register char sc;		/* current string character */

  p = pattern;
  n = string;

  if (string == 0 || pattern == 0)
    return FNM_NOMATCH;

  while (p < pe)
    {
      c = *p++;
      c = FOLD (c);

      /* Once the string is exhausted, SC reads as a null character. */
      sc = n < se ? *n : '\0';

#ifdef EXTENDED_GLOB
      /* extmatch () will handle recursively calling gmatch, so we can
	 just return what extmatch() returns. */
      if ((flags & FNM_EXTMATCH) && *p == '(' &&
	  (c == '+' || c == '*' || c == '?' || c == '@' || c == '!')) /* ) */
	{
	  int lflags;
	  /* If we're not matching the start of the string, we're not
	     concerned about the special cases for matching `.' */
	  lflags = (n == string) ? flags : (flags & ~FNM_PERIOD);
	  return (extmatch (c, n, se, p, pe, lflags));
	}
#endif

      switch (c)
	{
	case '?':		/* Match single character */
	  if (sc == '\0')
	    return FNM_NOMATCH;
	  else if ((flags & FNM_PATHNAME) && sc == '/')
	    /* If we are matching a pathname, `?' can never match a `/'. */
	    return FNM_NOMATCH;
	  else if ((flags & FNM_PERIOD) && sc == '.' &&
		   (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
	    /* `?' cannot match a `.' if it is the first character of the
	       string or if it is the first character following a slash and
	       we are matching a pathname. */
	    return FNM_NOMATCH;
	  break;

	case '\\':		/* backslash escape removes special meaning */
	  /* A backslash that is the last pattern character cannot match. */
	  if (p == pe)
	    return FNM_NOMATCH;

	  if ((flags & FNM_NOESCAPE) == 0)
	    {
	      c = *p++;
	      /* A trailing `\' cannot match. */
	      /* NOTE(review): this test looks unreachable, since the
		 `p == pe' check above guarantees p < pe before the
		 increment. */
	      if (p > pe)
		return FNM_NOMATCH;
	      c = FOLD (c);
	    }
	  /* With FNM_NOESCAPE set, C is still the backslash itself here and
	     is compared literally. */
	  if (FOLD (sc) != (unsigned char)c)
	    return FNM_NOMATCH;
	  break;

	case '*':		/* Match zero or more characters */
	  /* A `*' that ends the pattern succeeds without looking at the
	     rest of the string. */
	  if (p == pe)
	    return 0;
	  
	  if ((flags & FNM_PERIOD) && sc == '.' &&
	      (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
	    /* `*' cannot match a `.' if it is the first character of the
	       string or if it is the first character following a slash and
	       we are matching a pathname. */
	    return FNM_NOMATCH;

	  /* Collapse multiple consecutive, `*' and `?', but make sure that
	     one character of the string is consumed for each `?'. */
	  for (c = *p++; (c == '?' || c == '*'); c = *p++)
	    {
	      if ((flags & FNM_PATHNAME) && sc == '/')
		/* A slash does not match a wildcard under FNM_PATHNAME. */
		return FNM_NOMATCH;
	      else if (c == '?')
		{
		  if (sc == '\0')
		    return FNM_NOMATCH;
		  /* One character of the string is consumed in matching
		     this ? wildcard, so *??? won't match if there are
		     fewer than three characters. */
		  n++;
		  sc = n < se ? *n : '\0';
		}

#ifdef EXTENDED_GLOB
	      /* Handle ******(patlist) */
	      if ((flags & FNM_EXTMATCH) && c == '*' && *p == '(')  /*)*/
		{
		  char *newn;
		  /* We need to check whether or not the extended glob
		     pattern matches the remainder of the string.
		     If it does, we match the entire pattern. */
		  for (newn = n; newn < se; ++newn)
		    {
		      if (extmatch (c, newn, se, p, pe, flags) == 0)
			return (0);
		    }
		  /* We didn't match the extended glob pattern, but
		     that's OK, since we can match 0 or more occurrences.
		     We need to skip the glob pattern and see if we
		     match the rest of the string. */
		  newn = patscan (p + 1, pe, 0);
		  p = newn;
		}
#endif
	      if (p == pe)
	        break;
	    }

	  /* If we've hit the end of the pattern and the last character of
	     the pattern was handled by the loop above, we've succeeded.
	     Otherwise, we need to match that last character. */
	  if (p == pe && (c == '?' || c == '*'))
	    return (0);

	  /* General case, use recursion. */
	  {
	    unsigned char c1;

	    /* C1 is the first character the rest of the pattern must match:
	       the escaped character if C is an active backslash, else C.
	       Note that the cast binds to the condition only; the
	       assignment to C1 is what narrows the selected value. */
	    c1 = (unsigned char)((flags & FNM_NOESCAPE) == 0 && c == '\\') ? *p : c;
	    c1 = FOLD (c1);
	    /* Back P up so that it points at the pattern character held in
	       C, then try the rest of the pattern at each remaining string
	       position. */
	    for (--p; n < se; ++n)
	      {
		/* Only call fnmatch if the first character indicates a
		   possible match.  We can check the first character if
		   we're not doing an extended glob match. */
		if ((flags & FNM_EXTMATCH) == 0 && c != '[' && FOLD (*n) != c1)
		  continue;

		/* If we're doing an extended glob match and the pattern is not
		   one of the extended glob patterns, we can check the first
		   character. */
		if ((flags & FNM_EXTMATCH) && p[1] != '(' && /*)*/
		    strchr ("?*+@!", *p) == 0 && c != '[' && FOLD (*n) != c1)
		  continue;

		/* Otherwise, we just recurse. */
		/* FNM_PERIOD is cleared for the recursive call. */
		if (gmatch (n, se, p, pe, flags & ~FNM_PERIOD) == 0)
		  return (0);
	      }
	    return FNM_NOMATCH;
	  }

	case '[':
	  {
	    if (sc == '\0' || n == se)
	      return FNM_NOMATCH;

	    /* A character class cannot match a `.' if it is the first
	       character of the string or if it is the first character
	       following a slash and we are matching a pathname. */
	    if ((flags & FNM_PERIOD) && sc == '.' &&
		(n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
	      return (FNM_NOMATCH);

	    /* A null return from brackmatch () is treated as a failed
	       match; otherwise pattern scanning continues from the pointer
	       it returns. */
	    p = brackmatch (p, sc, flags);
	    if (p == 0)
	      return FNM_NOMATCH;
	  }
	  break;

	default:
	  /* Ordinary character: it must equal the (folded) string
	     character. */
	  if ((unsigned char)c != FOLD (sc))
	    return (FNM_NOMATCH);
	}

      /* The current pattern element matched SC; move to the next string
	 character. */
      ++n;
    }

  /* The pattern is exhausted: succeed if the string is exhausted too. */
  if (n == se)
    return (0);

  if ((flags & FNM_LEADING_DIR) && *n == '/')
    /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
    return 0;
          
  return (FNM_NOMATCH);
}

/* Parse a bracket expression collating symbol ([.sym.]) starting at P, find
   the value of the symbol, and move P past the collating symbol expression.
   On entry P points at the `.' that opens the symbol name.  The value is
   returned in *VP, if VP is not null: it is whatever collsym () returns
   for the name, or -1 if the closing `.]' is missing.  Returns a pointer
   to the character after the closing `.]', or to the terminating null
   character of the pattern if the expression is unterminated. */
static char *
parse_collsym (p, vp)
     char *p;
     int *vp;
{
  register int pc;
  int val;

  p++;				/* move past the `.' */

  /* Find the length of the symbol name, i.e. the offset of the closing
     `.]' (or of the end of the pattern if there is none). */
  for (pc = 0; p[pc]; pc++)
    if (p[pc] == '.' && p[pc+1] == ']')
      break;

  /* No closing `.]': the symbol is invalid.  Stop at the null terminator
     rather than returning a pointer two characters beyond it, which would
     lie outside the pattern. */
  if (p[pc] == '\0')
    {
      if (vp)
	*vp = -1;
      return (p + pc);
    }

  val = collsym (p, pc);
  if (vp)
    *vp = val;
  return (p + pc + 2);
}

static char *
brackmatch (p, test, flags)
     char *p;
     unsigned char test;
     int flags;
{
  register char cstart, cend, c;
  register int not;    /* Nonzero if the sense of the character class is inverted.  */
  int pc, brcnt;
  char *savep;

  test = FOLD (test);

  savep = p;

  /* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the
     circumflex (`^') in its role in a `nonmatching list'.  A bracket
     expression starging with an unquoted circumflex character produces
     unspecified results.  This implementation treats the two identically. */
  if (not = (*p == '!' || *p == '^'))
    ++p;

  c = *p++;
  for (;;)
    {
      /* Initialize cstart and cend in case `-' is the last
	 character of the pattern. */
      cstart = cend = c;

      /* POSIX.2 equivalence class:  [=c=].  See POSIX.2 2.8.3.2.  Find
	 the end of the equivalence class, move the pattern pointer past
	 it, and check for equivalence.  XXX - this handles only
	 single-character equivalence classes, which is wrong, or at
	 least incomplete. */
      if (c == '[' && *p == '=' && p[2] == '=' && p[3] == ']')
	{
	  pc = FOLD (p[1]);
	  p += 4;
	  if (collequiv (test, pc))
	    {
/*[*/	      /* Move past the closing `]', since the first thing we do at
	         the `matched:' label is back p up one. */
	      p++;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -