⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 wcpattern.cpp

📁 C++正则表达式解析
💻 CPP
📖 第 1 页 / 共 4 页
字号:
      {
        bool useN = 1, useR = 1;
        NFAClassUNode * clazz = new NFAClassUNode(1);
        if ((flags & WCPattern::UNIX_LINE_MODE)  != 0) useR = 0;
        if ((flags & WCPattern::DOT_MATCHES_ALL) != 0) useN = useR = 0;
        if (useN) clazz->vals[(wchar_t)'\n'] = 1;
        if (useR) clazz->vals[(wchar_t)'\r'] = 1;
        next = registerNode(clazz);
      }
      break;
    case (wchar_t)'(':
      {
        NFAUNode * end, * t1, * t2;
        t1 = parse(1, 0, &end);
        if (!t1) raiseError();
        else if (t1->isGroupHeadNode() && (t2 = quantifyGroup(t1, end, grc)) != NULL)
        {
          cur->next = t2;
          cur = t2->next;
        }
        else
        {
          cur->next = t1;
          cur = end;
        }
      }
      break;
    case (wchar_t)')':
      if (!inParen) raiseError();
      else if (inOr)
      {
        --curInd;
        cur = cur->next = registerNode(new NFAAcceptUNode);
        flags = oldFlags;
        return start;
      }
      else
      {
        if (ahead)
        {
          cur = cur->next = registerNode(new NFAAcceptUNode);
          flags = oldFlags;
          return *end = registerNode(new NFALookAheadUNode(start, pos));
        }
        else if (indep)
        {
          cur = cur->next = registerNode(new NFAAcceptUNode);
          flags = oldFlags;
          return *end = registerNode(new NFAPossessiveQuantifierUNode(this, start, 1, 1));
        }
        else // capping or noncapping, it doesnt matter
        {
          *end = cur = cur->next = registerNode(new NFAGroupTailUNode(grc));
          next = quantifyGroup(start, *end, grc);
          if (next)
          {
            start = next;
            *end = next->next;
          }
          flags = oldFlags;
          return start;
        }
      }
      break;
    case (wchar_t)'{': // registered pattern
      cur->next = parseRegisteredWCPattern(&next);
      if (cur->next) cur = next;
      break;
    case (wchar_t)'*':
    case (wchar_t)'+':
    case (wchar_t)'?':
    case (wchar_t)'}':
    case (wchar_t)']':
      raiseError();
      break;
    default:
      if ((flags & WCPattern::CASE_INSENSITIVE) != 0) next = registerNode(new NFACICharUNode(ch));
      else                                          next = registerNode(new NFACharUNode(ch));
      break;
    }
    if (next) cur = cur->next = quantify(next);
  }
  if (inParen) raiseError();
  else
  {
    if (inOr) cur = cur->next = registerNode(new NFAAcceptUNode);
    if (end) *end = cur;
  }

  flags = oldFlags;
  if (error) return NULL;

  return start;
}

// Builds a new WCPattern for `pattern` using the flag bits in `mode`.
// With WCPattern::LITERAL set, the text is not parsed: the node chain is
// start -> quote (case-insensitive variant when CASE_INSENSITIVE is set) -> end.
// Otherwise the text is parsed; a parse failure deletes the pattern and
// yields NULL. On success a matcher bound to an empty string is attached.
// The caller receives the raw pointer returned by `new`.
WCPattern * WCPattern::compile(const std::wstring & pattern, const unsigned long mode)
{
  WCPattern * compiled = new WCPattern(pattern);
  compiled->flags = mode;

  if ((mode & WCPattern::LITERAL) == 0)
  {
    NFAUNode * tail;

    compiled->head = compiled->parse(0, 0, &tail);
    if (!compiled->head)
    {
      // Parsing failed: discard the half-built pattern.
      delete compiled;
      return NULL;
    }

    // Make sure the chain begins with a start-of-input node.
    if (!compiled->head->isStartOfInputNode())
    {
      NFAUNode * startNode = compiled->registerNode(new NFAStartUNode);
      startNode->next = compiled->head;
      compiled->head = startNode;
    }
    tail->next = compiled->registerNode(new NFAEndUNode);
  }
  else
  {
    compiled->head = compiled->registerNode(new NFAStartUNode);
    if ((mode & WCPattern::CASE_INSENSITIVE) == 0)
    {
      compiled->head->next = compiled->registerNode(new NFAQuoteUNode(pattern));
    }
    else
    {
      compiled->head->next = compiled->registerNode(new NFACIQuoteUNode(pattern));
    }
    compiled->head->next->next = compiled->registerNode(new NFAEndUNode);
  }

  compiled->matcher = new WCMatcher(compiled, L"");
  return compiled;
}

// Returns the cached pattern stored under `pattern`, compiling and caching
// it on the first request. Whatever compile() returns is stored, including
// NULL for a pattern that failed to compile.
// NOTE(review): the cache is keyed by the pattern text only, so a later call
// with a different `mode` gets the entry compiled with the first mode — confirm
// this is intended.
WCPattern * WCPattern::compileAndKeep(const std::wstring & pattern, const unsigned long mode)
{
  std::map<std::wstring, WCPattern*>::iterator cached = compiledWCPatterns.find(pattern);
  if (cached != compiledWCPatterns.end()) return cached->second;

  WCPattern * fresh = compile(pattern, mode);
  compiledWCPatterns[pattern] = fresh;
  return fresh;
}
std::wstring WCPattern::replace(const std::wstring & pattern, const std::wstring & str,
                                     const std::wstring & replacementText, const unsigned long mode)
{
  std::wstring ret;
  WCPattern * p = WCPattern::compile(pattern, mode);
  if (p)
  {
    ret = p->replace(str, replacementText);
    delete p;
  }
  return ret;
}

std::vector<std::wstring> WCPattern::split(const std::wstring & pattern, const std::wstring & str, const bool keepEmptys,
                              const unsigned long limit, const unsigned long mode)
{
  std::vector<std::wstring> ret;
  WCPattern * p = WCPattern::compile(pattern, mode);
  if (p)
  {
    ret = p->split(str, keepEmptys, limit);
    delete p;
  }
  return ret;
}

std::vector<std::wstring> WCPattern::findAll(const std::wstring & pattern, const std::wstring & str, const unsigned long mode)
{
  std::vector<std::wstring> ret;
  WCPattern * p = WCPattern::compile(pattern, mode);
  if (p)
  {
    ret = p->findAll(str);
    delete p;
  }
  return ret;
}

bool WCPattern::matches(const std::wstring & pattern, const std::wstring & str, const unsigned long mode)
{
  bool ret = 0;
  WCPattern * p = compile(pattern, mode);

  if (p)
  {
    ret = p->matches(str);
    delete p;
  }

  return ret;
}

bool WCPattern::registerWCPattern(const std::wstring & name, const std::wstring & pattern, const unsigned long mode)
{
  WCPattern * p = WCPattern::compile(pattern, mode);
  if (!p) return 0;
  WCPattern::registeredWCPatterns[name] = std::make_pair(pattern, mode);
  delete p;
  return 1;
}

// Drops every entry from the registeredWCPatterns table.
void WCPattern::unregisterWCPatterns()
{
  WCPattern::registeredWCPatterns.clear();
}
void WCPattern::clearWCPatternCache()
{
  std::map<std::wstring, WCPattern*>::iterator it;
  for (it = compiledWCPatterns.begin(); it != compiledWCPatterns.end(); ++it)
  {
    delete it->second;
  }
  compiledWCPatterns.clear();
}

std::pair<std::wstring, int>  WCPattern::findNthMatch(const std::wstring & pattern, const std::wstring & str,
                                         const int matchNum, const unsigned long mode)
{
  std::pair<std::wstring, int> ret;
  WCPattern * p = WCPattern::compile(pattern, mode);

  ret.second = -1;
  if (p)
  {
    int i = -1;
    p->matcher->setString(str);
    while (i < matchNum && p->matcher->findNextMatch()) { ++i; }
    if (i == matchNum && p->matcher->getStartingIndex() >= 0)
    {
      ret.first = p->matcher->getGroup(0);
      ret.second = p->matcher->getStartingIndex();
    }
    delete p;
  }

  return ret;
}

// Releases the attached matcher and every node recorded as a key in `nodes`.
WCPattern::~WCPattern()
{
  // An earlier approach cleared `nodes` and refilled it through
  // head->findAllNodes(nodes); that code is disabled, so only the nodes
  // already present in the map are freed here.
  delete matcher;

  std::map<NFAUNode*, bool>::iterator node = nodes.begin();
  while (node != nodes.end())
  {
    delete node->first;
    ++node;
  }
}
// Returns a copy of `str` in which each match found by the attached matcher
// is replaced by matcher->replaceWithGroups(replacementText). Text between
// matches, and after the last match, is copied unchanged.
std::wstring WCPattern::replace(const std::wstring & str, const std::wstring & replacementText)
{
  std::wstring out = L"";
  int copiedUpTo = 0; // index in str just past the previous match

  matcher->setString(str);
  for (; matcher->findNextMatch(); copiedUpTo = matcher->getEndingIndex())
  {
    // Unmatched text preceding this match, then the substitution.
    out += str.substr(copiedUpTo, matcher->getStartingIndex() - copiedUpTo);
    out += matcher->replaceWithGroups(replacementText);
  }
  out += str.substr(copiedUpTo);

  return out;
}
// Splits `str` around the matches found by the attached matcher.
//   keepEmptys - when true, empty pieces are kept and zero-width matches
//                also act as separators; when false, both are skipped.
//   limit      - stop consuming matches once this many pieces were collected;
//                0 selects MAX_QMATCH.
// Any text left after the last consumed match is appended as a final piece.
std::vector<std::wstring> WCPattern::split(const std::wstring & str, const bool keepEmptys, const unsigned long limit)
{
  unsigned long maxPieces = (limit == 0 ? MAX_QMATCH : limit);
  std::vector<std::wstring> pieces;
  int pieceStart = 0; // index in str where the next piece begins

  matcher->setString(str);

  while (matcher->findNextMatch() && pieces.size() < maxPieces)
  {
    const int matchStart = matcher->getStartingIndex();
    const int matchEnd = matcher->getEndingIndex();

    // A match at the very beginning contributes a leading empty piece.
    if (matchStart == 0 && keepEmptys) pieces.push_back(L"");

    // Zero-width matches are ignored unless empties are kept.
    if (matchStart == matchEnd && !keepEmptys) continue;

    if (pieceStart != matchStart || keepEmptys)
    {
      pieces.push_back(str.substr(pieceStart, matchStart - pieceStart));
    }
    pieceStart = matchEnd;
  }
  if (pieceStart < (int)str.size()) pieces.push_back(str.substr(pieceStart));

  return pieces;
}
// Points the attached matcher at `str` and returns what its findAll() yields.
std::vector<std::wstring> WCPattern::findAll(const std::wstring & str)
{
  matcher->setString(str);
  std::vector<std::wstring> found = matcher->findAll();
  return found;
}
// Points the attached matcher at `str` and returns the result of its matches().
bool WCPattern::matches(const std::wstring & str)
{
  matcher->setString(str);
  bool matched = matcher->matches();
  return matched;
}
unsigned long WCPattern::getFlags() const
{
  return flags;
}
std::wstring WCPattern::getWCPattern() const
{
  return pattern;
}
// Allocates a new WCMatcher bound to this pattern and to `str`.
// The matcher comes from `new`; this function does not record or free it.
WCMatcher * WCPattern::createWCMatcher(const std::wstring & str)
{
  WCMatcher * created = new WCMatcher(this, str);
  return created;
}

// NFAUNode

// A freshly constructed node has no successor.
NFAUNode::NFAUNode()
{
  next = NULL;
}
// Nothing to release here; in particular `next` is not deleted.
NFAUNode::~NFAUNode()
{
}
// Records this node and every node reachable through `next` in `soFar`.
//   soFar - visited set; each reached node is inserted as a key mapped to 1.
// A node that is already a key of `soFar` stops the walk, which also keeps
// cyclic node graphs from recursing forever.
void NFAUNode::findAllNodes(std::map<NFAUNode*, bool> & soFar)
{
  // Already visited: nothing more to do. (The test used to be `==`, which
  // returned for every unvisited node, so nothing was ever recorded.)
  if (soFar.find(this) != soFar.end()) return;
  soFar[this] = 1;
  if (next) next->findAllNodes(soFar);
}

// NFACharUNode

// Remembers the single character this node matches.
NFACharUNode::NFACharUNode(const wchar_t c)
{
  ch = c;
}
// Succeeds when str[curInd] exists and equals the stored character; the
// result is then whatever the next node returns for curInd + 1.
// Returns -1 when the character is missing or different.
int NFACharUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  if (curInd >= (int)str.size()) return -1;
  if (str[curInd] != ch) return -1;
  return next->match(str, matcher, curInd + 1);
}

// NFACICharUNode

// Stores the lower-cased form of the character so match() can compare
// against lower-cased input.
NFACICharUNode::NFACICharUNode(const wchar_t c)
{
  ch = towlower(c);
}
// Case-insensitive single-character match: succeeds when str[curInd] exists
// and its towlower() form equals the stored character; the result is then
// whatever the next node returns for curInd + 1. Returns -1 otherwise.
int NFACICharUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  if (curInd >= (int)str.size()) return -1;
  if ((wchar_t)towlower(str[curInd]) != ch) return -1;
  return next->match(str, matcher, curInd + 1);
}

// NFAStartUNode

// The start node carries no state of its own.
NFAStartUNode::NFAStartUNode()
{
}
// Entry point of a match attempt. Records the start of group 0 in
// matcher->starts[0] and delegates to the next node.
//  - With MATCH_ENTIRE_STRING set, only an attempt at index 0 is made; any
//    other curInd fails immediately.
//  - Otherwise the start position slides forward one character at a time
//    (clearing the groups between attempts) until the rest of the chain
//    matches or the end of `str` has been tried.
// Returns the value of the successful downstream match, or -1 (with
// starts[0] reset to -1) when the sliding search finds nothing.
int NFAStartUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  matcher->starts[0] = curInd;

  if ((matcher->getFlags() & WCMatcher::MATCH_ENTIRE_STRING) == (unsigned int)WCMatcher::MATCH_ENTIRE_STRING)
  {
    if (curInd == 0) return next->match(str, matcher, 0);
    matcher->starts[0] = -1;
    return -1;
  }

  int attemptAt = curInd;
  int outcome = next->match(str, matcher, attemptAt);
  while (outcome == -1 && attemptAt < (int)str.size())
  {
    matcher->clearGroups();
    ++attemptAt;
    matcher->starts[0] = attemptAt;
    outcome = next->match(str, matcher, attemptAt);
  }

  if (outcome < 0) matcher->starts[0] = -1;
  return outcome;
}

// NFAEndUNode

// The end node carries no state of its own.
NFAEndUNode::NFAEndUNode()
{
}
// Terminal node of the chain: records the end of group 0 in matcher->ends[0]
// and returns curInd. With MATCH_ENTIRE_STRING set, the match only counts
// when curInd is exactly the length of `str`; otherwise ends[0] is reset to
// -1 and -1 is returned.
int NFAEndUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  matcher->ends[0] = curInd;

  const bool wholeInputRequired = (matcher->getFlags() & WCMatcher::MATCH_ENTIRE_STRING) != 0;
  if (wholeInputRequired && curInd != (int)str.size())
  {
    matcher->ends[0] = -1;
    return -1;
  }
  return curInd;
}

// NFAQuantifierUNode

// Collects the nodes of the quantified sub-expression first, then lets the
// base class handle this node and the chain hanging off `next`.
void NFAQuantifierUNode::findAllNodes(std::map<NFAUNode*, bool> & soFar)
{
  // The inner chain is not reachable through `next`, so visit it explicitly.
  inner->findAllNodes(soFar);

  NFAUNode::findAllNodes(soFar);
}
// Wraps `internal` as the quantified sub-expression.
//   pat      - pattern used to register the accept node appended to `internal`.
//   internal - node to repeat; its `next` is overwritten with a new accept node.
//   minMatch - lower repetition bound, raised to WCPattern::MIN_QMATCH if smaller.
//   maxMatch - upper repetition bound, lowered to WCPattern::MAX_QMATCH if larger.
NFAQuantifierUNode::NFAQuantifierUNode(WCPattern * pat, NFAUNode * internal, const int minMatch, const int maxMatch)
{
  inner = internal;
  inner->next = pat->registerNode(new NFAAcceptUNode);

  if (minMatch < WCPattern::MIN_QMATCH) min = WCPattern::MIN_QMATCH;
  else                                  min = minMatch;

  if (maxMatch > WCPattern::MAX_QMATCH) max = WCPattern::MAX_QMATCH;
  else                                  max = maxMatch;
}

// Runs the inner sub-expression up to `min` times in a row, starting at curInd.
// Returns the index reached after the last repetition. The loop stops early
// and returns the inner result as soon as a repetition fails (-1) or makes
// no progress (returns the index it started from). With min == 0, curInd is
// returned unchanged.
int NFAQuantifierUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  int from = curInd;    // where the next repetition starts
  int reached = curInd; // where the latest repetition ended

  for (int done = 0; done < min; ++done)
  {
    reached = inner->match(str, matcher, from);
    // reached < from can only be the -1 failure code; reached == from is a
    // repetition that consumed nothing.
    if (reached <= from) return reached;
    from = reached;
  }

  return reached;
}
// NFAGreedyQuantifierUNode

// Forwards every argument to the NFAQuantifierUNode constructor; the greedy
// variant adds no construction-time state of its own.
NFAGreedyQuantifierUNode::NFAGreedyQuantifierUNode(WCPattern * pat, NFAUNode * internal, const int minMatch, const int maxMatch)
                        : NFAQuantifierUNode(pat, internal, minMatch, maxMatch)
{
}
// First runs the base-class match for the mandatory repetitions. If that
// returns -1, the failure is passed on; otherwise matchInternal() continues
// from the index it reached, with `min` as the repetition count so far.
int NFAGreedyQuantifierUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  const int afterMinimum = NFAQuantifierUNode::match(str, matcher, curInd);
  if (afterMinimum == -1) return afterMinimum;
  return matchInternal(str, matcher, afterMinimum, min);
}
int NFAGreedyQuantifierUNode::matchInternal(const std::wstring & str, WCMatcher * matcher, const int curInd, const int soFar) const

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -