⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 wcpattern.cpp

📁 C++正则表达式解析
💻 CPP
📖 第 1 页 / 共 4 页
字号:
{
  if (soFar >= max) return next->match(str, matcher, curInd);

  int i, j;

  i = inner->match(str, matcher, curInd);
  if (i != -1)
  {
    j = matchInternal(str, matcher, i, soFar + 1);
    if (j != -1) return j;
  }
  return next->match(str, matcher, curInd);
}

// NFALazyQuantifierUNode

// Constructs the node by forwarding every argument, unchanged, to the
// NFAQuantifierUNode base constructor. No additional state is set up here.
NFALazyQuantifierUNode::NFALazyQuantifierUNode(WCPattern * pat,
                                               NFAUNode * internal,
                                               const int minMatch,
                                               const int maxMatch)
  : NFAQuantifierUNode(pat, internal, minMatch, maxMatch)
{
}
// Runs NFAQuantifierUNode::match first; if that fails (-1) so does this node.
// Afterwards, for each of the (max - min) optional repetitions, the
// continuation (next) is tried before the repeated sub-expression (inner):
// inner is only consumed again when next could not match at the current
// position.
//
// Returns the index produced by next on success, or -1 on failure.
int NFALazyQuantifierUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  int pos = NFAQuantifierUNode::match(str, matcher, curInd);
  if (pos == -1) return -1;

  for (int count = min; count < max; ++count)
  {
    // Prefer stopping here: hand over to the rest of the pattern.
    const int after = next->match(str, matcher, pos);
    if (after != -1) return after;

    // The continuation failed at pos, so consume one more repetition.
    const int advanced = inner->match(str, matcher, pos);

    // advanced < pos means inner returned -1. advanced == pos means inner
    // consumed nothing, and next has already failed at this exact index,
    // so retrying cannot succeed either.
    if (advanced <= pos) return -1;
    pos = advanced;
  }

  // All optional repetitions were used up; the continuation must match here.
  return next->match(str, matcher, pos);
}

// NFAPossessiveQuantifierUNode

// Constructs the node by forwarding every argument, unchanged, to the
// NFAQuantifierUNode base constructor. No additional state is set up here.
NFAPossessiveQuantifierUNode::NFAPossessiveQuantifierUNode(WCPattern * pat,
                                                           NFAUNode * internal,
                                                           const int minMatch,
                                                           const int maxMatch)
  : NFAQuantifierUNode(pat, internal, minMatch, maxMatch)
{
}
// Runs NFAQuantifierUNode::match first; if that fails (-1) so does this node.
// Afterwards inner is applied repeatedly, up to (max - min) more times, for
// as long as each application moves the index forward. The continuation
// (next) is then tried exactly once, at the furthest index reached.
//
// Returns whatever next returns at that index, or -1 if the base match failed.
int NFAPossessiveQuantifierUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  int pos = NFAQuantifierUNode::match(str, matcher, curInd);
  if (pos == -1) return -1;

  for (int count = min; count < max; ++count)
  {
    const int advanced = inner->match(str, matcher, pos);
    // Stop as soon as inner fails (-1) or makes no progress.
    if (advanced <= pos) break;
    pos = advanced;
  }

  return next->match(str, matcher, pos);
}

// NFAAcceptUNode

// Default constructor; performs no work of its own.
NFAAcceptUNode::NFAAcceptUNode()
{
}
// Returns curInd unchanged when this node has no successor; otherwise the
// decision is delegated to the successor at the same index.
int NFAAcceptUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  return next ? next->match(str, matcher, curInd) : curInd;
}

// NFAClassUNode

// Creates a class node with no characters registered in vals; only the
// inversion flag is recorded.
NFAClassUNode::NFAClassUNode(const bool invert)
{
  this->inv = invert;
}
// Creates a class node from the characters in clazz: the inversion flag is
// recorded and every character of clazz is registered as a key in vals.
NFAClassUNode::NFAClassUNode(const std::wstring & clazz, const bool invert)
{
  this->inv = invert;

  for (std::wstring::const_iterator ch = clazz.begin(); ch != clazz.end(); ++ch)
  {
    vals[*ch] = 1;
  }
}
// Consumes one character at curInd if its membership in vals differs from
// the inversion flag (present and not inverted, or absent and inverted),
// then continues with next at curInd + 1.
//
// Returns -1 when curInd is at or past the end of str, or when the character
// is rejected.
int NFAClassUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  if (curInd >= static_cast<int>(str.size())) return -1;

  const bool listed = (vals.find(str[curInd]) != vals.end());
  if (listed != inv)
  {
    return next->match(str, matcher, curInd + 1);
  }
  return -1;
}

// NFACIClassUNode

// Creates a case-insensitive class node with no characters registered in
// vals; only the inversion flag is recorded.
NFACIClassUNode::NFACIClassUNode(const bool invert)
{
  this->inv = invert;
}
// Creates a case-insensitive class node: the inversion flag is recorded and
// the towlower() form of every character of clazz is registered as a key in
// vals.
NFACIClassUNode::NFACIClassUNode(const std::wstring & clazz, const bool invert)
{
  this->inv = invert;

  for (std::wstring::const_iterator ch = clazz.begin(); ch != clazz.end(); ++ch)
  {
    vals[towlower(*ch)] = 1;
  }
}
// Consumes one character at curInd if the membership of its towlower() form
// in vals differs from the inversion flag, then continues with next at
// curInd + 1.
//
// Returns -1 when curInd is at or past the end of str, or when the character
// is rejected.
int NFACIClassUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  if (curInd >= static_cast<int>(str.size())) return -1;

  const bool listed = (vals.find(towlower(str[curInd])) != vals.end());
  if (listed != inv)
  {
    return next->match(str, matcher, curInd + 1);
  }
  return -1;
}

// NFASubStartUNode

// Default constructor; performs no work of its own.
NFASubStartUNode::NFASubStartUNode()
{
}
// Pass-through node: consumes nothing and checks nothing, simply forwards
// the match to next at the same index and returns its result.
int NFASubStartUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  const int result = next->match(str, matcher, curInd);
  return result;
}

// NFAOrUNode

// Stores the two alternative sub-graphs: first becomes `one`, second
// becomes `two`.
NFAOrUNode::NFAOrUNode(NFAUNode * first, NFAUNode * second)
  : one(first),
    two(second)
{
}
// Lets each non-null alternative (`one`, then `two`) add its nodes to soFar,
// and finally runs the base-class NFAUNode::findAllNodes on this node.
void NFAOrUNode::findAllNodes(std::map<NFAUNode*, bool> & soFar)
{
  if (one)
  {
    one->findAllNodes(soFar);
  }
  if (two)
  {
    two->findAllNodes(soFar);
  }

  NFAUNode::findAllNodes(soFar);
}
// Alternation: tries `one` followed by next; if either step fails (-1),
// falls back to `two` followed by next, both starting from curInd.
//
// Returns the index produced by next for the first alternative that leads
// to a full match, or -1 if neither does.
int NFAOrUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  // First alternative.
  int end = one->match(str, matcher, curInd);
  if (end != -1)
  {
    end = next->match(str, matcher, end);
    if (end != -1) return end;
  }

  // Second alternative, restarted from the original index.
  end = two->match(str, matcher, curInd);
  if (end == -1) return end;

  return next->match(str, matcher, end);
}

// NFAQuoteUNode

// Keeps a copy of the literal text in qStr.
NFAQuoteUNode::NFAQuoteUNode(const std::wstring & quoted)
  : qStr(quoted)
{
}
// Matches the literal qStr exactly (case-sensitively) at curInd and, on
// success, continues with next just past the literal.
//
// Returns -1 if fewer than qStr.size() characters remain or the text differs.
int NFAQuoteUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  const std::wstring::size_type quoteLen = qStr.size();

  if (curInd + quoteLen > str.size()) return -1;

  // Compare in place rather than materialising a temporary substring.
  if (str.compare(curInd, quoteLen, qStr) != 0) return -1;

  return next->match(str, matcher, curInd + quoteLen);
}

// NFACIQuoteUNode

// Keeps a copy of the literal text in qStr.
NFACIQuoteUNode::NFACIQuoteUNode(const std::wstring & quoted)
  : qStr(quoted)
{
}
// Matches the literal qStr at curInd using str_icmp for the comparison and,
// on success, continues with next just past the literal.
//
// Returns -1 if fewer than qStr.size() characters remain or str_icmp reports
// a difference (non-zero).
int NFACIQuoteUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  if (curInd + qStr.size() > str.size()) return -1;
  if (str_icmp(str.substr(curInd, qStr.size()).c_str(), qStr.c_str())) return -1;

  // The continuation must resume right after the consumed literal, i.e. at
  // curInd + qStr.size(). Passing qStr.size() alone (as the code previously
  // did) is only correct when curInd happens to be 0.
  return next->match(str, matcher, curInd + qStr.size());
}

// NFALookAheadUNode

// Stores the sub-graph to probe (inner) and whether the assertion is
// positive (pos).
NFALookAheadUNode::NFALookAheadUNode(NFAUNode * internal, const bool positive)
  : NFAUNode(),
    pos(positive),
    inner(internal)
{
}
// Lets the inner sub-graph (when non-null) add its nodes to soFar, then runs
// the base-class NFAUNode::findAllNodes on this node.
void NFALookAheadUNode::findAllNodes(std::map<NFAUNode*, bool> & soFar)
{
  if (inner)
  {
    inner->findAllNodes(soFar);
  }

  NFAUNode::findAllNodes(soFar);
}
// Zero-width assertion: probes inner at curInd and continues with next at
// the same index (nothing is consumed) when the outcome agrees with the
// polarity -- inner matched and pos is true, or inner failed and pos is
// false. Returns -1 otherwise.
int NFALookAheadUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  const bool innerFailed = (inner->match(str, matcher, curInd) == -1);

  if (innerFailed != pos)
  {
    return next->match(str, matcher, curInd);
  }
  return -1;
}

// NFALookBehindUNode

// Stores the polarity of the assertion (pos) and a copy of the text that
// must (or must not) precede the current position (mStr).
NFALookBehindUNode::NFALookBehindUNode(const std::wstring & str, const bool positive)
  : pos(positive),
    mStr(str)
{
}
// Zero-width assertion on the text immediately before curInd.
//
// Positive form: continues with next only if the mStr.size() characters
// ending at curInd equal mStr. Negative form: continues with next unless
// they do. When fewer than mStr.size() characters precede curInd the
// positive form fails and the negative form succeeds.
//
// Returns the result of next at curInd (nothing is consumed), or -1.
int NFALookBehindUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  // Not enough text behind us for mStr to fit.
  if (curInd < static_cast<int>(mStr.size()))
  {
    return pos ? -1 : next->match(str, matcher, curInd);
  }

  const bool precededByText = (str.substr(curInd - mStr.size(), mStr.size()) == mStr);

  // Proceed only when what we found agrees with the requested polarity.
  if (precededByText == pos)
  {
    return next->match(str, matcher, curInd);
  }
  return -1;
}

// NFAStartOfLineUNode

// Default constructor; performs no work of its own.
NFAStartOfLineUNode::NFAStartOfLineUNode()
{
}
// Zero-width assertion: succeeds at index 0 or directly after a '\n' or '\r'
// character, continuing with next at the same index. Returns -1 otherwise.
int NFAStartOfLineUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  if (curInd != 0)
  {
    const wchar_t previous = str[curInd - 1];
    if (previous != L'\n' && previous != L'\r') return -1;
  }

  return next->match(str, matcher, curInd);
}

// NFAEndOfLineUNode

// Default constructor; performs no work of its own.
NFAEndOfLineUNode::NFAEndOfLineUNode()
{
}
// Zero-width assertion: succeeds at or past the end of str, or when the
// character at curInd is '\n' or '\r', continuing with next at the same
// index. Returns -1 otherwise.
int NFAEndOfLineUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  if (curInd < static_cast<int>(str.size()))
  {
    const wchar_t current = str[curInd];
    if (current != L'\n' && current != L'\r') return -1;
  }

  return next->match(str, matcher, curInd);
}

// NFAReferenceUNode

// Remembers which group (gi) this back-reference node refers to.
NFAReferenceUNode::NFAReferenceUNode(const int groupIndex)
  : gi(groupIndex)
{
}
// Back-reference: requires the text at curInd to repeat the text recorded
// for group gi, i.e. the range [matcher->starts[gi], matcher->ends[gi]).
//
// When gi < 1, or the recorded range is empty or inverted, nothing is
// consumed and next is tried at curInd. Otherwise the recorded text must
// fit into the remainder of str and compare equal; next is then tried just
// past it.
//
// Returns the result of next, or -1 when the referenced text does not fit
// or does not match.
int NFAReferenceUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  // Validate the group index BEFORE it is used to index the matcher's
  // arrays; previously starts[gi]/ends[gi] were read ahead of this guard.
  if (gi < 1) return next->match(str, matcher, curInd);

  const int start = matcher->starts[gi];
  const int len   = matcher->ends[gi] - start;

  // len < 0 means ends[gi] < starts[gi]; len == 0 is an empty capture.
  // Either way there is nothing to compare.
  if (len <= 0) return next->match(str, matcher, curInd);

  if (curInd + len > (int)str.size()) return -1;

  // Compare the two ranges in place instead of building two substrings.
  if (str.compare(curInd, len, str, start, len) != 0) return -1;

  return next->match(str, matcher, curInd + len);
}

// NFAStartOfInputUNode

// Default constructor; performs no work of its own.
NFAStartOfInputUNode::NFAStartOfInputUNode()
{
}
// Zero-width assertion: succeeds only at index 0, continuing with next at
// the same index. Returns -1 anywhere else.
int NFAStartOfInputUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  return (curInd == 0) ? next->match(str, matcher, curInd) : -1;
}

// NFAEndOfInputUNode

// Records in `term` whether a trailing line terminator before the end of
// the input is tolerated.
NFAEndOfInputUNode::NFAEndOfInputUNode(const bool lookForTerm)
  : term(lookForTerm)
{
}
// Zero-width assertion for the end of the input. Succeeds when curInd equals
// str.size(). When `term` is set it also succeeds if exactly one character
// remains and it is '\r' or '\n', or if exactly two characters remain and
// they are "\r\n". On success next is tried at curInd (the terminator is not
// consumed); otherwise -1 is returned.
int NFAEndOfInputUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  const int len = static_cast<int>(str.size());
  bool atEnd = (curInd == len);

  if (!atEnd && term)
  {
    if (curInd == len - 1)
    {
      // One character left: accept a lone CR or LF.
      atEnd = (str[curInd] == L'\r' || str[curInd] == L'\n');
    }
    else if (curInd == len - 2)
    {
      // Two characters left: accept a CR LF pair.
      atEnd = (str.substr(curInd, 2) == L"\r\n");
    }
  }

  return atEnd ? next->match(str, matcher, curInd) : -1;
}

// NFAWordBoundaryUNode

// Records the polarity of the word-boundary assertion in `pos`.
NFAWordBoundaryUNode::NFAWordBoundaryUNode(const bool positive)
  : pos(positive)
{
}
// Zero-width word-boundary assertion. A boundary is detected when exactly
// one of the characters on either side of curInd is an ASCII letter
// (a-z / A-Z); a position outside the string counts as a non-letter.
//
// Succeeds (continuing with next at curInd) when curInd is at the end of
// str, or when a boundary is detected and `pos` is true. Returns -1
// otherwise.
//
// NOTE(review): with pos == false this can only succeed at end of input, so
// a negated boundary never matches inside the string -- confirm whether
// `ok == pos` was intended.
// NOTE(review): end of input succeeds unconditionally, regardless of the
// preceding character or of pos -- confirm this is intended.
int NFAWordBoundaryUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  #define is_alpha(x) (((x) >= (wchar_t)'a' && (x) <= (wchar_t)'z') || ((x) >= (wchar_t)'A' && (x) <= (wchar_t)'Z'))

  const int len = (int)str.size();

  // Only read characters that lie inside [0, len). In particular, at
  // curInd == 0 there is no previous character; the old code evaluated
  // str[curInd - 1] there, which is an out-of-bounds read.
  const wchar_t c1 = (curInd > 0  && curInd - 1 < len) ? str[curInd - 1] : (wchar_t)-1;
  const wchar_t c2 = (curInd >= 0 && curInd     < len) ? str[curInd    ] : (wchar_t)-1;

  if (curInd == len) return next->match(str, matcher, curInd);

  const bool ok = (is_alpha(c1) ^ is_alpha(c2)) != 0;
  if (ok && pos) return next->match(str, matcher, curInd);
  return -1;

  #undef is_alpha
}

// NFAEndOfMatchUNode

// Default constructor; performs no work of its own.
NFAEndOfMatchUNode::NFAEndOfMatchUNode()
{
}
// Zero-width assertion: succeeds only when curInd equals matcher->lm,
// continuing with next at the same index. Returns -1 otherwise.
int NFAEndOfMatchUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  return (curInd == matcher->lm) ? next->match(str, matcher, curInd) : -1;
}

// NFAGroupHeadUNode

// Remembers which group (gi) this node opens.
NFAGroupHeadUNode::NFAGroupHeadUNode(const int groupIndex)
  : gi(groupIndex)
{
}
// Records curInd as the start of group gi in matcher->starts, then continues
// with next at the same index. If the continuation fails (negative result)
// the previous start value is put back so the matcher state is unchanged.
//
// Returns whatever next returns.
int NFAGroupHeadUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  const int savedStart = matcher->starts[gi];

  matcher->starts[gi] = curInd;
  const int result = next->match(str, matcher, curInd);
  if (result >= 0) return result;

  // Backtrack: undo the tentative start position.
  matcher->starts[gi] = savedStart;
  return result;
}

// NFAGroupTailUNode

// Remembers which group (gi) this node closes.
NFAGroupTailUNode::NFAGroupTailUNode(const int groupIndex)
  : gi(groupIndex)
{
}
// Records curInd as the end of group gi in matcher->ends, then continues
// with next at the same index. If the continuation fails (negative result)
// the previous end value is put back so the matcher state is unchanged.
//
// Returns whatever next returns.
int NFAGroupTailUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  const int savedEnd = matcher->ends[gi];

  matcher->ends[gi] = curInd;
  const int result = next->match(str, matcher, curInd);
  if (result >= 0) return result;

  // Backtrack: undo the tentative end position.
  matcher->ends[gi] = savedEnd;
  return result;
}

// NFAGroupLoopPrologueUNode

// Remembers which group (gi) the loop bookkeeping belongs to.
NFAGroupLoopPrologueUNode::NFAGroupLoopPrologueUNode(const int groupIndex)
  : gi(groupIndex)
{
}
// Resets the per-group loop bookkeeping for group gi (groups = 0,
// groupPos = 0, groupIndeces = -1) and continues with next at curInd.
// If the continuation fails (negative result) all three previous values are
// restored so the matcher state is unchanged.
//
// Returns whatever next returns.
int NFAGroupLoopPrologueUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  // Snapshot the current bookkeeping for a possible rollback.
  const int savedCount = matcher->groups[gi];
  const int savedPos   = matcher->groupPos[gi];
  const int savedIndex = matcher->groupIndeces[gi];

  matcher->groups[gi] = 0;
  matcher->groupPos[gi] = 0;
  matcher->groupIndeces[gi] = -1;

  const int result = next->match(str, matcher, curInd);
  if (result >= 0) return result;

  // Backtrack: put the snapshot back.
  matcher->groups[gi] = savedCount;
  matcher->groupPos[gi] = savedPos;
  matcher->groupIndeces[gi] = savedIndex;
  return result;
}

// NFAGroupLoopUNode

// Stores the loop configuration: the repeated sub-graph (inner), the
// repetition bounds (min, max), the group index (gi) and the match type.
// NFAGroupLoopUNode::match dispatches on type: 0 -> matchGreedy,
// 1 -> matchLazy, 2 -> matchPossessive.
NFAGroupLoopUNode::NFAGroupLoopUNode(NFAUNode * internal, const int minMatch, const int maxMatch,
                                   const int groupIndex, const int matchType)
{
  this->inner = internal;
  this->min   = minMatch;
  this->max   = maxMatch;
  this->gi    = groupIndex;
  this->type  = matchType;
}
// Lets the inner sub-graph (when non-null) add its nodes to soFar, then runs
// the base-class NFAUNode::findAllNodes on this node.
void NFAGroupLoopUNode::findAllNodes(std::map<NFAUNode*, bool> & soFar)
{
  if (inner)
  {
    inner->findAllNodes(soFar);
  }

  NFAUNode::findAllNodes(soFar);
}
// Decides what to do on reaching the loop node for group gi:
//
//  * If curInd has not moved past the index recorded for the previous
//    iteration (matcher->groupIndeces[gi]), the loop is left and next is
//    tried.
//  * If fewer than min iterations have been counted, another iteration of
//    inner is mandatory; the counter and recorded index are rolled back if
//    it fails.
//  * If max iterations have been reached, the loop is left and next is
//    tried.
//  * Otherwise the choice between iterating and leaving is delegated to
//    matchGreedy / matchLazy / matchPossessive according to type (0 / 1 / 2).
//
// Returns the resulting index, or -1 on failure (including an unknown type).
int NFAGroupLoopUNode::match(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  const bool advanced = (curInd > matcher->groupIndeces[gi]);

  // No progress since the last iteration: stop looping.
  if (!advanced) return next->match(str, matcher, curInd);

  if (matcher->groups[gi] < min)
  {
    // Mandatory iteration.
    ++matcher->groups[gi];
    const int savedIndex = matcher->groupIndeces[gi];
    matcher->groupIndeces[gi] = curInd;

    const int result = inner->match(str, matcher, curInd);
    if (result < 0)
    {
      // Backtrack the bookkeeping for this failed iteration.
      matcher->groupIndeces[gi] = savedIndex;
      --matcher->groups[gi];
    }
    return result;
  }

  // Upper bound reached: only the continuation may follow.
  if (matcher->groups[gi] >= max) return next->match(str, matcher, curInd);

  // Optional iteration: strategy depends on the quantifier type.
  if (type == 0) return matchGreedy(str, matcher, curInd);
  if (type == 1) return matchLazy(str, matcher, curInd);
  if (type == 2) return matchPossessive(str, matcher, curInd);

  return -1;
}
// Greedy strategy for an optional iteration: first try another iteration of
// inner (after recording curInd and bumping the iteration counter); only if
// that fails is the bookkeeping rolled back and next tried at curInd.
//
// Returns the result of inner on success, otherwise the result of next.
int NFAGroupLoopUNode::matchGreedy(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  // Remember the previous iteration index so it can be restored.
  const int savedIndex = matcher->groupIndeces[gi];

  // Tentatively start one more iteration at curInd.
  matcher->groupIndeces[gi] = curInd;
  ++matcher->groups[gi];

  const int iterated = inner->match(str, matcher, curInd);
  if (iterated >= 0) return iterated;

  // The extra iteration failed: undo it and leave the loop instead.
  --matcher->groups[gi];
  matcher->groupIndeces[gi] = savedIndex;
  return next->match(str, matcher, curInd);
}
// Lazy strategy for an optional iteration: first try to leave the loop via
// next; only if that fails is another iteration of inner attempted (after
// recording curInd and bumping the iteration counter). The bookkeeping is
// rolled back if that iteration fails as well.
//
// Returns the result of next on success, otherwise the result of inner.
int NFAGroupLoopUNode::matchLazy(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  // Prefer not to iterate at all.
  const int skipped = next->match(str, matcher, curInd);
  if (skipped >= 0) return skipped;

  // Remember the previous iteration index so it can be restored.
  const int savedIndex = matcher->groupIndeces[gi];

  // Tentatively start one more iteration at curInd.
  matcher->groupIndeces[gi] = curInd;
  ++matcher->groups[gi];

  const int iterated = inner->match(str, matcher, curInd);
  if (iterated < 0)
  {
    // The iteration failed too: undo the bookkeeping.
    --matcher->groups[gi];
    matcher->groupIndeces[gi] = savedIndex;
  }
  return iterated;
}
// Possessive strategy for an optional iteration: the current iteration count
// is stored in matcher->groupPos[gi], then another iteration of inner is
// attempted (after recording curInd and bumping the counter). If it fails,
// the counter and index are rolled back, and next is tried at curInd only
// when the restored count still equals the value stored in groupPos[gi].
//
// Returns the result of inner on success; otherwise the result of next when
// the above condition holds, or the negative result of inner when it does
// not.
int NFAGroupLoopUNode::matchPossessive(const std::wstring & str, WCMatcher * matcher, const int curInd) const
{
  // Remember the previous iteration index so it can be restored.
  const int savedIndex = matcher->groupIndeces[gi];

  // Mark how many iterations had been matched before this attempt.
  matcher->groupPos[gi] = matcher->groups[gi];

  // Tentatively start one more iteration at curInd.
  matcher->groupIndeces[gi] = curInd;
  ++matcher->groups[gi];

  const int iterated = inner->match(str, matcher, curInd);
  if (iterated >= 0) return iterated;

  // The extra iteration failed: undo its bookkeeping.
  --matcher->groups[gi];
  matcher->groupIndeces[gi] = savedIndex;

  // Leave the loop only if the count is back at the mark set above.
  if (matcher->groups[gi] == matcher->groupPos[gi])
  {
    return next->match(str, matcher, curInd);
  }
  return iterated;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -