📄 re_syntex.inl
字号:
// re_syntex.inl
//
// 正则表达式解析库
//
// 2004年在北京写于创意鹰翔公司
// 版权申明:
// 作者放弃除著作署名权之外的任何权益。你可以在保留此申明的前提下,自由的,无限制的
// 分发、修改和使用本函数库。
// 如果你有任何建议和BUG汇报,欢迎你联系原始作者:tearshark@eaglefly.com.cn
//
// 原始作者 : lanzhengpeng(兰征鹏)
// Modify time : 2004-6-15 18:47
// 历史: 参看re_history.txt
//-----------------------------------------------------------------------------
public:
// helper
// Static forwarder: returns whatever traits::Next(sz) returns.
// NOTE(review): presumably the position of the character after sz - confirm in the traits class.
static inline const re_char * Next(const re_char * sz)
{
    return traits::Next(sz);
}
// Static forwarder to traits::Cmp; the three arguments are passed through unchanged
// and the traits result is returned as-is.
static inline int Cmp(const re_char *szLeft, const re_char *szRight, size_t nByteCount)
{
    return traits::Cmp(szLeft, szRight, nByteCount);
}
// Static forwarder to traits::CmpNoCase; the three arguments are passed through
// unchanged and the traits result is returned as-is.
static inline int CmpNoCase(const re_char *szLeft, const re_char *szRight, size_t nByteCount)
{
    return traits::CmpNoCase(szLeft, szRight, nByteCount);
}
// Static forwarder to traits::ToLower(sz, szEnd, szOut); returns nothing.
// NOTE(review): presumably writes the lower-cased range [sz, szEnd) to szOut - confirm in traits.
static inline void ToLower(const re_char *sz,const re_char * szEnd,re_char * szOut)
{
    traits::ToLower(sz, szEnd, szOut);
}
// Static forwarder: returns the int produced by traits::IsDigit(sz).
static inline int IsDigit(const re_char * sz)
{
    return traits::IsDigit(sz);
}
// Static forwarder: returns the pointer table produced by traits::Abbrevs().
static inline const re_char** Abbrevs()
{
    return traits::Abbrevs();
}
// Static forwarder: returns the int produced by traits::ByteLen(sz).
static inline int ByteLen(const re_char *sz)
{
    return traits::ByteLen(sz);
}
// Static forwarder: converts a single character value through traits::ToUnsigned(c).
static inline unsigned char ToUnsigned(re_char c)
{
    return traits::ToUnsigned(c);
}
// Pointer overload: converts the character that sz points at through
// traits::ToUnsigned. sz must be dereferenceable.
// (The previous body dereferenced an undeclared name `c` instead of the
// parameter `sz`.)
static inline unsigned char ToUnsigned(const re_char * sz)
{
    return traits::ToUnsigned(*sz);
}
// Static forwarder: returns the size_t produced by traits::ToSize_t(sz).
// The matchers in this file compare this value against stored character codes.
static inline size_t ToSize_t(const re_char * sz)
{
    return traits::ToSize_t(sz);
}
// Forwarder to traits::to_int(szStart, szEnd, szOut); returns its long result.
// NOTE(review): unlike the helpers above this one is not declared static - confirm
// whether that is intentional.
inline long to_int(const re_char *szStart,const re_char * szEnd,re_char **szOut)
{
    return traits::to_int(szStart, szEnd, szOut);
}
// Forwarder to traits::to_hex(szStart, szEnd, szOut); returns its long result.
// NOTE(review): not declared static, unlike the static helpers in this section - confirm.
inline long to_hex(const re_char *szStart,const re_char * szEnd,re_char **szOut)
{
    return traits::to_hex(szStart, szEnd, szOut);
}
// Forwarder to traits::to_oct(szStart, szEnd, szOut); returns its long result.
// NOTE(review): not declared static, unlike the static helpers in this section - confirm.
inline long to_oct(const re_char *szStart,const re_char * szEnd,re_char **szOut)
{
    return traits::to_oct(szStart, szEnd, szOut);
}
private:
// Tree node describing a typed span [szStart, szEnd) together with its child
// nodes. The destructor deletes every child pointer stored in m_Block.
// NOTE(review): no copy constructor / assignment is declared, so copying a node
// that has children would lead to the same children being deleted twice.
// NOTE(review): no constructor initializes eType, szStart or szEnd here.
struct re_block
{
    typedef std::vector<re_block *> vector;

    re_types eType;
    const re_char * szStart;
    const re_char * szEnd;
    vector m_Block;     // child nodes, deleted by ~re_block()

    // True when sz is at or beyond the end of this block's span.
    bool _is_end(const re_char * sz) const
    {
        return !(sz < szEnd);
    }

    ~re_block()
    {
        for(size_t n = 0; n < m_Block.size(); ++n)
            delete m_Block[n];
    }
};
// Per-match working state handed to every matcher: the bounds of the input
// [szStart, szEnd), the option flags, a "break" flag and an error code.
struct re_match_temp
{
    bool bBreak;                // set by _break(), read by _is_break()
    const re_char * szStart;    // first position of the input
    const re_char * szEnd;      // one-past-the-end position of the input
    re_flags eFlags;            // options, returned by option()
    re_error eError;            // starts as ree_ok; matchers may overwrite it

    // _s / _e: input bounds, _f: option flags. bBreak starts false, eError starts ree_ok.
    re_match_temp(const re_char * _s,const re_char * _e,re_flags _f)
        : bBreak(false)
        , szStart(_s)
        , szEnd(_e)
        , eFlags(_f)
        , eError(ree_ok)
    {
    }

    // True only when sz is exactly the start of the input.
    bool _is_start(const re_char * sz) const
    {
        return szStart == sz;
    }

    // True when sz is at or beyond the end of the input.
    bool _is_end(const re_char * sz) const
    {
        return !(sz < szEnd);
    }

    // Reports whether _break() has been called.
    bool _is_break() const
    {
        return bBreak;
    }

    // Raises the break flag.
    void _break()
    {
        bBreak = true;
    }

    // Returns the option flags given at construction.
    re_flags option() const
    {
        return eFlags;
    }
};
//syntex
//规则一:匹配则将指针移动到下一个位置
//规则二:不匹配不要移动指针
// Abstract interface implemented by every pattern element in this file.
struct rp_base
{
    // Virtual so that elements can be deleted through an rp_base pointer.
    virtual ~rp_base()
    {
    }

    // Tries to match at position sz.
    //   pattern : shared per-match state (input bounds, flags, break/error).
    //   sz      : in/out cursor into the input.
    //   result  : receives match output.
    // Returns true on a match, false otherwise.
    virtual bool match(re_match_temp & pattern,
                       const re_char * & sz,
                       result_type & result) = 0;
};
struct rp_array : public rp_base
{
typedef std::vector<rp_base *> vector;
vector m_Syntex;
virtual ~rp_array(){
for(vector::iterator i=m_Syntex.begin(); i!=m_Syntex.end(); ++i)
delete *i;
}
virtual bool match(re_match_temp & pattern,const re_char * & sz,result_type & result)
{
for(vector::iterator i=m_Syntex.begin(); i!=m_Syntex.end(); ++i)
{
if((*i)->match(pattern,sz,result) == false)
return false;
}
return true;
}
};
// Capturing group: matches the contained sequence and, on success, records
// the matched span under m_GroupId.
// NOTE(review): m_GroupId is not initialized by this struct; whoever creates
// the group has to assign it.
struct rp_group : public rp_base
{
    size_t m_GroupId;   // identifier stored with each recorded span
    rp_array m_Array;   // the group's content

    // Matches m_Array starting at sz. On success appends
    // match_type(m_GroupId, begin, sz) to result.m_Groups, where begin is the
    // entry position and sz the position after the match, and returns true.
    //
    // On failure returns false with sz restored to its entry value: the
    // contained sequence may have advanced sz before one of its elements
    // failed, and this file's matcher convention is that a failed match must
    // not move the pointer.
    virtual bool match(re_match_temp & pattern,const re_char * & sz,result_type & result)
    {
        const re_char * begin = sz;
        if(!m_Array.match(pattern,sz,result))
        {
            sz = begin;
            return false;
        }
        result.m_Groups.push_back(match_type(m_GroupId,begin,sz));
        return true;
    }
};
// Matches exactly one character whose ToSize_t() value equals m_Coding.
struct rp_exactness : public rp_base
{
    size_t m_Coding;    // character code to compare against

    // Fails at end of input or when the current character differs;
    // otherwise advances sz by one character and succeeds.
    virtual bool match(re_match_temp & pattern,const re_char * & sz,result_type & result)
    {
        if(pattern._is_end(sz))
            return false;
        if(ToSize_t(sz) != m_Coding)
            return false;
        sz = Next(sz);
        return true;
    }
};
// Matches any one character, whatever it is; fails only at end of input.
struct rp_any_single_line : public rp_base
{
    virtual bool match(re_match_temp & pattern,const re_char * & sz,result_type & result)
    {
        const bool bHasChar = !pattern._is_end(sz);
        if(bHasChar)
            sz = Next(sz);
        return bHasChar;
    }
};
// Matches any one character except '\n'; fails at end of input.
struct rp_any_multi_line : public rp_base
{
    virtual bool match(re_match_temp & pattern,const re_char * & sz,result_type & result)
    {
        if(pattern._is_end(sz) || '\n' == ToSize_t(sz))
            return false;
        sz = Next(sz);
        return true;
    }
};
// Closed interval of character codes; the range matchers in this file test
// `begin <= c && c <= end`, so both bounds are inclusive.
struct range_range
{
    size_t begin;   // lowest code in the interval
    size_t end;     // highest code in the interval
};
struct rp_range : public rp_base
{
std::vector<range_range> m_Range;
std::vector<size_t> m_In;
virtual bool match(re_match_temp & pattern,const re_char * & sz,result_type & result)
{
if(pattern._is_end(sz))
return false;
size_t c = ToSize_t(sz);
if(std::find(m_In.begin(),m_In.end(),c) != m_In.end())
{
sz = Next(sz);
return true;
}
typedef std::vector<range_range>::iterator iterator;
for(iterator i=m_Range.begin(); i<m_Range.end(); ++i)
{
if((*i).begin <= c && c <= (*i).end)
{
sz = Next(sz);
return true;
}
}
return false;
}
};
// Negated character class: matches one character whose code is neither listed
// in m_In nor inside any inclusive interval of m_Range (both inherited from
// rp_range).
struct rp_not_range : public rp_range
{
    // Fails at end of input or when the character belongs to the class;
    // otherwise advances sz by one character and succeeds.
    virtual bool match(re_match_temp & pattern,const re_char * & sz,result_type & result)
    {
        if(pattern._is_end(sz))
            return false;

        const size_t c = ToSize_t(sz);

        // Individual codes are checked first, then the intervals.
        bool bExcluded = std::find(m_In.begin(),m_In.end(),c) != m_In.end();
        for(size_t n = 0; !bExcluded && n < m_Range.size(); ++n)
            bExcluded = (m_Range[n].begin <= c && c <= m_Range[n].end);

        if(bExcluded)
            return false;
        sz = Next(sz);
        return true;
    }
};
// Start anchor: succeeds, without consuming anything, only when sz is the
// start of the input as reported by pattern._is_start().
struct rp_begin_line : public rp_base
{
    // Anywhere else the match fails and the pattern's break flag is raised.
    virtual bool match(re_match_temp & pattern,const re_char * & sz,result_type & result)
    {
        if(!pattern._is_start(sz))
        {
            pattern._break();
            return false;
        }
        return true;
    }
};
struct rp_end_line : public rp_base
{
virtual bool match(re_match_temp & pattern,const re_char * & sz,result_type & result){
if(pattern._is_end(sz))
return (pattern.option() & rf_endasenter) ? true : false;
switch(ToSize_t(sz))
{
case '\r':
{
const re_char * szSave = sz;
sz = Next(sz);
if(pattern._is_end(sz))
return true;
if('\n' == ToSize_t(sz))
{
sz = Next(sz);
return true;
}
sz = szSave;
return false;
}
break;
case '\n':
sz = Next(sz);
return true;
break;
}
return false;
}
};
// End-of-input anchor: succeeds exactly when pattern._is_end(sz) is true.
// Never moves sz.
struct rp_end_all : public rp_base
{
    virtual bool match(re_match_temp & pattern,const re_char * & sz,result_type & result)
    {
        const bool bAtEnd = pattern._is_end(sz);
        return bAtEnd;
    }
};
//<m:n>
struct rp_repeat_specify_greed : public rp_base
{
rp_base * m_Leaf;
long m_Min,m_Max;
virtual ~rp_repeat_specify_greed(){
delete m_Leaf;
}
virtual bool match(re_match_temp & pattern,const re_char * & sz,result_type & result){
const re_char * szSave = sz;
long l;
for(l=0; l<m_Min; ++l)
if(pattern._is_end(szSave) || m_Leaf->match(pattern,szSave,result) == false)
return false;
for(;l<m_Max && !pattern._is_end(szSave) && m_Leaf->match(pattern,szSave,result); ++l);
sz = szSave;
return true;
}
};
//<m:n>?
struct rp_repeat_specify : public rp_base
{
rp_base * m_Leaf;
rp_array * m_Next;
long m_Min,m_Max;
virtual ~rp_repeat_specify(){
delete m_Leaf;
}
virtual bool match(re_match_temp & pattern,const re_char * & sz,result_type & result){
const re_char * szSave = sz;
long l;
for(l=0; l<m_Min; ++l)
if(pattern._is_end(szSave) || m_Leaf->match(pattern,szSave,result) == false)
return false;
for(;l<m_Max && !pattern._is_end(szSave); ++l)
{
const re_char * szTemp = szSave;
if(m_Next->match(pattern,szTemp,result))
{
sz = szSave;
return true;
}
if(!m_Leaf->match(pattern,szSave,result))
break;
}
sz = szSave;
return true;
}
};
//*?
struct rp_repeat_more_zero : public rp_base
{
rp_base * m_Leaf;
rp_array * m_Next;
virtual ~rp_repeat_more_zero(){
delete m_Leaf;
}
virtual bool match(re_match_temp & pattern,const re_char * & sz,result_type & result){
const re_char * szSave = sz;
for(;!pattern._is_end(szSave);)
{
const re_char * szTemp = szSave;
if(m_Next->match(pattern,szTemp,result))
{
sz = szSave;
return true;
}
szTemp = szSave;
if(!m_Leaf->match(pattern,szSave,result))
break;
else
{
if(szSave == szTemp)
{//有些无穷循环的在分析阶段没有检测到
pattern.eError = ree_recursion_always;
pattern._break();
return false;
}
}
}
sz = szSave;
return true;
}
};
//+?
struct rp_repeat_more_one : public rp_base
{
rp_base * m_Leaf;
rp_array * m_Next;
virtual ~rp_repeat_more_one(){
delete m_Leaf;
}
virtual bool match(re_match_temp & pattern,const re_char * & sz,result_type & result){
const re_char * szSave = sz;
if(pattern._is_end(szSave) || m_Leaf->match(pattern,szSave,result) == false)
return false;
for(;!pattern._is_end(szSave);)
{
const re_char * szTemp = szSave;
if(m_Next->match(pattern,szTemp,result))
{
sz = szSave;
return true;
}
szTemp = szSave;
if(!m_Leaf->match(pattern,szSave,result))
break;
else
{
if(szSave == szTemp)
{//有些无穷循环的在分析阶段没有检测到
pattern.eError = ree_recursion_always;
pattern._break();
return false;
}
}
}
sz = szSave;
return true;
}
};
//??
struct rp_repeat_zero_one : public rp_base
{
rp_base * m_Leaf;
rp_array * m_Next;
virtual ~rp_repeat_zero_one(){
delete m_Leaf;
}
virtual bool match(re_match_temp & pattern,const re_char * & sz,result_type & result){
const re_char * szTemp = sz;
if(m_Next->match(pattern,szTemp,result))
return true;
if(!pattern._is_end(sz))
m_Leaf->match(pattern,sz,result);
return true;
}
};
//*
struct rp_repeat_more_zero_greed : public rp_base
{
rp_base * m_Leaf;
virtual ~rp_repeat_more_zero_greed(){
delete m_Leaf;
}
virtual bool match(re_match_temp & pattern,const re_char * & sz,result_type & result){
const re_char * szSave = sz;
for(;!pattern._is_end(szSave);)
{
const re_char * szTemp = szSave;
if(m_Leaf->match(pattern,szSave,result))
{
if(szSave == szTemp)
{//有些无穷循环的在分析阶段没有检测到
pattern.eError = ree_recursion_always;
pattern._break();
return false;
}
}
else
{
break;
}
}
sz = szSave;
return true;
}
};
//+
struct rp_repeat_more_one_greed : public rp_base
{
rp_base * m_Leaf;
virtual ~rp_repeat_more_one_greed(){
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -