📄 regexx.cpp
字号:
/////////////////////////////////////////////////////////////////////////////
// $Header: /shorthand/src/regexx.cpp 3 8/28/02 6:27a Arm $
//---------------------------------------------------------------------------
// This file is part of "libAndrix" library - a collection of classes
// and functions developed by Andrei Remenchuk.
//---------------------------------------------------------------------------
// While you may own complete copyright on the project with which you have
// received this file, the author reserves the right to use code contained
// in this very file for any purposes, including publishing and usage in
// any free or commercial software.
//
// You may re-distribute this file or re-use it in your own free or
// commercial software provided that this text is included in the file.
// If you change this file you must include clear notice stating that
// you changed this file and the date of change.
//
// This statement doesn't apply to other files that are part of the same
// package unless otherwise noted.
//---------------------------------------------------------------------------
// (c) 1998-2002 Andrei Remenchuk <andrei@remenchuk.com>
//---------------------------------------------------------------------------
// regexx.cpp - C++ wrapper of GNU regex library
/////////////////////////////////////////////////////////////////////////////
#include "regexx.h"
#include <malloc.h>
#include <string.h>
#include "regex.h"
// Typed views of the m_pattern_buffer / m_registers members: each macro casts
// the member to the corresponding GNU regex structure pointer.
// NOTE: these are macros, not real members, so they must never be written
// as `this->m_buffer` / `this->m_regs` or reused as local variable names.
#define m_buffer ((re_pattern_buffer*)m_pattern_buffer)
#define m_regs ((re_registers*)m_registers)
// RXX_* syntax option constants; each one is defined as the RE_* syntax bit
// of the same name from regex.h.
const int RXX_NO_BK_PARENS = RE_NO_BK_PARENS;
const int RXX_NO_BK_VBAR = RE_NO_BK_VBAR;
const int RXX_INTERVALS = RE_INTERVALS;
const int RXX_NO_BK_BRACES = RE_NO_BK_BRACES;
/**
 * Constructs a matcher and compiles `pattern` immediately.
 *
 * parameters:
 *   pattern [IN] regular expression source text.
 *   options [IN] GNU regex syntax bits; 0 selects the default set
 *                RE_NO_BK_PARENS | RE_CHAR_CLASSES | RE_NO_BK_VBAR | RE_DOT_NEWLINE.
 *
 * Any exception raised by compile() is propagated unchanged. Because the
 * destructor does not run for an object whose constructor throws, the
 * structures allocated by ctor() are released here before re-throwing.
 */
RX::RX(const char* pattern, int options)
: m_pattern(pattern), m_syntax_options(options)
{
    if (options == 0)
    {
        m_syntax_options = (RE_NO_BK_PARENS|RE_CHAR_CLASSES|RE_NO_BK_VBAR|RE_DOT_NEWLINE);
    }
    ctor();
    try
    {
        compile();
    }
    catch (...)
    {
        // Same cleanup sequence as the destructor, then pass the error on.
        shrink();
        free(m_pattern_buffer);
        free(m_registers);
        throw;
    }
}
/**
 * Copy constructor: copies the pattern text and syntax options of `rx`
 * and compiles a fresh, independent pattern buffer for this object.
 *
 * Any exception raised by compile() is propagated unchanged. Because the
 * destructor does not run for an object whose constructor throws, the
 * structures allocated by ctor() are released here before re-throwing.
 */
RX::RX(const RX& rx)
: m_pattern(rx.m_pattern), m_syntax_options(rx.m_syntax_options)
{
    ctor();
    try
    {
        compile();
    }
    catch (...)
    {
        // Same cleanup sequence as the destructor, then pass the error on.
        shrink();
        free(m_pattern_buffer);
        free(m_registers);
        throw;
    }
}
/**
 * Returns a new heap-allocated RX built from this one through the copy
 * constructor. The result comes straight from `new`.
 */
RX* RX::clone() const
{
    RX* duplicate = new RX(*this);
    return duplicate;
}
void RX::ctor()
{
m_pattern_buffer = malloc(sizeof(re_pattern_buffer));
m_registers = malloc(sizeof(re_registers));
memset(m_pattern_buffer, 0, sizeof(re_pattern_buffer));
memset(m_registers, 0, sizeof(re_registers));
}
void RX::aquire(const RX& rx)
{
m_pattern = rx.m_pattern;
m_syntax_options = rx.m_syntax_options;
compile();
}
/**
 * (Re)compiles m_pattern using m_syntax_options.
 *
 * Any previously compiled state is discarded through shrink() first.
 *
 * return value:
 *   always true; failure is reported by throwing.
 * throws:
 *   ShhObjectException* (code 1904) carrying the message returned by
 *   re_compile_pattern() when the pattern cannot be compiled.
 */
bool RX::compile()
{
    shrink();
    m_buffer->regs_allocated = REGS_UNALLOCATED;

    // re_compile_pattern() reads the syntax from this library-wide global.
    ::re_syntax_options = m_syntax_options;
    const char* err = re_compile_pattern(m_pattern, m_pattern.length(), m_buffer);
    if (err != NULL)
    {
        // rx compilation failed
        throw new ShhObjectException(1904, "%s", err);
    }

    // fastmap is a must for matching long strings
    m_buffer->fastmap = (char*) malloc(256);
    if (m_buffer->fastmap != NULL)
    {
        memset(m_buffer->fastmap, 0, 256);
    }
    // If the allocation failed, fastmap stays NULL; the library then simply
    // searches without a fastmap instead of this code writing through NULL.
    return true;
}
/**
 * Searches for the regular expression inside a buffer.
 *
 * On success the match registers are filled in, so sub-patterns can be
 * extracted afterwards with submatch().
 *
 * parameters:
 *   s         [IN] text to search.
 *   size      [IN] number of characters of `s` to consider.
 *   start     [IN] offset at which the search begins; the remaining
 *                  size-start positions are scanned.
 *   make_copy [IN] when true, `s` is stored in m_content for later
 *                  submatch() calls; when false, m_content is cleared.
 * return value:
 *   the offset at which the match ended (end of register 0); it can be
 *   passed as `start` to the next call of search().
 *   -1 if no match has been found.
 * throws:
 *   ShhObjectException* (code 9102) when re_search() reports -2.
 */
int RX::search(const char* s, int size, int start, bool make_copy )
{
    const int found_at = re_search(m_buffer, s, size, start, size-start, m_regs);

    if (found_at == -1)
    {
        return -1;
    }
    if (found_at == -2)
    {
        throw new ShhObjectException(9102, "internal error in rx search");
    }

    if (!make_copy)
    {
        m_content.clear();
    }
    else
    {
        m_content = s;
    }
    return m_regs->end[0];
}
/**
 * Same as search(), except that an internal re_search() failure is
 * reported through the return value instead of an exception.
 *
 * return value:
 *   end offset of the match (end of register 0) on success,
 *   -1 when nothing matched, -2 when re_search() reported -2.
 */
int RX::search_no_exception(const char* s, int size, int start, bool make_copy)
{
    const int status = re_search(m_buffer, s, size, start, size-start, m_regs);

    // Both failure codes are handed back to the caller untouched.
    if (status == -2 || status == -1)
    {
        return status;
    }

    if (!make_copy)
    {
        m_content.clear();
    }
    else
    {
        m_content = s;
    }
    return m_regs->end[0];
}
/**
* Replaces every occurence of the pattern within the string by fixed string
*/
int RX::replace(const char* source, const char* replacement, string& target)
{
int off = 0, last_off = 0;
int source_length = strlen(source);
int count = 0;
target.clear();
while((off = search(source, source_length, last_off, false)) != -1)
{
if (off > last_off)
{
target.append(source + last_off, off - last_off);
}
target.append(replacement);
count++;
last_off = off;
}
if (last_off < source_length)
{
target.append(source + last_off, source_length - last_off);
}
return count;
}
/**
 * Replaces every occurrence of the pattern using a callback.
 *
 * For each match, the matched text is extracted and passed to `replacer`
 * together with `target`; the callback appends whatever should stand in
 * for the match.
 *
 * parameters:
 *   source   [IN]  NUL-terminated text to scan.
 *   replacer [IN]  callback invoked as replacer(matched_text, target).
 *   target   [OUT] receives the result; cleared first.
 * return value:
 *   number of matches handed to the callback.
 * throws:
 *   whatever search() throws.
 */
int RX::replace(const char* source, replace_function_t replacer, string& target)
{
    const int source_length = static_cast<int>(strlen(source));
    int last_off = 0;
    int count = 0;
    target.clear();
    for (;;)
    {
        const int match_end = search(source, source_length, last_off, false);
        if (match_end == -1) break;

        // Text between the previous match and this one is copied verbatim.
        const int match_start = m_regs->start[0];
        if (match_start > last_off)
        {
            target.append(source + last_off, match_start - last_off);
        }
        string m; submatch(source, 0, m);
        replacer(m, target);
        count++;

        if (match_end > match_start)
        {
            last_off = match_end;
            continue;
        }

        // Empty match: searching again from the same offset would find the
        // same empty match forever, so step over one source character.
        if (match_end >= source_length)
        {
            last_off = source_length;
            break;
        }
        target.append(source + match_end, 1);
        last_off = match_end + 1;
    }
    if (last_off < source_length)
    {
        target.append(source + last_off, source_length - last_off);
    }
    return count;
}
/**
 * Tries to match the pattern at the very beginning of `s`.
 *
 * parameters:
 *   s [IN] NUL-terminated text. On success a copy of the whole string is
 *          kept in m_content so that submatch() can extract groups later.
 * return value:
 *   true if re_match() succeeded, false otherwise (including internal
 *   errors, which re_match() reports as a negative value).
 *
 * On failure the compiled pattern is left untouched. re_nsub in particular
 * must not be overwritten: it belongs to the compiled pattern, and storing
 * (size_t)-1 there makes every `index <= re_nsub` range check succeed.
 */
bool RX::match(const char* s)
{
    const int off = re_match(m_buffer, s, strlen(s), 0, m_regs);
    if (off <= -1)
    {
        return false;
    }
    m_content = s; // make a copy of the entire string
    return true;
}
/**
 * Extracts a sub-match from the text stored in m_content by forwarding to
 * the overload that takes an explicit origin string.
 */
bool RX::submatch(unsigned int index, string& sub, int offset) const
{
    const bool extracted = submatch(m_content, index, sub, offset);
    return extracted;
}
/**
 * Extracts the text captured by one of the grouping operators '()'.
 *
 * parameters:
 *   origin [IN]  text the registers refer to.
 *   index  [IN]  group number; groups are numbered from 1, and 0 stands
 *                for the entire matched text.
 *   sub    [OUT] receives the extracted text; emptied when `index` is
 *                out of range.
 *   offset [IN]  added to the start position of the group.
 * return value:
 *   true if `index` does not exceed the pattern's group count.
 */
bool RX::submatch(const char* origin, unsigned int index, string& sub, int offset) const
{
    // Reject group numbers the compiled pattern does not have.
    if (index > m_buffer->re_nsub)
    {
        sub.clear();
        return false;
    }

    sub.paste(origin, m_regs->start[index]+offset, m_regs->end[index]);
    return true;
}
/**
 * Matches `s` and, on success, stores group 1 in `sub1`.
 * The output is left untouched when the match fails.
 */
bool RX::match(const char* s, string& sub1)
{
    const bool matched = match(s);
    if (matched)
    {
        submatch(1, sub1);
    }
    return matched;
}
/**
 * Matches `s` and, on success, stores groups 1..2 in `sub1`..`sub2`.
 * The outputs are left untouched when the match fails.
 */
bool RX::match(const char* s, string& sub1, string& sub2)
{
    const bool matched = match(s);
    if (matched)
    {
        submatch(1, sub1);
        submatch(2, sub2);
    }
    return matched;
}
/**
 * Matches `s` and, on success, stores groups 1..3 in `sub1`..`sub3`.
 * The outputs are left untouched when the match fails.
 */
bool RX::match(const char* s, string& sub1, string& sub2, string& sub3)
{
    const bool matched = match(s);
    if (matched)
    {
        submatch(1, sub1);
        submatch(2, sub2);
        submatch(3, sub3);
    }
    return matched;
}
/**
 * Matches `s` and, on success, stores groups 1..4 in `sub1`..`sub4`.
 * The outputs are left untouched when the match fails.
 */
bool RX::match(const char* s, string& sub1, string& sub2, string& sub3, string& sub4)
{
    const bool matched = match(s);
    if (matched)
    {
        submatch(1, sub1);
        submatch(2, sub2);
        submatch(3, sub3);
        submatch(4, sub4);
    }
    return matched;
}
/**
 * Matches `s` and, on success, stores groups 1..5 in `sub1`..`sub5`.
 * The outputs are left untouched when the match fails.
 */
bool RX::match(const char* s, string& sub1, string& sub2, string& sub3, string& sub4, string& sub5)
{
    const bool matched = match(s);
    if (matched)
    {
        submatch(1, sub1);
        submatch(2, sub2);
        submatch(3, sub3);
        submatch(4, sub4);
        submatch(5, sub5);
    }
    return matched;
}
/**
 * Matches `s` and, on success, fills `subs` with one newly allocated
 * string per group of the pattern (group 1 first).
 *
 * parameters:
 *   s    [IN]  NUL-terminated text to match.
 *   subs [OUT] cleared and refilled on success; untouched on failure.
 *              NOTE(review): the strings are created with `new` and handed
 *              to subs.add(); presumably the array owns them - confirm.
 * return value:
 *   true if the match succeeded.
 *
 * A group that did not take part in the match has its registers set to -1;
 * such a group yields an empty string instead of a pointer before `s`.
 */
bool RX::match(const char* s, string_array& subs)
{
    if (!match(s)) return false;
    subs.clear();
    for(unsigned int i=1; i<=m_buffer->re_nsub; i++)
    {
        int start = m_regs->start[i];
        int length = m_regs->end[i] - start;
        if (start < 0 || length < 0)
        {
            // Unset group: use an empty range at a valid address.
            start = 0;
            length = 0;
        }
        subs.add(new string(s + start, length));
    }
    return true;
}
void RX::shrink()
{
if (m_buffer->buffer != NULL) free(m_buffer->buffer);
if (m_buffer->fastmap != NULL) free(m_buffer->fastmap);
if (m_regs->end != NULL) free(m_regs->end);
if (m_regs->start != NULL) free(m_regs->start);
m_content.clear();
ctor();
}
/**
 * Destructor: drops the compiled state through shrink(), then releases the
 * pattern buffer and register structures themselves.
 */
RX::~RX()
{
    shrink();

    // free() ignores NULL, so the pointers can be passed unconditionally.
    free(m_registers);
    free(m_pattern_buffer);
}
/**
 * Replacement callback used by dehtmlify(): receives one matched piece of
 * markup and appends its plain-text equivalent to `target`.
 *
 *   - a tag (anything starting with '<') produces no output;
 *   - &lt; &gt; &amp; &nbsp; produce '<', '>', '&' and a space;
 *   - a numeric reference &#NNN; produces the character whose code is the
 *     low 8 bits of NNN.
 *
 * The entity names are compared with ieq() (presumably a case-insensitive
 * comparison - confirm against the string class).
 */
static void dehtmlifier(const string& submatch, string& target)
{
    const char* sm = submatch.cstr();
    if (*sm == '<') return;                                   // tag: dropped
    else if (submatch.ieq("&lt;")) target.append("<");
    else if (submatch.ieq("&gt;")) target.append(">");
    else if (submatch.ieq("&amp;")) target.append("&");
    else if (submatch.ieq("&nbsp;")) target.append(" ");
    else if (sm[0] == '&' && sm[1] == '#')
    {
        // atoi() stops at the trailing ';'; only the low byte is kept.
        int code = atoi(sm+2) & 0xff;
        target.append( (char)code );
    }
}
/**
 * Strips HTML tags from `source` and decodes the entities &lt; &gt; &amp;
 * &nbsp; and numeric references &#NNN;, writing the result to `target`.
 *
 * The alternatives of the pattern must stay in step with the cases handled
 * by dehtmlifier(): every piece the pattern matches is passed to it.
 */
void dehtmlify(const char* source, string& target)
{
    RX tags("(<[^>]+>)|(&lt;)|(&gt;)|(&amp;)|(&nbsp;)|(&#[0-9]+;)");
    tags.replace(source, dehtmlifier, target);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -