📄 gsar.cpp
字号:
/* gsarbmg.c *************************************** updated: 2004-12-07 20:29 TT
*
* Subroutines for fast string searching; no regular expressions
*
* Adapted from:
*
* Boyer/Moore/Gosper-assisted 'egrep' search, with delta0 table as in
* original paper (CACM, October, 1977). No delta1 or delta2. According to
* experiment (Horspool, Soft. Prac. Exp., 1982), delta2 is of minimal
* practical value. However, to improve for worst case input, integrating
* the improved Galil strategies (Apostolico/Giancarlo, Siam. J. Comput.,
* February 1986) deserves consideration.
*
* James A. Woods
* NASA Ames Research Center
*
* Shub-Nigurrath of ARTeam
*
* December 2005 Shub-Nigurrath
* adapted it to C++ and added memory search support
* 29 April 1986 Jeff Mogul Stanford
* Adapted as a set of subroutines for use by a program. No
* regular-expression support.
*
* 12 February 1992 Tormod Tjaberg
* Used parts of the original routines to implement extremely fast
* file search & replace mechanisms on ASCII & non ASCII files taking
* care not to 'chop' up the search pattern.
*
* Note:
* If a file consists of the following bytes: 'abrabra' a search for
* 'abra' will yield two matches. However if we are to replace 'abra'
* with 'foobar' only one occurrence will be changed and the output
* file will contain 'foobarbra'.
*
* Copyright (C) 1992-2002 Tormod Tjaberg
* This is free software, distributed under the terms of the
* GNU General Public License. For details see the file COPYING
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <sys/types.h>
#include "comp_dep.h"
#include "gsar.h"
BMG_gsar::BMG_gsar() {
BMG_Patlen=0;
memset(BMG_Pattern, 0, PAT_BUFSIZ*sizeof(unsigned char));
memset(BMG_Delta0, 0, 256*sizeof(int));
memset(BMG_Buffer, 0, (BUFSIZ + PAT_BUFSIZ)*sizeof(unsigned char)); /* search buffer */
memset(BMG_Cmap, 0, 256*sizeof(unsigned char));
}
/* Input : pCtrl - pointer to structure containg output and ctrl info
* FileOfs - actual offset in file
* BufOfs - match offset in search buffer
* pStart - pointer to start of the search buffer
* pEnd - pointer to end of the search buffer
*
* Returns: nothing
*
* Displays buffer information (filename, offset, context) according
* to the flags set in the structure. i.e. be a bit verbose.
*/
void BMG_gsar::Verbose(OUTPUT_CTRL *pCtrl, unsigned long FileOfs, int BufOfs,
unsigned char *pStart, unsigned char *pEnd)
{
if (pCtrl->fFileSpec) /* display file name */
fprintf(pCtrl->fpMsg, "%s: ", pCtrl->pInputFile);
if (pCtrl->fByteOffset) /* display byte offset */
fprintf(pCtrl->fpMsg, "0x%lx%s",
FileOfs + BufOfs,
(pCtrl->fTextual) ? ": " : "");
VerboseBase(pCtrl, FileOfs, BufOfs, pStart, pEnd);
}
void BMG_gsar::VerboseBase(OUTPUT_CTRL_BASE *pCtrl, unsigned long FileOfs, int BufOfs,
unsigned char *pStart, unsigned char *pEnd)
{
unsigned char *pSC; /* start of context */
unsigned char *pEC; /* end of context */
unsigned char *pLastSC; /* last start of context */
unsigned long CtxOfs; /* context offset */
int i; /* loop counter */
/* Display a textual or a hexadecimal context
*/
if (pCtrl->fTextual || pCtrl->fHex)
{
pSC = pStart + BufOfs - pCtrl->Context / 2 + BMG_Patlen / 2;
if (pSC < pStart) /* outside the buffer ? */
pSC = pStart;
pEC = pSC + pCtrl->Context;
if (pEC > pEnd) /* outside the buffer ? */
{
pEC = pEnd;
/* if we have to truncate to pEnd readjust the start
* of the context if possible.
*/
if (pEC - pCtrl->Context > pStart)
pSC = pEC - pCtrl->Context;
}
/* display a hexadecimal context
*/
if (pCtrl->fHex)
{
fputc('\n', pCtrl->fpMsg);
CtxOfs = FileOfs + (pSC - pStart);
while (pSC != pEC)
{
pLastSC = pSC; /* remember where we started */
fprintf(pCtrl->fpMsg, "0x%08lx: ", CtxOfs); /* hex offset */
for (i = 0; i < 16; i++) /* display 16 hex digits */
{
if (pSC != pEC)
fprintf(pCtrl->fpMsg, "%02x ", (unsigned char) * pSC++);
else
fprintf(pCtrl->fpMsg, " ");
}
pSC = pLastSC; /* start again */
for (i = 0; i < 16; i++) /* display 16 characters */
{
if (pSC != pEC)
{
#ifdef MSDOS /* MSDOS can display all characters except CTRL chars */
if (!iscntrl((int) * pSC))
#else /* its __UNIX__ */
if (isprint((int) * pSC))
#endif
fputc(*pSC, pCtrl->fpMsg);
else
fputc('.', pCtrl->fpMsg);
pSC++;
}
}
CtxOfs += 16; /* increment context offset by 16 */
fputc('\n', pCtrl->fpMsg);
}
}
/* display textual context...
*/
if (pCtrl->fTextual)
{
while (pSC != pEC)
{
#ifdef MSDOS /* MSDOS can display all characters except CTRL chars */
if (!iscntrl((int) * pSC))
#else /* its __UNIX__ */
if (isprint((int) * pSC))
#endif
fputc(*pSC, pCtrl->fpMsg);
else
fputc('.', pCtrl->fpMsg);
pSC++;
}
}
}
if (!pCtrl->fHex)
fputc('\n', pCtrl->fpMsg);
}
/* Input : pCtrl - pointer to structure containg output and ctrl info
* Returns : number of matches found in file
*
* The pattern to search for must already have been set up using BMG_Setup
*
* Works by applying the BMG algorithm to a buffer. To ensure the pattern
* is not inadvertently chopped up, BMG_Patlen - 1 bytes is always moved
* to the start of the buffer. The next time we fill the buffer we fill it
* with BUFSIZ - (BMG_Patlen - 1) bytes.
*/
long BMG_gsar::BMG_Search(OUTPUT_CTRL *pCtrl)
{
register unsigned char *k;
register unsigned char *s;
register unsigned char *strend;
register int j;
int nTrans = 0; /* number of bytes to transfer to the start of the buffer */
int BufOfs; /* buffer offset for each match */
int Cnt = BUFSIZ;
long nMatches = 0; /* number of matches found */
long nBytes; /* number of bytes read */
unsigned long FileOfs = 0; /* current file offset */
for (;;)
{
nBytes = fread(&BMG_Buffer[nTrans], 1, (size_t) Cnt, pCtrl->fpIn);
if (!nBytes)
break;
s = BMG_Buffer;
strend = s + nBytes + nTrans;
k = BMG_Buffer + BMG_Patlen - 1;
for (;;)
{
while ((k += BMG_Delta0[ *(unsigned char *) k]) < strend)
;
if (k < (BMG_Buffer + LARGE))
break;
k -= LARGE;
j = BMG_Patlen - 1;
s = k - 1;
while (BMG_Cmap[ *s--] == BMG_Pattern[--j])
;
if (j >= 0)
k++;
else
{
if (k >= strend)
break;
/* found submatch, k is on the last letter in the match */
BufOfs = k - BMG_Buffer + 1 - BMG_Patlen;
nMatches++;
if (pCtrl->fVerbose)
Verbose(pCtrl, FileOfs, BufOfs, BMG_Buffer, strend);
// This modification is essential to bring to the caller the offset of the place found!
pCtrl->fLastOffset= FileOfs + BufOfs;
k++;
}
}
nTrans = BMG_Patlen - 1;
memcpy(BMG_Buffer, strend - nTrans, nTrans); /* move remaining bytes to the start */
Cnt = BUFSIZ - nTrans;
FileOfs += Cnt; /* calculate file offset */
}
return nMatches;
}
/* Input : pCtrl - pointer to structure containg output and ctrl info
* ReplaceBuf - pointer to buffer which contains replacement
* nReplace - number of bytes in replace buffer
*
* Returns : number of matches & replaces performed
* -1 if error in fwrite, disk might be full, or removed
*
* The pattern to search for must already have been set up using BMG_Setup
*
* Works by applying the BMG algorithm to a buffer. To ensure the pattern
* is not inadvertently chopped up we have to be a little careful. Since
* we're doing a search and replace we can't copy BMG_Patlen - 1 bytes
* always. Consider the following:
*
* We're searching for 'aa' in the string 'aaaa'
* This will give a result of 3 matches namely: a[0]a[1] a[1]a[2] a[2]a[3]
*
* But if we're searching for 'aa' and replacing with 'xx' in 'aaaa'
* This will give a result of 2 matches and the new buffer: 'xxxx'
* After a match of a[0]a[1] we must start the next search at a[2].
*
* So if we have a match at the exact end of the buffer. We must
* not transfer anything to the start.
*
* Use the following algorithm:
*
* Calculate the distance between the last match and the end of the buffer
* and call this distance n.
*
* n = end of buffer - last match
* if n >= Bmg_Patlen we transfer BMG_Patlen - 1 bytes to the start
* if n < Bmg_Patlen we transfer n bytes to the start of the buffer
*/
long BMG_gsar::BMG_SearchReplace(OUTPUT_CTRL *pCtrl, char *pReplaceBuf, unsigned short nReplace)
{
register unsigned char *k;
register unsigned char *s;
register unsigned char *strend;
register int j;
register int n;
unsigned char *pLast;
int nTrans = 0; /* number of bytes to transfer to the start of the buffer */
int BufOfs; /* buffer offset for each match */
int Cnt = BUFSIZ;
long nMatches = 0; /* number of matches found */
long nBytes; /* number of bytes read */
unsigned long FileOfs = 0; /* current file offset */
for (;;)
{
nBytes = (unsigned long) fread(&BMG_Buffer[nTrans], sizeof(unsigned char), (size_t) Cnt, pCtrl->fpIn);
if (!nBytes)
{
if (fwrite(BMG_Buffer, sizeof(unsigned char), nTrans, pCtrl->fpOut) != (size_t)nTrans)
return -1;
break;
}
s = BMG_Buffer;
pLast = s;
strend = s + nBytes + nTrans;
k = BMG_Buffer + BMG_Patlen - 1;
for (;;)
{
while ((k += BMG_Delta0[*(unsigned char *) k]) < strend)
;
if (k < (BMG_Buffer + LARGE))
break;
k -= LARGE;
j = BMG_Patlen - 1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -