⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 suffixtree.cpp

📁 doxygen(一个自动从源代码生成文档的工具)的源代码
💻 CPP
字号:
/****************************************************************************** * * $Id: suffixtree.cpp,v 1.10 2001/03/19 19:27:41 root Exp $ * * Copyright (C) 1997-2001 by Dimitri van Heesch. * * Permission to use, copy, modify, and distribute this software and its * documentation under the terms of the GNU General Public License is hereby  * granted. No representations are made about the suitability of this software  * for any purpose. It is provided "as is" without express or implied warranty. * See the GNU General Public License for more details. * * Documents produced by Doxygen are derivative works derived from the * input used in their production; they are not affected by this license. * */#include <stdio.h>#include "qtbc.h"#include "suffixtree.h"#define MAXWORDLEN 1024//----------------------------------------------------------------------------bool writeString(QFile &f,const char *s){  int len=strlen(s)+1;  return (f.writeBlock(s,len)!=len);}bool writeNumber(QFile &f,int num){  return (f.putch((num>>24)&0xff)==-1) ||         (f.putch((num>>16)&0xff)==-1) ||         (f.putch((num>>8)&0xff)==-1)  ||         (f.putch(num&0xff)==-1);}static bool writeEncodedNumber(QFile &f,uint number){  bool error=FALSE;  uint n=number;  while (n>=128)  {    int frac=n&0x7f;    error = error || (f.putch(frac|0x80)==-1);    n>>=7;  }  error = error || (f.putch(n)==-1);   return error; }static int encodedNumberSize(uint number){  uint n=number;  int size=1;  while (n>=128) { size++; n>>=7; }  return size;}//----------------------------------------------------------------------------int SuffixNodeList::compareItems(GCI item1,GCI item2){  SuffixNode *n1=(SuffixNode *)item1;  SuffixNode *n2=(SuffixNode *)item2;  return strcmp(n1->label,n2->label);}SuffixNode::SuffixNode(const char *lab) : references(0){  children = new SuffixNodeList;  children->setAutoDelete(TRUE);   label=lab;  totalFreq=0;  branchOffset=0;}SuffixNode::~SuffixNode() {  delete children;}void SuffixNode::addReference(int refId,int inName,int fullWord){  totalFreq++;  uint s=references.size();  if (s>0 && references.at(s-1).id==refId) // word occured in the same document  {    references.at(s-1).freq++;             // increase word's frequency    references.at(s-1).flags=((references.at(s-1).flags & INNAME_MASK)                               | (inName<<INNAME_BIT))                         +((references.at(s-1).flags & FULLWORD_MASK)                               | (fullWord<<FULLWORD_BIT))                         +((references.at(s-1).flags & WORDINNAME_MASK)                               | ((inName & fullWord)<<WORDINNAME_BIT));  }  else  {    references.resize(s+1);    references.at(s).id=refId;    references.at(s).freq=1;     references.at(s).flags=(inName<<INNAME_BIT)                       +(fullWord<<FULLWORD_BIT)                       +((inName && fullWord)<<WORDINNAME_BIT);  }}int SuffixNode::insert(const char *word,int refId,int inName,int fullWord){  int numNewNodes=0;  //printf("SuffixNode::insert(%s,%d)\n",word,refId);  SuffixNode *sn=children->first();  while (sn)  {    const char *lab=sn->label.data();    char w=word[0],l=lab[0],i=0;    while (w!=0 && l!=0 && w==l) { i++; w=word[i]; l=lab[i]; }    if (w==0 && l==0) // match found    {      sn->addReference(refId,inName,fullWord);      return numNewNodes;    }    if (i>0) // w and l contain a common prefix of length i    {      if (l==0) // w!=0 => follow this branch      {        sn->addReference(refId,inName,FALSE);        numNewNodes+=sn->insert(&word[i],refId,inName,fullWord);      }      else // l!=0 => split branch      {        char leftlab[MAXWORDLEN];        memcpy(leftlab,lab,i);        leftlab[i]='\0';        SuffixNode *r  = new SuffixNode(leftlab);        numNewNodes++;        SuffixNode *n2 = children->take();        // copy reference info        r->references  = n2->references.copy();        int j,refSize  = r->references.size();        for (j=0;j<refSize;j++)         {          //r->references[j].fullWord=FALSE;          //r->references[j].wordInName=FALSE;          r->references[j].flags &= ~(FULLWORD_MASK|WORDINNAME_MASK);        }        r->totalFreq   = n2->totalFreq;        //printf("root branch `%s'\n",leftlab);        if (w!=0) // two sub branches        {          SuffixNode *n1 = new SuffixNode(&word[i]);          numNewNodes++;          n1->addReference(refId,inName,fullWord);          r->addReference(refId,inName,FALSE);          r->children->append(n1);          //printf("Right branch `%s'\n",&word[i]);        }        else // one sub branch        {          r->addReference(refId,inName,fullWord);        }        //printf("Left branch `%s'\n",&lab[i]);        n2->label=&lab[i];        r->children->append(n2);        children->append(r);      }      return numNewNodes;    }    sn=children->next();  }  //printf("new branch `%s'\n",word);  SuffixNode *n=new SuffixNode(word);  numNewNodes++;  n->addReference(refId,inName,fullWord);  children->append(n);  return numNewNodes;}void SuffixNode::dump(int level,const char *prefix){  uint i;  if (references.size()>0)  {    printf("%s (level=%d offset=%d freq=%d) ",           prefix,level,branchOffset,totalFreq);    for (i=0;i<references.size();i++)       printf("%d->%d ",references.at(i).id,references.at(i).freq);    printf("\n");  }  SuffixNode *sn=children->first();  while (sn)  {    sn->dump(level+1,prefix+("-"+sn->label));     sn=children->next();  } }void SuffixNode::resolveForwardReferences(int &offset){  if (children->count()>0)  {    if (!label.isEmpty()) offset++; // terminator for the previous level    branchOffset=offset;  }  else    branchOffset=0;  SuffixNode *sn=children->first();  while (sn)  {    offset+=sn->label.length()+5;    uint i,refs=sn->references.size();    if (refs>0)    {      offset+=encodedNumberSize(sn->totalFreq);      offset+=encodedNumberSize((sn->references[0].id<<3)+                                 sn->references[0].flags);      offset+=encodedNumberSize(sn->references[0].freq);      for (i=1;i<refs;i++)      {        offset+=encodedNumberSize(                   ((sn->references.at(i).id - sn->references.at(i-1).id)<<3)+                     sn->references.at(i).flags);        offset+=encodedNumberSize(sn->references.at(i).freq);      }      offset+=encodedNumberSize(0);    }    //printf("Lab=%s offset=%d\n",sn->lab.data(),offset);    sn=children->next();  }   sn=children->first();  while (sn)  {    //printf("Lab=%s offset=%d\n",sn->lab.data(),offset);    sn->resolveForwardReferences(offset);    sn=children->next();  } }int SuffixNode::size(){  int s=0;  if (!label.isEmpty() && children->count()>0) s++; // for the terminator  SuffixNode *sn=children->first();  while (sn)  {    uint i,refs=sn->references.size();    s+=sn->size()+sn->label.length()+5;    if (refs>0)    {      s+=encodedNumberSize(sn->totalFreq);      s+=encodedNumberSize(                  (sn->references[0].id<<3)+                   sn->references[0].flags);      s+=encodedNumberSize(sn->references[0].freq);      for (i=1;i<refs;i++)      {        s+=encodedNumberSize(                ((sn->references.at(i).id - sn->references.at(i-1).id)<<3)+                  sn->references.at(i).flags);        s+=encodedNumberSize(sn->references.at(i).freq);      }      s+=encodedNumberSize(0);    }    sn=children->next();  }   return s;}bool SuffixNode::write(QFile &f){  bool error=FALSE;  if (children->count()>0 && !label.isEmpty()) error=error || (f.putch(0)==-1);  SuffixNode *sn=children->first();  while (sn)  {    //offset+=sn->lab.length()+1+2*sizeof(int);    int i,refs=sn->references.size();    error=error || writeString(f,sn->label);    error=error || writeNumber(f,sn->branchOffset|((refs==0)?0x80000000:0));    if (refs>0)    {      error=error || writeEncodedNumber(f,sn->totalFreq);      error=error || writeEncodedNumber(f,                 (sn->references[0].id<<3)+                  sn->references[0].flags);      error=error || writeEncodedNumber(f,sn->references[0].freq);      for (i=1;i<refs;i++)      {        error=error || writeEncodedNumber(f,                ((sn->references[i].id - sn->references[i-1].id)<<3)+                  sn->references[i].flags);        error=error || writeEncodedNumber(f,sn->references[i].freq);      }      error=error || writeEncodedNumber(f,0);    }    //printf("Lab=%s offset=%d\n",sn->lab.data(),offset);    sn=children->next();  }   sn=children->first();  while (sn)  {    error=error || sn->write(f);    sn=children->next();  }   return error;}//----------------------------------------------------------------------------SuffixTree::SuffixTree(){  root=new SuffixNode("");  nodes=1;}SuffixTree::~SuffixTree(){  delete root;}void SuffixTree::insertWord(const char *word,int index,bool inName){  QCString suffix=word;  uint i;  for (i=2;i<suffix.length();i++)   {    //printf("Inserting suffix %s\n",suffix.right(i).data());    nodes+=root->insert(suffix.right(i),index,inName?1:0,FALSE);  }  nodes+=root->insert(word,index,inName?1:0,TRUE);}void SuffixTree::dump(){  root->dump(0,"");}void SuffixTree::resolveForwardReferences(){  int offset=8;  root->resolveForwardReferences(offset);}int SuffixTree::size(){  return root->size();}bool SuffixTree::write(QFile &f){  if (!f.isOpen()) { printf("File not open\n"); return FALSE; }  bool error=FALSE;  error = error || root->write(f);  return !error;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -