⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmloutputdev.cc

📁 将pdf文档转换为高质量的html文档
💻 CC
📖 第 1 页 / 共 3 页
字号:
//========================================================================//// HtmlOutputDev.cc//// Copyright 1997-2002 Glyph & Cog, LLC//// Changed 1999-2000 by G.Ovtcharov//// Changed 2002 by Mikhail Kruk////========================================================================#ifdef __GNUC__#pragma implementation#endif#include <stdio.h>#include <stdlib.h>#include <stdarg.h>#include <stddef.h>#include <ctype.h>#include <math.h>#include "GString.h"#include "GList.h"#include "UnicodeMap.h"#include "gmem.h"#include "config.h"#include "Error.h"#include "GfxState.h"#include "GlobalParams.h"#include "HtmlOutputDev.h"#include "HtmlFonts.h"int HtmlPage::pgNum=0;int HtmlOutputDev::imgNum=1;extern double scale;extern GBool complexMode;extern GBool ignore;extern GBool printCommands;extern GBool printHtml;extern GBool noframes;extern GBool stout;extern GBool xml;extern GBool showHidden;extern GBool noMerge;static GString* basename(GString* str){    char *p=str->getCString();  int len=str->getLength();  for (int i=len-1;i>=0;i--)    if (*(p+i)==SLASH)       return new GString((p+i+1),len-i-1);  return new GString(str);}static GString* Dirname(GString* str){    char *p=str->getCString();  int len=str->getLength();  for (int i=len-1;i>=0;i--)    if (*(p+i)==SLASH)       return new GString(p,i+1);  return new GString();} //------------------------------------------------------------------------// HtmlString//------------------------------------------------------------------------HtmlString::HtmlString(GfxState *state, double fontSize, double _charspace, HtmlFontAccu* fonts) {  GfxFont *font;  double x, y;  state->transform(state->getCurX(), state->getCurY(), &x, &y);  if ((font = state->getFont())) {    yMin = y - font->getAscent() * fontSize;    yMax = y - font->getDescent() * fontSize;    GfxRGB rgb;    state->getFillRGB(&rgb);    GString *name = state->getFont()->getName();    if (!name) name = HtmlFont::getDefaultFont(); //new GString("default");   // HtmlFont hfont=HtmlFont(name, static_cast<int>(fontSize-1),_charspace, rgb);    HtmlFont hfont=HtmlFont(name, static_cast<int>(fontSize-1),0.0, rgb);    fontpos = fonts->AddFont(hfont);  } else {    // this means that the PDF file draws text without a current font,    // which should never happen    yMin = y - 0.95 * fontSize;    yMax = y + 0.35 * fontSize;    fontpos=0;  }  if (yMin == yMax) {    // this is a sanity check for a case that shouldn't happen -- but    // if it does happen, we want to avoid dividing by zero later    yMin = y;    yMax = y + 1;  }  col = 0;  text = NULL;  xRight = NULL;  link = NULL;  len = size = 0;  yxNext = NULL;  xyNext = NULL;  strSize = 0;  htext=new GString();  htext2=new GString();  dir = textDirUnknown;}HtmlString::~HtmlString() {  delete text;  delete htext;  delete htext2;//  delete strSize;  gfree(xRight);}void HtmlString::addChar(GfxState *state, double x, double y,			 double dx, double dy, Unicode u) {  if (dir == textDirUnknown) {    dir = UnicodeMap::getDirection(u);  }   if (len == size) {    size += 16;    text = (Unicode *)grealloc(text, size * sizeof(Unicode));    xRight = (double *)grealloc(xRight, size * sizeof(double));  }  text[len] = u;  if (len == 0) {    xMin = x;  }  xMax = xRight[len] = x + dx;  //xMax = xRight[len] = x;  ++strSize;//printf("added char: %f %f xright = %f\n", x, dx, x+dx);  ++len;}void HtmlString::endString(){  if( dir == textDirRightLeft && len > 1 )  {    //printf("will reverse!\n");    for (int i = 0; i < len / 2; i++)    {      Unicode ch = text[i];      text[i] = text[len - i - 1];      text[len - i - 1] = ch;    }  }}//------------------------------------------------------------------------// HtmlPage//------------------------------------------------------------------------HtmlPage::HtmlPage(GBool rawOrder, char *imgExtVal) {  this->rawOrder = rawOrder;  curStr = NULL;  yxStrings = NULL;  xyStrings = NULL;  yxCur1 = yxCur2 = NULL;  fonts=new HtmlFontAccu();  links=new HtmlLinks();  pageWidth=0;  pageHeight=0;  fontsPageMarker = 0;  DocName=NULL;  firstPage = -1;  imgExt = new GString(imgExtVal);}HtmlPage::~HtmlPage() {  clear();  if (DocName) delete DocName;  if (fonts) delete fonts;  if (links) delete links;  if (imgExt) delete imgExt;  }void HtmlPage::updateFont(GfxState *state) {  GfxFont *font;  double *fm;  char *name;  int code;  double w;    // adjust the font size  fontSize = state->getTransformedFontSize();  if ((font = state->getFont()) && font->getType() == fontType3) {    // This is a hack which makes it possible to deal with some Type 3    // fonts.  The problem is that it's impossible to know what the    // base coordinate system used in the font is without actually    // rendering the font.  This code tries to guess by looking at the    // width of the character 'm' (which breaks if the font is a    // subset that doesn't contain 'm').    for (code = 0; code < 256; ++code) {      if ((name = ((Gfx8BitFont *)font)->getCharName(code)) &&	  name[0] == 'm' && name[1] == '\0') {	break;      }    }    if (code < 256) {      w = ((Gfx8BitFont *)font)->getWidth(code);      if (w != 0) {	// 600 is a generic average 'm' width -- yes, this is a hack	fontSize *= w / 0.6;      }    }    fm = font->getFontMatrix();    if (fm[0] != 0) {      fontSize *= fabs(fm[3] / fm[0]);    }  }}void HtmlPage::beginString(GfxState *state, GString *s) {  curStr = new HtmlString(state, fontSize,charspace, fonts);}void HtmlPage::conv(){  HtmlString *tmp;  int linkIndex = 0;  HtmlFont* h;  for(tmp=yxStrings;tmp;tmp=tmp->yxNext){     int pos=tmp->fontpos;     //  printf("%d\n",pos);     h=fonts->Get(pos);     if (tmp->htext) delete tmp->htext;      tmp->htext=HtmlFont::simple(h,tmp->text,tmp->len);     tmp->htext2=HtmlFont::simple(h,tmp->text,tmp->len);     if (links->inLink(tmp->xMin,tmp->yMin,tmp->xMax,tmp->yMax, linkIndex)){       tmp->link = links->getLink(linkIndex);       /*GString *t=tmp->htext;       tmp->htext=links->getLink(k)->Link(tmp->htext);       delete t;*/     }  }}void HtmlPage::addChar(GfxState *state, double x, double y,		       double dx, double dy, 			double ox, double oy, Unicode *u, int uLen) {  double x1, y1, w1, h1, dx2, dy2;  int n, i, d;  state->transform(x, y, &x1, &y1);  n = curStr->len;  d = 0;   // check that new character is in the same direction as current string  // and is not too far away from it before adding /*  if ((UnicodeMap::getDirection(u[0]) != curStr->dir) ||      (n > 0 &&       fabs(x1 - curStr->xRight[n-1]) > 0.1 * (curStr->yMax - curStr->yMin))) {    endString();    beginString(state, NULL);  }*/  state->textTransformDelta(state->getCharSpace() * state->getHorizScaling(),			    0, &dx2, &dy2);  dx -= dx2;  dy -= dy2;  state->transformDelta(dx, dy, &w1, &h1);  if (uLen != 0) {    w1 /= uLen;    h1 /= uLen;  }/* if (d != 3) { endString(); beginString(state, NULL); }*/  for (i = 0; i < uLen; ++i)   {	if (u[i] == ' ')        {	    endString();	    beginString(state, NULL);	} else {	    curStr->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u[i]);        }   }/*  for (i = 0; i < uLen; ++i) {    curStr->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u[i]);  }*/}void HtmlPage::endString() {  HtmlString *p1, *p2;  double h, y1, y2;  // throw away zero-length strings -- they don't have valid xMin/xMax  // values, and they're useless anyway  if (curStr->len == 0) {    delete curStr;    curStr = NULL;    return;  }  curStr->endString();#if 0 //~tmp  if (curStr->yMax - curStr->yMin > 20) {    delete curStr;    curStr = NULL;    return;  }#endif  // insert string in y-major list  h = curStr->yMax - curStr->yMin;  y1 = curStr->yMin + 0.5 * h;  y2 = curStr->yMin + 0.8 * h;  if (rawOrder) {    p1 = yxCur1;    p2 = NULL;  } else if ((!yxCur1 ||              (y1 >= yxCur1->yMin &&               (y2 >= yxCur1->yMax || curStr->xMax >= yxCur1->xMin))) &&             (!yxCur2 ||              (y1 < yxCur2->yMin ||               (y2 < yxCur2->yMax && curStr->xMax < yxCur2->xMin)))) {    p1 = yxCur1;    p2 = yxCur2;  } else {    for (p1 = NULL, p2 = yxStrings; p2; p1 = p2, p2 = p2->yxNext) {      if (y1 < p2->yMin || (y2 < p2->yMax && curStr->xMax < p2->xMin))        break;    }    yxCur2 = p2;  }  yxCur1 = curStr;  if (p1)    p1->yxNext = curStr;  else    yxStrings = curStr;  curStr->yxNext = p2;  curStr = NULL;}void HtmlPage::coalesce() {  HtmlString *str1, *str2;  HtmlFont *hfont1, *hfont2;  double space, horSpace, vertSpace, vertOverlap;  GBool addSpace, addLineBreak;  int n, i;  double curX, curY, lastX, lastY;  int sSize = 0;        double diff = 0.0;  double pxSize = 0.0;  double strSize = 0.0;  double cspace = 0.0;#if 0 //~ for debugging  for (str1 = yxStrings; str1; str1 = str1->yxNext) {    printf("x=%f..%f  y=%f..%f  size=%2d '",	   str1->xMin, str1->xMax, str1->yMin, str1->yMax,	   (int)(str1->yMax - str1->yMin));    for (i = 0; i < str1->len; ++i) {      fputc(str1->text[i] & 0xff, stdout);    }    printf("'\n");  }  printf("\n------------------------------------------------------------\n\n");#endif  str1 = yxStrings;  if( !str1 ) return;  //----- discard duplicated text (fake boldface, drop shadows)  if( !complexMode )  {	HtmlString *str3;	GBool found;  	while (str1)	{		double size = str1->yMax - str1->yMin;		double xLimit = str1->xMin + size * 0.2;		found = gFalse;		for (str2 = str1, str3 = str1->yxNext;			str3 && str3->xMin < xLimit;			str2 = str3, str3 = str2->yxNext)		{			if (str3->len == str1->len &&				!memcmp(str3->text, str1->text, str1->len * sizeof(Unicode)) &&				fabs(str3->yMin - str1->yMin) < size * 0.2 &&				fabs(str3->yMax - str1->yMax) < size * 0.2 &&				fabs(str3->xMax - str1->xMax) < size * 0.2)			{				found = gTrue;				//printf("found duplicate!\n");				break;			}		}		if (found)		{			str2->xyNext = str3->xyNext;			str2->yxNext = str3->yxNext;			delete str3;		}		else		{			str1 = str1->yxNext;		}	}		  }    str1 = yxStrings;    hfont1 = getFont(str1);  str1->htext2->append(str1->htext);  if( str1->getLink() != NULL ) {    GString *ls = str1->getLink()->getLinkStart();    str1->htext->insert(0, ls);    delete ls;  }  curX = str1->xMin; curY = str1->yMin;  lastX = str1->xMin; lastY = str1->yMin;  while (str1 && (str2 = str1->yxNext)) {    hfont2 = getFont(str2);    space = str1->yMax - str1->yMin;    horSpace = str2->xMin - str1->xMax;    addLineBreak = !noMerge && (fabs(str1->xMin - str2->xMin) < 0.4);    vertSpace = str2->yMin - str1->yMax;//printf("coalesce %d %d %f? ", str1->dir, str2->dir, d);    if (str2->yMin >= str1->yMin && str2->yMin <= str1->yMax)    {	vertOverlap = str1->yMax - str2->yMin;    } else    if (str2->yMax >= str1->yMin && str2->yMax <= str1->yMax)    {	vertOverlap = str2->yMax - str1->yMin;    } else    {    	vertOverlap = 0;    }         if (	(	 (	  (	   (rawOrder && vertOverlap > 0.5 * space) 	   ||	   (!rawOrder && str2->yMin < str1->yMax)	  ) &&	  (horSpace > -0.5 * space && horSpace < space)	 ) ||       	 (vertSpace >= 0 && vertSpace < 0.5 * space && addLineBreak)	) && // in complex mode fonts must be the same, in other modes fonts do not metter	str1->dir == str2->dir // text direction the same       )     {     diff = str2->xMax - str1->xMin;     n = str1->len + str2->len;     if ((addSpace = horSpace > 0.1 * space)) {        ++n;      }          if (addLineBreak) {        ++n;      }        str1->size = (n + 15) & ~15;      str1->text = (Unicode *)grealloc(str1->text,				       str1->size * sizeof(Unicode));      str1->xRight = (double *)grealloc(str1->xRight,					str1->size * sizeof(double));      if (addSpace) {		/*  if (addSpace > (xoutRoundLower(hfont1->getSize()/scale)))		  {		  	str1->text[str1->len] = 0x20;			str1->htext->append(" ");			str1->htext2->append(" ");			str1->xRight[str1->len] = str2->xMin;			++str1->len;			++str1->strSize;		 } */  	   	 str1->text[str1->len] = 0x20;                 str1->htext->append(" ");                 str1->htext2->append(" ");                 str1->xRight[str1->len] = str2->xMin;                 ++str1->len;                ++str1->strSize;      }      if (addLineBreak) {	  str1->text[str1->len] = '\n';	  str1->htext->append("<br>");	  str1->htext2->append(" ");	  str1->xRight[str1->len] = str2->xMin;	  ++str1->len;	  str1->yMin = str2->yMin;	  str1->yMax = str2->yMax;	  str1->xMax = str2->xMax;	  int fontLineSize = hfont1->getLineSize();	  int curLineSize = (int)(vertSpace + space); 	  if( curLineSize != fontLineSize )	  {	      HtmlFont *newfnt = new HtmlFont(*hfont1);	      newfnt->setLineSize(curLineSize);	      str1->fontpos = fonts->AddFont(*newfnt);	      delete newfnt;	      hfont1 = getFont(str1);	      // we have to reget hfont2 because it's location could have	      // changed on resize  GStri;ng *iStr=GString::fromInt(i);	      hfont2 = getFont(str2); 	  }      }      str1->htext2->append(str2->htext2);      HtmlLink *hlink1 = str1->getLink();      HtmlLink *hlink2 = str2->getLink();      GString *fntFix;      GString *iStr=GString::fromInt(str2->fontpos);           fntFix = new GString("</span><span class=\"ft");      fntFix->append(iStr);      fntFix->append("\">");      if (((hlink1 == NULL) && (hlink2 == NULL)) && (hfont1->isEqualIgnoreBold(*hfont2) == gFalse))

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -