📄 rtf2htmlconverter.cpp
字号:
// RTF2HTMLConverter.cpp: implementation of the CRTF_HTMLConverter class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "RTF2HTMLTree.h"
#include "RTF2HTMLConverter.h"
#include "Util.h"
#include "..\..\shared\HtmlCharMap.h"
#include "..\..\shared\misc.h"
// Debug-build boilerplate: in _DEBUG builds THIS_FILE holds this file's
// name (__FILE__) and `new` is redefined to DEBUG_NEW.
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
// Two-character sequence: backslash followed by an apostrophe.
// NOTE(review): presumably the RTF prefix of a hex-encoded character
// (backslash, apostrophe, two hex digits); it is not referenced in the
// visible part of this file - confirm against its users.
const LPCTSTR MULTIBYTETAG = "\\'";
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Builds a converter for the requested conversion direction.
// Only c_modRTF2HTML is accepted (asserted in debug builds). The instance
// starts with a newly allocated CRTFTree, a default document title and
// meta data reset to its defaults.
CRTF_HTMLConverter::CRTF_HTMLConverter(TMode enMode)
{
    // The opposite direction (HTML -> RTF) is not supported at the moment
    ASSERT(c_modRTF2HTML == enMode);

    m_strTitle = "RTF2HTML Generated Document";
    m_RTFTree  = new CRTFTree;
    m_enMode   = enMode;

    ResetMetaData();
}
// Releases everything the converter owns: the HTML elements (deleted inside
// ResetMetaData) and the RTF tree.
CRTF_HTMLConverter::~CRTF_HTMLConverter()
{
    ResetMetaData();
    delete m_RTFTree;
}
void CRTF_HTMLConverter::SetTitle(const CString& strTitle)
{
m_strTitle = strTitle;
}
// Puts all per-document state back to its defaults: deletes the collected
// HTML elements, empties the font and colour tables and restores the default
// charset and font attributes.
void CRTF_HTMLConverter::ResetMetaData()
{
    // The element array stores raw pointers, so each element is deleted
    // before the array itself is emptied.
    int nElement = 0;
    while (nElement < m_arrHTMLElements.GetSize())
    {
        delete m_arrHTMLElements[nElement];
        ++nElement;
    }
    m_arrHTMLElements.RemoveAll();

    // Tables filled from the RTF header
    m_arrColors.RemoveAll();
    m_mapFontNames.RemoveAll();

    // Default charset and font attributes
    m_strCharset      = "1252";
    m_strActFontName  = "Arial";
    m_strActFontSize  = "10";
    m_strActFontColor = "#000000";
}
// Converts the RTF text held in m_strRTF into HTML, stored in m_strHTML.
//
// bWantHeaderFooter: when TRUE, the generated markup is wrapped in the
//                    document header and footer produced by
//                    R2H_GetHTMLHeader() / R2H_GetHTMLFooter().
//
// Returns false (after asserting in debug builds) when m_strRTF does not
// start with "{\rtf1" or contains no closing '}'; returns true otherwise.
// As a side effect m_strRTF is trimmed: trailing line breaks and anything
// following the last '}' are removed.
bool CRTF_HTMLConverter::ConvertRTF2HTML(BOOL bWantHeaderFooter)
{
    //Initializing
    m_strHTML = "";

    //Check_Valid_RTF
    m_strRTF.TrimRight("\n");
    m_strRTF.TrimRight("\r");

    if (!((m_strRTF.GetLength()>=7)&&(m_strRTF.Left(6)=="{\\rtf1")))
    {
        //Invalid RTF file. Must start with "{\rtf1"
        ASSERT(FALSE);
        return false;
    }

    // Make sure the text ends in a '}': cut off whatever follows the last one.
    // A text without any '}' is rejected here; scanning backwards without a
    // lower bound would otherwise read in front of the string.
    const int nLastBrace = m_strRTF.ReverseFind('}');
    if (nLastBrace < 0)
    {
        //Invalid RTF file. Must end with "}"
        ASSERT(FALSE);
        return false;
    }
    if (nLastBrace + 1 < m_strRTF.GetLength())
        m_strRTF = m_strRTF.Left(nLastBrace + 1);
    ASSERT(m_strRTF[m_strRTF.GetLength() - 1] == '}');

    //Clear internal members
    ResetMetaData();

    //Build up RTF Tree. Each node in the tree corresponds to a {} section in the RTF file (code)
    //and has a designator (name), like \RTF or \FONTTBL. We start with the whole RTF file
    //(Section \RTF1)
    // The replacement tree is allocated first so that m_RTFTree never points
    // to a deleted object if the allocation throws.
    CRTFTree* pNewTree = new CRTFTree;
    delete m_RTFTree;
    m_RTFTree = pNewTree;

    CRTFNode NodeRoot = R2H_BuildTree(m_strRTF);
    CString strRTFCode = (NodeRoot)->m_strThisCode;

    //Fill internal meta data members
    R2H_SetMetaData(NodeRoot);

    //Create html main body
    R2H_CreateHTMLElements(strRTFCode);

    //Render the collected elements
    R2H_GetHTMLElements(m_strHTML);

    if (bWantHeaderFooter)
    {
        // NOTE(review): this second call looks redundant with the one above;
        // it is kept because R2H_GetHTMLElements() is not visible here -
        // confirm it only (re)assigns its argument, then remove it.
        R2H_GetHTMLElements(m_strHTML);
        m_strHTML = R2H_GetHTMLHeader() + m_strHTML + "\r\n" + R2H_GetHTMLFooter();
    }
    return true;
}
bool CRTF_HTMLConverter::Convert(const CString& sRtf, CString& sHtml, BOOL bWantHeaderFooter)
{
CRTF_HTMLConverter r2h(c_modRTF2HTML);
r2h.m_strRTF = sRtf;
if (r2h.ConvertRTF2HTML(bWantHeaderFooter))
{
sHtml = r2h.m_strHTML;
return true;
}
return false;
}
// Returns the opening part of the HTML document: DOCTYPE, <html>, a <head>
// holding the charset meta tag (windows-<m_strCharset>) and the <title>
// (m_strTitle), followed by the opening <body> tag.
//
// NOTE(review): m_strTitle and m_strCharset are inserted verbatim, without
// HTML escaping.
CString CRTF_HTMLConverter::R2H_GetHTMLHeader()
{
    CString strHTMLHeader;
    // The public identifier must end in "//EN": that suffix names the language
    // of the DTD itself, not the language of the generated document.
    strHTMLHeader+= "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\r\n";
    strHTMLHeader+= "<html>\r\n";
    strHTMLHeader+= " <head>\r\n";
    strHTMLHeader+= " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=windows-"+m_strCharset+"\">\r\n";
    strHTMLHeader+= " <title>"+m_strTitle+"</title>\r\n";
    strHTMLHeader+= " </head>\r\n";
    strHTMLHeader+= " <body>\r\n";
    return strHTMLHeader;
}
// Returns the closing part of the HTML document: </body> and </html>, each
// terminated by CR/LF.
CString CRTF_HTMLConverter::R2H_GetHTMLFooter()
{
    return CString(" </body>\r\n"
                   "</html>\r\n");
}
// Extracts the RTF tag that starts at position lPos of strRTFSource.
//
// strRTFSource: RTF code to read from.
// lPos:         index of the tag's leading backslash within strRTFSource.
//
// Returns the tag text without the leading backslash. The tag ends in front
// of the first blank, '{', '}', backslash, CR or LF, or at the end of the
// string. A tag starting with an apostrophe (hex-encoded character) is cut
// after three characters: the apostrophe plus two hex digits.
// An out-of-range lPos asserts in debug builds and yields an empty tag.
CString CRTF_HTMLConverter::R2H_GetRTFTag(const CString& strRTFSource, long lPos)
{
    //Initializing
    CString strTag;
    const int nSourceLen = strRTFSource.GetLength();

    ASSERT(lPos>=0 && lPos<nSourceLen); //lPos is POS(\) within strRTFSource
    if (lPos<0 || lPos>=nSourceLen)
        return strTag;
    ASSERT(strRTFSource.GetAt(lPos)=='\\');

    // LF is a terminator as well as CR, so that a tag directly followed by a
    // bare line feed does not swallow the line break into its name.
    const CString TERMINATORS = " {}\\\r\n";

    //Starting behind the backslash, everything belongs to the tag until a terminator occurs
    for (int iStrPos=lPos + 1;iStrPos<nSourceLen;iStrPos++)
    {
        const char ChTest=strRTFSource[iStrPos];
        if (TERMINATORS.Find(ChTest) != -1)
            break; //designator terminated
        // Hex-encoded character: complete after apostrophe + 2 digits
        if (strTag.GetLength() >= 3 && strTag[0]=='\'')
            break;
        strTag+=ChTest;
    }
    return strTag;
}
void CRTF_HTMLConverter::R2H_SetMetaData(CRTFNode& NodeRoot)
{
//Initializing
CString strRTFSource = (NodeRoot)->m_strThisCode;
//Go thru RTF main string's global data
for (int iStrPos=0;iStrPos<strRTFSource.GetLength();iStrPos++)
{
char ChTest=strRTFSource[iStrPos];
if (ChTest=='\\')
{
CString strTag=R2H_GetRTFTag(strRTFSource, iStrPos);
CString strTestTag;
strTestTag="ansicpg";
if ((strTag.GetLength()>=strTestTag.GetLength())&&(strTag.Left(strTestTag.GetLength())==strTestTag))
{
m_strCharset = strTag;
m_strCharset.Delete(0, strTestTag.GetLength());
}
iStrPos+=strTag.GetLength();
if (((iStrPos+1)<strRTFSource.GetLength())&&(strRTFSource[iStrPos+1]==' '))
iStrPos++; //Ignore Blank after Tag
}
else
{
//Normal character
}
} //loop thru string
//Go thru 1st-level-nodes
for(int iRTFNodes=0;iRTFNodes<NodeRoot.Count;iRTFNodes++)
{
//Color table
if (NodeRoot.Nodes[iRTFNodes]->m_strName=="colortbl")
{
CString strColorTbl=NodeRoot.Nodes[iRTFNodes]->m_strThisCode;
strColorTbl.Delete(0,NodeRoot.Nodes[iRTFNodes]->m_strName.GetLength()+1); //+1=leading \ (backslash)
//Palette entries are separated with semicolon ;
CStringArray arrStrColors;
StringToArray(strColorTbl, arrStrColors);
for (int iColor=0;iColor<arrStrColors.GetSize();iColor++)
{
//RGB values within palette entry are \redXXX\greenXXX\blueXXX
DWORD r=0,g=0,b=0;
CString strColorEntry=arrStrColors[iColor];
strColorEntry.TrimLeft();
strColorEntry.TrimRight();
if (strColorEntry.GetLength()>0)
{
//Loop thru base colors (r,g,b) and set them
CStringArray arrStrRGBs;
R2H_GetRTFTags(strColorEntry, arrStrRGBs);
ASSERT(arrStrRGBs.GetSize()==3); //RTF must contain exactly 3 entries for red/green/blue
for (int iBaseColor=0;iBaseColor<arrStrRGBs.GetSize();iBaseColor++)
{
CString strBaseColor=arrStrRGBs[iBaseColor];
CString strTestBaseColor;
strTestBaseColor="red";
if ((strBaseColor.GetLength()>=strTestBaseColor.GetLength())&&(strBaseColor.Left(strTestBaseColor.GetLength())==strTestBaseColor))
{
strBaseColor.Delete(0, strTestBaseColor.GetLength());
r=StringToLong(strBaseColor);
}
strTestBaseColor="green";
if ((strBaseColor.GetLength()>=strTestBaseColor.GetLength())&&(strBaseColor.Left(strTestBaseColor.GetLength())==strTestBaseColor))
{
strBaseColor.Delete(0, strTestBaseColor.GetLength());
g=StringToLong(strBaseColor);
}
strTestBaseColor="blue";
if ((strBaseColor.GetLength()>=strTestBaseColor.GetLength())&&(strBaseColor.Left(strTestBaseColor.GetLength())==strTestBaseColor))
{
strBaseColor.Delete(0, strTestBaseColor.GetLength());
b=StringToLong(strBaseColor);
}
}
}
COLORREF ref=RGB(r,g,b);
m_arrColors.Add(ref);
}
continue;
}
//Font Table
if (NodeRoot.Nodes[iRTFNodes]->m_strName=="fonttbl")
{
for(int iFontNodes=0;iFontNodes<NodeRoot.Nodes[iRTFNodes].Count;iFontNodes++)
{
CString strFontName=NodeRoot.Nodes[iRTFNodes].Nodes[iFontNodes]->m_strName;
CString strFont=NodeRoot.Nodes[iRTFNodes].Nodes[iFontNodes]->m_strPlain;
ASSERT((strFontName.GetLength()>=2)&&(strFontName.Left(1)=="f")); //No valid RTF
ASSERT(strFont.GetLength()>0);
strFont.TrimLeft();
strFont.TrimRight();
strFont.Remove(';');
m_mapFontNames.SetAt(strFontName, strFont);
} //for-each-font
continue;
} //if font-table
} //for each 1st-level-node
}
void CRTF_HTMLConverter::R2H_InterpretTag(const CString& strRTFTag)
{
CString strTestTag;
CHTMLElement* pElement = NULL;
//Char attributes
if (strRTFTag=="b")
pElement = new CHTMLElement(CHTMLElement::c_nodHTMLBegin, "b");
else if (strRTFTag=="b0")
pElement = new CHTMLElement(CHTMLElement::c_nodHTMLEnd, "b");
else if (strRTFTag=="i")
pElement = new CHTMLElement(CHTMLElement::c_nodHTMLBegin, "i");
else if (strRTFTag=="i0")
pElement = new CHTMLElement(CHTMLElement::c_nodHTMLEnd, "i");
else if (strRTFTag=="ul")
pElement = new CHTMLElement(CHTMLElement::c_nodHTMLBegin, "u");
else if (strRTFTag=="ulnone")
pElement = new CHTMLElement(CHTMLElement::c_nodHTMLEnd, "u");
else
pElement = new CHTMLElement();
{
// TRACE ("RTF Tag was '%s'\n", strRTFTag);
}
//Special character (umlaut)
strTestTag="'";
if ((pElement->m_enNodeType==CHTMLElement::c_nodInvalid)&&(strRTFTag.GetLength()>=strTestTag.GetLength())&&(strRTFTag.Left(strTestTag.GetLength())==strTestTag))
{
CString strSpecialChar=strRTFTag;
strSpecialChar.Delete(0, strTestTag.GetLength());
ASSERT(strSpecialChar.GetLength()==2); //Invalid RTF (must be 2-digit hex code)
strSpecialChar=strSpecialChar.Left(2);
pElement->m_enNodeType=CHTMLElement::c_nodText;
pElement->m_strNodeText = "&#x"+strSpecialChar+";";
}
//Paragraph Tag
if ((pElement->m_enNodeType==CHTMLElement::c_nodInvalid)&&(strRTFTag=="par"))
{
long lLastUnclosedPStart=-1;
//Look if we first must close paragraph
for (int iLastElements=m_arrHTMLElements.GetSize()-1;iLastElements>=0;iLastElements--)
{
/*
CHTMLElement* pElementTest = m_arrHTMLElements[iLastElements];
if ((pElementTest->m_enNodeType==CHTMLElement::c_nodHTMLEnd)&&(pElementTest->m_strNodeText=="p")) break; //everything is OK
if ((pElementTest->m_enNodeType==CHTMLElement::c_nodHTMLBegin)&&(pElementTest->m_strNodeText=="p"))
{
lLastUnclosedPStart=iLastElements;
break; //everything is OK
}
*/
}
if (lLastUnclosedPStart>=0)
{
//Look if there is no text between last <p> and this <p-end> (e.g. <p></p>)
//HTML does then not display a linefeed, therefore make it to <p> </p>
BOOL bLastParaEmpty=TRUE;
for (int iLastPara=lLastUnclosedPStart;iLastPara<m_arrHTMLElements.GetSize();iLastPara++)
{
CHTMLElement* pElementTest = m_arrHTMLElements[iLastPara];
if (pElementTest->m_enNodeType==CHTMLElement::c_nodText)
{
if ((pElementTest->m_strNodeText!="")&&
(pElementTest->m_strNodeText!="\r")&&
(pElementTest->m_strNodeText!="\n")&&
(pElementTest->m_strNodeText!="\r\n")&&
(pElementTest->m_strNodeText!="b"))
{
bLastParaEmpty = FALSE;
}
}
}
/*
if (bLastParaEmpty)
{ //Insert modified blank (see above)
CHTMLElement* pElementBlank = new CHTMLElement();
pElementBlank->m_enNodeType=CHTMLElement::c_nodText;
pElementBlank->m_strNodeText = " ";
m_arrHTMLElements.Add(pElementBlank);
}
*/
//Insert Closing </p>
/* CHTMLElement* pElementClose = new CHTMLElement();
pElementClose->m_enNodeType=CHTMLElement::c_nodHTMLEnd;
pElementClose->m_strNodeText = "p";
m_arrHTMLElements.Add(pElementClose);
*/
}
//Add paragraph tag (<p>
pElement->m_enNodeType=CHTMLElement::c_nodHTMLBegin;
pElement->m_strNodeText = "br";
// pElement->m_strNodeText = "p";
}
// else Unknown RTF tag, just ignore
//Paragraph Alignment
strTestTag="q";
if ((pElement->m_enNodeType==CHTMLElement::c_nodInvalid)&&(strRTFTag.GetLength()>=strTestTag.GetLength())&&(strRTFTag.Left(strTestTag.GetLength())==strTestTag))
{
//Get RTF alignment
CString strAlignRTF, strAlignHTML;
pElement->m_enNodeType=CHTMLElement::c_nodHTMLBegin;
pElement->m_strNodeText= "font";
strAlignRTF = strRTFTag;
strAlignRTF.Delete(0, strTestTag.GetLength());
ASSERT(strAlignRTF.GetLength()==1); //Invalid RTF
//Convert RTF options to HTML options
if (strAlignRTF=="l")
{
strAlignHTML="left";
}
else if (strAlignRTF=="r")
{
strAlignHTML="right";
}
else if (strAlignRTF=="c")
{
strAlignHTML="center";
}
else
{
; //unsupported
}
//Find last paragraph
long lLastParaStart=-1;
for (int iLastElements=m_arrHTMLElements.GetSize()-1;iLastElements>=0;iLastElements--)
{
CHTMLElement* pElementTest = m_arrHTMLElements[iLastElements];
if ((pElementTest->m_enNodeType==CHTMLElement::c_nodHTMLBegin)&&(pElementTest->m_strNodeText=="p"))
{
lLastParaStart=iLastElements;
break; //everything is OK
}
}
if ((lLastParaStart>=0)&&(strAlignHTML!=""))
{
CHTMLElement* pElementPara = m_arrHTMLElements[lLastParaStart];
pElementPara->m_mapParams.SetAt("align", "\""+strAlignHTML+"\"");
}
}
//font color
strTestTag="cf";
if ((pElement->m_enNodeType==CHTMLElement::c_nodInvalid)&&(strRTFTag.GetLength()>=strTestTag.GetLength())&&(strRTFTag.Left(strTestTag.GetLength())==strTestTag))
{
CString strActColor;
pElement->m_enNodeType=CHTMLElement::c_nodHTMLBegin;
pElement->m_strNodeText= "font";
strActColor = strRTFTag;
strActColor.Delete(0, strTestTag.GetLength());
ASSERT(strActColor.GetLength()>0); //Invalid RTF
long lActColor=StringToLong(strActColor);
ASSERT(lActColor<m_arrColors.GetSize()); //Color not in Colortable !
if (lActColor<m_arrColors.GetSize()) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -