📄 xmlparser.cpp
字号:
/*
* Copyright (c) 2001,2002,2003 Mike Matsnev. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice immediately at the beginning of the file, without modification,
* this list of conditions, and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Absolutely no warranty of function or purpose is made by the author
* Mike Matsnev.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $Id: XMLParser.cpp,v 1.106.2.12 2004/10/13 13:50:55 mike Exp $
*
*/
#include <afxwin.h>
#include <afxtempl.h>
#include <setjmp.h>
#include "FastArray.h"
#include "TextParser.h"
#include "XMLParser.h"
#include "TextViewNG.h"
#include "Unicode.h"
#include "StrBuf.h"
#include "WMap.h"
#include "Image.h"
#include "xscanf.h"
#include "expat.h"
// XML namespace URI of FictionBook 2.0 documents.
#define FB_NS L"http://www.gribuser.ru/xml/fictionbook/2.0"
// Length of FB_NS in wchar_t units, not counting the terminating NUL.
#define FB_NS_LEN (sizeof(FB_NS)/sizeof(wchar_t)-1)
// XML namespace URI of XLink.
#define XLINK_NS L"http://www.w3.org/1999/xlink"
// NOTE(review): presumably the MIME type accepted for embedded stylesheets;
// the use site is not visible in this part of the file - confirm.
#define HR_STYLE L"text/css"
// Bit flags living in the top byte of a 32-bit attribute/flags word.
// NOTE(review): CDATA..TRAILSP are presumably stored in Frag::attr and
// PE_IMAGE in PE::flags (GetParagraphImp tests it there) - confirm the
// former against the code that builds fragments.
enum {
CDATA=0x1000000,
LEADSP=0x2000000,
LOCAL=0x4000000,
REALLYLOCAL = 0x8000000,
TRAILSP = 0x10000000,
// PE flags
PE_IMAGE=0x80000000,
};
// NOTE(review): presumably the maximum length of a table-of-contents entry;
// the use site is not visible here.
enum { MAX_CONTENTS_LEN=80 };
// Parser error codes.
// NOTE(review): ERR_NOTFB2 presumably means "document is not FictionBook 2";
// confirm where it is raised.
enum {
ERR_NOTFB2=1
};
// Current formatting state: character attributes plus paragraph indents and
// flags. ParseState keeps one as the active format (cfmt) and a stack of
// them (attr_stack) that PushA/PopA save to and restore from.
struct CFMT {
Attr attr; // character attributes
int lindent; // presumably left indent (mirrors Paragraph::lindent)
int rindent; // presumably right indent (mirrors Paragraph::rindent)
int findent; // presumably first-line indent (mirrors Paragraph::findent)
BYTE flags; // paragraph flags
};
// One entry of ImageCache: a singly linked list node holding a bitmap handle
// together with the parameters stored alongside it.
struct CachedImage {
  CachedImage     *next;      // next entry in the cache list, NULL for the tail
  HBITMAP         hBmp;       // bitmap handle, NULL when nothing is loaded
  int             width;
  int             height;
  int             maxwidth;
  int             maxheight;
  int             rotation;
  const wchar_t   *name;      // lookup key; the pointer is stored, not copied

  // Start from a fully defined state: every member is initialised (in
  // declaration order), so a freshly allocated entry never exposes
  // indeterminate dimensions to code that inspects it before filling it in.
  CachedImage() :
    next(NULL), hBmp(NULL), width(0), height(0),
    maxwidth(0), maxheight(0), rotation(0), name(NULL) { }

  // Destroy the bitmap (if any) and forget the name. The node itself and its
  // list linkage are left untouched.
  void Release() { if (hBmp) DeleteObject(hBmp); name=NULL; hBmp=NULL; }
};
class ImageCache {
CachedImage *m_head,*m_tail;
int m_count;
int m_max;
public:
ImageCache(int max=4) : m_head(NULL), m_tail(NULL), m_count(0), m_max(max) { }
~ImageCache() { RemoveAll(); }
CachedImage *Lookup(const wchar_t *name,bool& alloc);
void Remove(CachedImage *img);
void RemoveAll() {
while (m_head) {
CachedImage *next=m_head->next;
m_head->Release();
delete m_head;
m_head=next;
}
m_count=0;
m_head=m_tail=NULL;
}
};
// Return the cache entry for `name`.
//
// On a hit the existing entry is returned and `alloc` is set to false.
// On a miss `alloc` is set to true and the returned entry has only its name
// set: either a brand new node appended to the list (while fewer than m_max
// nodes exist) or the head node, released and moved to the tail.
// The `name` pointer itself is stored in the entry, not a copy of the string.
CachedImage *ImageCache::Lookup(const wchar_t *name,bool& alloc) {
  alloc=false;
  CachedImage *entry=m_head;
  while (entry!=NULL) {
    if (wcscmp(name,entry->name)==0)
      return entry;
    entry=entry->next;
  }

  // miss: the caller gets an entry it has to fill in
  alloc=true;

  if (m_count<m_max) {
    // still room, append a fresh node
    entry=new CachedImage;
    ++m_count;
    entry->name=name;
    if (m_tail==NULL)
      m_head=entry;
    else
      m_tail->next=entry;
    m_tail=entry;
    return entry;
  }

  // cache is full: recycle the head node and move it to the tail
  ASSERT(m_head!=NULL);
  CachedImage *victim=m_head;
  const bool only_entry=(victim==m_tail);
  if (!only_entry)
    m_head=victim->next;
  victim->Release();
  victim->name=name;
  victim->next=NULL;
  if (!only_entry)
    m_tail->next=victim;
  m_tail=victim;
  return victim;
}
// Remove one entry from the cache: unlink it from the list, release its
// bitmap, delete the node and decrement the entry count.
//
// `img` must be a node previously returned by Lookup. A NULL pointer or a
// node that is not part of this cache is ignored instead of dereferencing a
// NULL predecessor; such a node is neither released nor deleted because the
// cache does not own it.
void ImageCache::Remove(CachedImage *img) {
  if (img==NULL)
    return;

  // Locate the predecessor; the head has none.
  CachedImage *prev=NULL;
  if (img!=m_head) {
    // tough, will have to traverse the list
    for (prev=m_head;prev;prev=prev->next)
      if (prev->next==img)
        break;
    if (prev==NULL)
      return; // not in this cache, leave it alone
  }

  // Unlink, keeping head and tail consistent.
  if (prev)
    prev->next=img->next;
  else
    m_head=img->next;
  if (img==m_tail)
    m_tail=prev; // becomes NULL when the list is now empty

  img->Release();
  delete img;
  --m_count;
}
// XMLParser implementation driven by expat callbacks. Parsed text is indexed
// as paragraphs (PE) made of fragments (Frag), with links, sub-documents and
// embedded binaries stored in the arrays below.
class XMLParserImp: public XMLParser {
public:
  // Working state of the stylesheet parser (see ParseStylesheet).
  struct SP_State {
    enum {
      START,
      NAME,
      FLAGS,
      FM,LM,RM,SIZE,COLOR
    };
    wchar_t   stylename[128];   // name being accumulated by NAdd
    int       stylenameptr;     // number of characters stored in stylename
    ElemFmt   format;
    int       state;            // one of the enum values above
    int       num;
    bool      sign;
    void      Init() { stylenameptr=0; format.Clear(); state=START; }
    // Append a character to stylename; silently drops it when the buffer
    // (minus one reserved slot) is full.
    void      NAdd(wchar_t ch) {
      if (stylenameptr<sizeof(stylename)/sizeof(wchar_t)-1)
        stylename[stylenameptr++]=ch;
    }
  };

  // State carried through one parse run.
  struct ParseState {
    enum { MAX_NEST=64 };
    int       len; // current len
    int       start;
    DWORD     attr;
    Attr      last_frag_fmt;
    bool      last_frag_trailsp;
    bool      root_element;
    CFMT      cfmt;                   // currently active format
    CFMT      attr_stack[MAX_NEST];   // saved formats, see PushA/PopA
    int       attr_stack_ptr;         // number of entries in attr_stack
    int       acch_lev,in_stylesheet;
    int       enable;
    int       section_nest;
    int       title_start;
    int       link_start;
    const wchar_t *link_name;
    int       pf_start,pl_start,numfrags;
    int       binary;
    FmtArray  *styles;
    WMap      *stylemap;
    jmp_buf   jout; // NOTE(review): presumably the longjmp target for aborting a parse - confirm
    // Save the current format. Once MAX_NEST entries are stored further
    // pushes are dropped, so a later PopA restores an older format than the
    // one that was active at the matching push.
    void      PushA() {
      if (attr_stack_ptr<MAX_NEST)
        attr_stack[attr_stack_ptr++]=cfmt;
    }
    // Restore the most recently saved format; no-op on an empty stack.
    void      PopA() {
      if (attr_stack_ptr>0)
        cfmt=attr_stack[--attr_stack_ptr];
    }
    // Combined attribute word: parser flags OR'ed with the current format.
    DWORD     Att() { return attr|cfmt.attr.wa; }
    void      ApplyFmt(ElemFmt *e,int nest=0);
  };

  struct Frag { // smallest element - character data
    union {
      DWORD         fpos; // offset into the file
      const wchar_t *str; // pointer to a cached value
      wchar_t       local[2]; // cached right here
    };
    DWORD     len; // raw char count
    DWORD     attr; // attributes of this run
  };

  struct PE { // paragraph
    enum {
      FRAGBITS=10,
      MAXFRAGS=1<<FRAGBITS,
      FRAGSHIFT=32-FRAGBITS,
      IDXMASK=(1<<FRAGSHIFT)-1,
      // The three indents are packed into `indent` as 10-bit fields at bit
      // offsets 0 (right), 10 (left) and 20 (first line).
      INDENTBITS=10,
      INDENTMASK=(1<<INDENTBITS)-1
    };
    union {
      DWORD         idx_nf; // offset into m_frags
      const wchar_t *name; // image name, for image paragraphs
    };
    int       start; // start of parsed paragraph
    DWORD     linkidx_nl; // offset into m_links
    DWORD     indent; // left, right and first line indentation
    DWORD     flags;
    // Count is kept in the top FRAGBITS bits, index in the remaining ones.
    DWORD     nfrags() { return idx_nf>>FRAGSHIFT; }
    DWORD     idx() { return idx_nf&IDXMASK; }
    DWORD     nlinks() { return linkidx_nl>>FRAGSHIFT; }
    DWORD     lidx() { return linkidx_nl&IDXMASK; }
    // Indent accessors. The mask covers the whole 10-bit field; the former
    // 0x2ff mask cleared bit 8 and thereby corrupted every value in
    // 256..511 and 768..1023.
    DWORD     li() { return (indent>>INDENTBITS)&INDENTMASK; }
    DWORD     ri() { return indent&INDENTMASK; }
    DWORD     fi() { return (indent>>(2*INDENTBITS))&INDENTMASK; }
    void      setidx_nf(DWORD idx,DWORD nf) { idx_nf=(idx&IDXMASK)|(nf<<FRAGSHIFT); }
    void      setidx_nl(DWORD idx,DWORD nf) { linkidx_nl=(idx&IDXMASK)|(nf<<FRAGSHIFT); }
    // Pack left, right and first-line indents; each is truncated to 10 bits.
    void      setindent(DWORD l,DWORD r,DWORD f) {
      indent=((f&INDENTMASK)<<(2*INDENTBITS))|((l&INDENTMASK)<<INDENTBITS)|(r&INDENTMASK);
    }
    void      Zero() { memset(this,0,sizeof(*this)); }
  };

  struct Document { // subdocument
    int       start; // start paragraph
    int       length; // length in paragraphs
    CString   name;
  };

  struct Link {
    int           start;
    int           length;
    const wchar_t *target;
  };

  struct Binary {
    wchar_t   *id;
    wchar_t   *type;
    int       numfrags;
    int       startfrag;
  };

  friend class Base64BinReader;

  FastArray<Frag>             m_frags;
  FastArray<PE>               m_pp;
  FastArray<Link>             m_links;
  FastArray<Binary>           m_binarystorage;
  FastArray<const wchar_t *>  m_inline_images;
  StrBuf                      m_buffer;
  XML_Parser                  m_parser; // expat parser, freed in the destructor
  CArray<Document,Document&>  m_docs;
  WMap                        m_references;
  WMap                        m_binaries;
  ParseState                  *m_ps;
  SP_State                    *m_sps;
  CString                     m_cover;
  ImageCache                  m_imcache;

  // Build the paragraph stored at absolute index idx of m_pp.
  Paragraph     GetParagraphImp(int idx);

  // paragraphs
  virtual Paragraph GetParagraph(int docid,int para);
  virtual int   Length(int docid); // in paragraphs
  virtual int   GetPLength(int docid,int para);
  virtual int   GetPStart(int docid,int para);
  virtual int   GetTotalLength(int docid);
  virtual int   LookupParagraph(int docid,int charpos);

  // documents
  virtual int   GetSubDocCount() { return m_docs.GetSize(); }
  virtual CString GetSubDocName(int docid);

  // links
  virtual bool  LookupReference(const wchar_t *name,FilePos& dest);

  // images
  virtual bool  GetImage(const wchar_t *name,HDC hDC,int maxwidth,
                         int maxheight,int rotation,Image& img);
  virtual void  InvalidateImageCache() { m_imcache.RemoveAll(); }

  // construction and destruction
  XMLParserImp(Meter *m,CBufFile *fp,Bookmarks *bmk,
               HANDLE heap);
  virtual ~XMLParserImp();
  virtual bool  ParseFile(int encoding);

  // paragraphs
  void          AddP(int pstart,int lstart,int start,int len,CFMT& fmt);
  void          AddImage(const wchar_t *href,int start,CFMT& fmt);
  void          AddQ(int start);
  void          AddToc(FilePos pos,int level);
  void          PushWS(); // check for leading spaces/format flags

  // stylesheet
  void          ParseStylesheet(const wchar_t *text,int len);

  // callbacks
  void          StartElement(const wchar_t *name,const wchar_t **attr);
  void          EndElement(const wchar_t *name);
  void          CharData(const wchar_t *text,int len);

  // expat callbacks
  static void   StartElementCB(void *udata,const wchar_t *name,
                               const wchar_t **attr);
  static void   EndElementCB(void *udata,const wchar_t *name);
  static void   CharDataCB(void *udata,const wchar_t *text,int len);
  static int    UnknownEncodingCB(void *data,const wchar_t *name,
                                  XML_Encoding *info);
  static void   StartCDataCB(void *udata);
  static void   EndCDataCB(void *udata);

  // binary access
  ImageLoader::BinReader *OpenBinary(const wchar_t *name,const wchar_t **type,
                                     const wchar_t **vname);
};
// Definition of the static member XMLParser::ElemFmt::flag_names.
// NOTE(review): presumably one letter per element-format flag, in flag
// order; the meaning of the individual letters is not visible here.
const TCHAR *XMLParser::ElemFmt::flag_names=_T("apofestcdlqxrivbgh");
// Construct an idle parser. Every heap-backed container is bound to `heap`;
// no expat parser exists yet (m_parser is NULL) and the transient parse-state
// pointers start out NULL instead of indeterminate.
// Initialisers are listed in member declaration order, which is the order
// in which the members are actually constructed.
XMLParserImp::XMLParserImp(Meter *m,CBufFile *fp,Bookmarks *bmk,HANDLE heap) :
  XMLParser(m,fp,heap,bmk),
  m_frags(heap), m_pp(heap), m_links(heap),
  m_binarystorage(heap), m_inline_images(heap),
  m_buffer(heap), m_parser(NULL),
  m_references(heap), m_binaries(heap),
  m_ps(NULL), m_sps(NULL)
{
}
// Free the expat parser if one was created. The image cache needs no
// explicit cleanup here: m_imcache's own destructor empties it.
XMLParserImp::~XMLParserImp() {
  if (m_parser!=NULL)
    XML_ParserFree(m_parser);
}
// Length of sub-document `docid` in paragraphs; 0 for an invalid id.
int XMLParserImp::Length(int docid) {
  if (docid>=0 && docid<m_docs.GetSize())
    return m_docs[docid].length;
  return 0;
}
// Code point U+00AD (SOFT HYPHEN).
#define SHY 0xAD
// Fetch paragraph `para` of sub-document `docid`. An out-of-range document
// id or paragraph number yields a default-constructed Paragraph.
Paragraph XMLParserImp::GetParagraph(int docid,int para) {
  const bool doc_ok=docid>=0 && docid<m_docs.GetSize();
  if (!doc_ok)
    return Paragraph();

  const Document& doc=m_docs[docid];
  const bool para_ok=para>=0 && para<doc.length;
  if (!para_ok)
    return Paragraph();

  // translate the document-relative number into an absolute paragraph index
  return GetParagraphImp(doc.start+para);
}
// Name of sub-document `docid`. An invalid id gives an empty string; the
// first document falls back to "Main" when it has no name of its own.
CString XMLParserImp::GetSubDocName(int docid) {
  const bool in_range=docid>=0 && docid<m_docs.GetSize();
  if (!in_range)
    return CString();

  const CString& title=m_docs[docid].name;
  if (docid==0 && title.GetLength()==0)
    return _T("Main");
  return title;
}
// Limit v to the range [min,max]. The lower bound is checked first, so when
// the bounds are inverted (min > max) a value below min still yields min.
static int RClamp(int v,int min,int max) {
  return v<min ? min
       : v>max ? max
       : v;
}
Paragraph XMLParserImp::GetParagraphImp(int idx) {
if (m_pp[idx].flags&PE_IMAGE) { // a very special case
Paragraph p(ImageLoader::IMAGE_VSIZE);
p.flags=(BYTE)m_pp[idx].flags;
p.lindent=m_pp[idx].li();
p.rindent=m_pp[idx].ri();
p.findent=m_pp[idx].fi();
for (int i=0;i<ImageLoader::IMAGE_VSIZE;++i) {
p.str[i]=L' ';
p.cflags[i].wa=0;
}
// abuse links for image href
p.links=Buffer<Paragraph::Link>(1);
p.links[0].off=0;
p.links[0].len=ImageLoader::IMAGE_VSIZE;
p.links[0].target=m_pp[idx].name;
p.flags|=Paragraph::image;
return p;
}
// here we have to read the paragraphs from file
int len=m_pp[idx+1].start-m_pp[idx].start,np=m_pp[idx].nfrags();
int fragbase=m_pp[idx].idx();
Paragraph p(len);
p.flags=(BYTE)m_pp[idx].flags;
p.lindent=m_pp[idx].li();
p.rindent=m_pp[idx].ri();
p.findent=m_pp[idx].fi();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -