📄 xmlparser.cpp
字号:
// check for inline images
int ilinks=0;
for (int ii=0;ii<np;++ii)
if (m_frags[ii+fragbase].attr & PE_IMAGE)
++ilinks;
// add links if any
int nlinks=m_pp[idx].nlinks();
if (nlinks+ilinks) {
int off=m_pp[idx].lidx();
Buffer<Paragraph::Link> links(nlinks+ilinks);
for (int link=0;link<nlinks;++link) {
links[link].off=m_links[off+link].start;
links[link].len=m_links[off+link].length;
links[link].target=m_links[off+link].target;
}
p.links=links;
}
ilinks=0;
// add character data
wchar_t *bp=p.str;
wchar_t *be=bp+len;
wchar_t *bs=bp;
Attr *ap=p.cflags;
for (int f=0;f<np && bp<be;++f) {
Frag *fp=&m_frags[f+fragbase];
// add a space if needed
if (fp->attr&LEADSP) {
*bp++=' ';
(*ap++).wa=(WORD)fp->attr;
}
if (fp->attr&PE_IMAGE) { // inline image
int globidx=fp->local[0] | ((unsigned int)fp->local[1]<<16);
p.links[nlinks+ilinks].off=p.links[nlinks+ilinks].len=0;
p.links[nlinks+ilinks].target=m_inline_images[globidx];
if (bp<be) {
*bp++=nlinks+ilinks;
(*ap++).wa=(WORD)fp->attr;
}
++ilinks;
} else if (fp->attr&(LOCAL|REALLYLOCAL)) { // cached value
const wchar_t *wp=fp->attr & REALLYLOCAL ? fp->local : fp->str;
const wchar_t *we=wp+fp->len;
while (bp<be && wp<we) {
*bp++=*wp++;
(*ap++).wa=(WORD)fp->attr;
}
} else {
Buffer<char> buf(fp->len);
m_fp->seek(fp->fpos);
if (m_fp->read(buf,fp->len)!=(int)fp->len) {
ASSERT(0);
break;
}
Buffer<wchar_t> wbuf(fp->len);
char *cp=buf;
int ul;
if (fp->attr&CDATA)
ul=XML_ConvertCharacterData((XML_Parser)m_parser,&cp,fp->len,wbuf,fp->len);
else
ul=XML_ParseCharacterData((XML_Parser)m_parser,&cp,fp->len,wbuf,fp->len);
if (ul<0) // xml parser returned an error
break;
wchar_t *wp=wbuf,*we=wbuf+ul;
// skip leading ws
while (wp<we && (*wp<=32 || *wp==SHY))
++wp;
goto in;
while (bp<be && wp<we) { // copy, compacting whitespace
// add a space
*bp++=L' ';
(*ap++).wa=(WORD)fp->attr;
in:
// copy non-ws
while (bp<be && wp<we && *wp>32)
if (*wp==SHY) {
if (bp>bs)
ap[-1].hyphen=true;
++wp;
} else {
*bp++=*wp++;
(*ap++).wa=(WORD)fp->attr;
}
// skip over spaces
while (wp<we && (*wp<=32 || *wp==SHY))
++wp;
}
}
// add a trailing space if required
if (fp->attr&TRAILSP && bp<be) {
*bp++=L' ';
(*ap++).wa=(WORD)fp->attr;
}
}
// to avoid endless loops later, we have to pad the paragraph up to the
// advertised length
while (bp-bs<len) {
*bp++=L' ';
(*ap++).wa=0;
}
p.str.setsize(len);
p.cflags.setsize(len);
p.len=len;
p.links.setsize(nlinks+ilinks);
return p;
}
// Returns the character offset at which paragraph `para` of document `docid`
// starts.  An invalid document id or a negative paragraph yields 0.  A
// paragraph index at or past the document's paragraph count yields the start
// stored in the entry immediately following the document's last paragraph.
int XMLParserImp::GetPStart(int docid,int para) {
  if (para<0 || docid<0 || docid>=m_docs.GetSize())
    return 0;
  const int first=m_docs[docid].start;
  const int count=m_docs[docid].length;
  // clamp out-of-range paragraphs to the entry one past the end
  const int slot=para>=count ? first+count : first+para;
  return m_pp[slot].start;
}
// Returns the length of paragraph `para` of document `docid`, computed as the
// difference between the start of the following entry and the start of this
// one.  Returns 0 when either index is out of range.
int XMLParserImp::GetPLength(int docid,int para) {
  if (docid<0 || docid>=m_docs.GetSize())
    return 0;
  if (para<0 || para>=m_docs[docid].length)
    return 0;
  const int cur=m_docs[docid].start+para;
  const int next=cur+1;
  return m_pp[next].start-m_pp[cur].start;
}
// Returns the start offset stored in the entry just past the last paragraph
// of document `docid`, i.e. the total length of the document.  Returns 0 for
// an invalid document id.
int XMLParserImp::GetTotalLength(int docid) {
  const bool valid=docid>=0 && docid<m_docs.GetSize();
  if (!valid)
    return 0;
  const int endSlot=m_docs[docid].start+m_docs[docid].length;
  return m_pp[endSlot].start;
}
// Maps a character position inside document `docid` to the document-relative
// index of the paragraph that contains it.
//
// Returns 0 for an invalid document id, for a position outside
// [0, total length], or if the search finds nothing.  A position exactly equal
// to the total length returns the paragraph count.
int XMLParserImp::LookupParagraph(int docid,int charpos) {
  if (docid<0 || docid>=m_docs.GetSize())
    return 0;
  const int base=m_docs[docid].start;
  const int end=base+m_docs[docid].length;
  if (charpos<0 || charpos>m_pp[end].start)
    return 0;
  if (charpos==m_pp[end].start)
    return end-base;
  // binary search for the entry with start <= charpos < next start
  int lo=base;
  int hi=end-1;
  while (lo<=hi) {
    const int mid=(lo+hi)>>1;
    if (m_pp[mid].start>charpos)
      hi=mid-1;
    else if (m_pp[mid+1].start<=charpos)
      lo=mid+1;
    else
      return mid-m_docs[docid].start;
  }
  return 0;
}
// Resolves the reference `name` to a position in `dest`.
//
// The value stored in m_references is interpreted as a global paragraph index.
// The documents are scanned for the one whose paragraph range contains that
// index; on success `dest` receives the document id, the document-relative
// paragraph and an offset of 0.  Returns false if the name is unknown or no
// document contains the paragraph.
bool XMLParserImp::LookupReference(const wchar_t *name,FilePos& dest) {
  void *data;
  if (!m_references.Lookup(name,data))
    return false;
  int para=(int)data;
  // locate the owning document
  for (int d=0;d<m_docs.GetSize();++d) {
    const int rel=para-m_docs[d].start;
    if (rel<0 || rel>=m_docs[d].length)
      continue;
    dest.docid=d;
    dest.para=rel;
    dest.off=0;
    return true;
  }
  // no document contains this paragraph; not expected in practice
  return false;
}
// Fetches the bitmap for the binary object `name`, using the image cache.
//
// A cached entry is reused when it already has a bitmap, was produced for the
// same rotation, and for each dimension either the same limit was requested
// or the cached size is below both the cached limit and the requested limit.
// Otherwise the binary is opened and decoded through ImageLoader::Load and the
// cache entry is overwritten with the result.
//
// On success fills `img` (hBmp, width, height) and returns true.  On failure
// the cache entry is removed and false is returned.
bool XMLParserImp::GetImage(const wchar_t *name,HDC hDC,int maxwidth,
                            int maxheight,int rotation,Image& img)
{
  bool alloc;
  CachedImage *cim=m_imcache.Lookup(name,alloc);

  // try to satisfy the request from the cache
  if (!alloc && cim->hBmp) {
    const bool widthOk=cim->maxwidth==maxwidth ||
      (cim->width<cim->maxwidth && cim->width<maxwidth);
    const bool heightOk=cim->maxheight==maxheight ||
      (cim->height<cim->maxheight && cim->height<maxheight);
    if (widthOk && heightOk && cim->rotation==rotation) {
      img.hBmp=cim->hBmp;
      img.height=cim->height;
      img.width=cim->width;
      return true;
    }
  }

  // cache miss: decode the binary
  const wchar_t *type,*vname;
  ImageLoader::BinReader *rdr=OpenBinary(name,&type,&vname);
  if (rdr) {
    int width,height;
    HBITMAP hBmp;
    const bool loaded=ImageLoader::Load(hDC,type,rdr,maxwidth,maxheight,
                                        rotation,hBmp,width,height);
    delete rdr;
    if (loaded) {
      // remember the parameters this bitmap was produced for
      cim->maxwidth=maxwidth;
      cim->maxheight=maxheight;
      cim->rotation=rotation;
      cim->width=width;
      cim->height=height;
      cim->hBmp=hBmp;
      cim->name=vname;
      img.hBmp=hBmp;
      img.width=width;
      img.height=height;
      return true;
    }
  }

  // unknown binary or decode failure
  m_imcache.Remove(cim);
  return false;
}
void XMLParserImp::AddQ(int start) {
PE pe;
pe.Zero();
pe.start=start;
m_pp.Add(pe);
}
// Records a table-of-contents entry at `pos` with the given nesting `level`.
// Does nothing when no bookmark object (m_bmk) is attached.  The reference
// position handed to AddTocEnt is `pos` with its offset reset to 0.
void XMLParserImp::AddToc(FilePos pos,int level) {
  if (!m_bmk)
    return;
  FilePos anchor(pos);
  anchor.off=0;
  m_bmk->AddTocEnt(pos.para,pos.off,pos.docid,anchor,level);
}
void XMLParserImp::AddP(int pstart,int lstart,int start,int len,CFMT& fmt)
{
if (pstart==m_frags.GetSize() || len==0) {
AddQ(start);
return;
}
PE pe;
pe.setidx_nf(pstart,m_frags.GetSize()-pstart);
pe.setidx_nl(lstart,m_links.GetSize()-lstart);
pe.start=start;
pe.flags=fmt.flags;
pe.setindent(RClamp(fmt.lindent,0,500),RClamp(fmt.rindent,0,500),
RClamp(fmt.findent,0,500));
m_pp.Add(pe);
}
void XMLParserImp::AddImage(const wchar_t *href,int start,CFMT& fmt)
{
PE pe;
pe.name=m_buffer.Append(href,wcslen(href)+1);
pe.flags=fmt.flags|PE_IMAGE;
pe.setidx_nl(0,0);
pe.start=start;
pe.setindent(RClamp(fmt.lindent,0,500),RClamp(fmt.rindent,0,500),
RClamp(fmt.findent,0,500));
m_pp.Add(pe);
}
void XMLParserImp::PushWS() {
Frag f;
// if we already have a leading space, but its charformat differs from current
// we have to create a fully whitespace frag
if (m_ps->attr&LEADSP && m_ps->last_frag_fmt!=m_ps->cfmt.attr) {
f.attr=m_ps->last_frag_fmt.wa|REALLYLOCAL;
f.len=1;
f.local[0]=L' ';
m_frags.Add(f);
m_ps->numfrags++;
m_ps->len++;
m_ps->attr&=~LEADSP;
}
}
// Reader that presents a binary object stored as base64 text in a run of
// parser fragments as a stream of decoded bytes.
class Base64BinReader: public ImageLoader::BinReader {
  // value returned by Char()/NextFragChar() when no more characters exist
  enum { FEOF=65535 };

  XMLParserImp *m_parser;       // parser owning the fragments and the file
  Buffer<wchar_t> m_fragbuf;    // characters of the fragment being consumed
  int m_maxfragsize;            // largest fragment length seen so far
  int m_ptr;                    // read position inside m_fragbuf
  int m_fragptr;                // index of the next fragment to load
  int m_fragtop;                // one past the last fragment of the binary
  unsigned int m_chunk;         // bit accumulator for one base64 group
  int m_chunkbytes;             // decoded bytes still waiting in m_chunk

  // loads the next fragment into m_fragbuf; false when none could be loaded
  bool NextFrag();
  // loads the next fragment and returns its first character, or FEOF
  wchar_t NextFragChar();
  // returns the next character of the base64 text, or FEOF
  wchar_t Char() {
    if (m_ptr<m_fragbuf.size())
      return m_fragbuf[m_ptr++];
    return NextFragChar();
  }

public:
  // starts reading at the first fragment of binary `b`
  Base64BinReader(XMLParserImp *xp,XMLParserImp::Binary *b) :
    m_parser(xp),
    m_maxfragsize(0),
    m_ptr(0),
    m_fragptr(b->startfrag),
    m_fragtop(b->startfrag+b->numfrags),
    m_chunk(0),
    m_chunkbytes(0)
  {
  }

  // decodes up to `count` bytes into `buffer`; returns the number stored
  virtual int Read(void *buffer,int count);
};
// Opens the binary object called `name` for reading.
//
// Returns NULL when no such binary is registered.  Otherwise returns a newly
// allocated reader (the caller deletes it) and, for each non-NULL out
// parameter, stores the binary's type in *type and its id in *vname.
ImageLoader::BinReader *XMLParserImp::OpenBinary(const wchar_t *name,
                                                 const wchar_t **type,
                                                 const wchar_t **vname) {
  void *found;
  if (!m_binaries.Lookup(name,found))
    return NULL;

  Binary *bin=static_cast<Binary*>(found);
  if (vname)
    *vname=bin->id;
  if (type)
    *type=bin->type;

  // base64 is the only supported encoding
  return new Base64BinReader(this,bin);
}
// Loads the next fragment of the binary into m_fragbuf and rewinds m_ptr.
//
// Fragments flagged REALLYLOCAL or LOCAL are copied from memory (fp->local or
// fp->str); any other fragment is read from the file at fp->fpos and converted
// with the XML parser's character-data routines.
//
// Returns true when a fragment was loaded, false when all fragments have been
// consumed or the fragment could not be read or converted.  On failure the
// buffer is left empty with m_ptr at 0: the buffer may have just been
// reallocated to a larger size, and without this reset a later Char() call
// would hand out its undefined contents using the stale read position.
bool Base64BinReader::NextFrag() {
  if (m_fragptr>=m_fragtop)
    return false;
  XMLParserImp::Frag *fp=&m_parser->m_frags[m_fragptr];
  // grow the buffer when this fragment is larger than any seen so far
  if ((int)fp->len>m_maxfragsize) {
    m_maxfragsize=fp->len;
    m_fragbuf=Buffer<wchar_t>(m_maxfragsize);
  }
  if (fp->attr&REALLYLOCAL) {
    // text stored inside the fragment itself
    m_fragbuf.setsize(fp->len);
    memcpy(m_fragbuf,fp->local,fp->len*sizeof(wchar_t));
  } else if (fp->attr&LOCAL) {
    // text stored behind the fragment's string pointer
    m_fragbuf.setsize(fp->len);
    memcpy(m_fragbuf,fp->str,fp->len*sizeof(wchar_t));
  } else {
    // text still in the file: read the raw bytes and convert them
    Buffer<char> buf(fp->len);
    m_parser->m_fp->seek(fp->fpos);
    if (m_parser->m_fp->read(buf,fp->len)!=(int)fp->len) {
      ASSERT(0);
      m_fragbuf.setsize(0);
      m_ptr=0;
      return false;
    }
    char *cp=buf;
    int ul;
    if (fp->attr&CDATA)
      ul=XML_ConvertCharacterData((XML_Parser)m_parser->m_parser,&cp,fp->len,m_fragbuf,fp->len);
    else
      ul=XML_ParseCharacterData((XML_Parser)m_parser->m_parser,&cp,fp->len,m_fragbuf,fp->len);
    if (ul<0) { // xml parser returned an error
      m_fragbuf.setsize(0);
      m_ptr=0;
      return false;
    }
    m_fragbuf.setsize(ul);
  }
  ++m_fragptr;
  m_ptr=0;
  return true;
}
// Advances to the next fragment that contains at least one character and
// returns that character; returns FEOF when no further fragment can be
// loaded.
//
// Fragments that load successfully but turn out empty are skipped rather than
// treated as the end of the data, so an empty fragment in the middle of a
// binary does not truncate it.  The loop terminates because every successful
// NextFrag() call advances the fragment index.
wchar_t Base64BinReader::NextFragChar() {
  while (NextFrag())
    if (m_ptr<m_fragbuf.size())
      return m_fragbuf[m_ptr++];
  return FEOF;
}
// Decodes base64 text into `buffer`.
//
// Stores at most `count` bytes and returns the number actually stored; a
// short count means the end of the data was reached.  Decoded bytes that do
// not fit are kept in m_chunk/m_chunkbytes and delivered by the next call.
//
// Characters outside the base64 alphabet are skipped.  A '=' ends the data:
// after two sextets it yields one byte, after three sextets two bytes, and in
// any other position it yields nothing, because no complete byte has been
// gathered.  If the text ends in the middle of a group without '=', the
// partial group is discarded.
int Base64BinReader::Read(void *buffer,int count) {
  char *dest=(char*)buffer;
  char *de=dest+count,*ds=dest;
  do {
    // store accumulated bytes, most significant of the 24-bit group first;
    // shifting instead of aliasing m_chunk keeps this byte-order independent
    while (m_chunkbytes>0 && ds<de) {
      *ds++=(char)((m_chunk>>16)&0xff);
      m_chunkbytes--;
      m_chunk<<=8;
    }
    if (ds>=de)
      break;
    // fill in three more octets: four sextets at bit positions 18, 12, 6, 0
    m_chunk=0; m_chunkbytes=3;
    int i=18;
    do {
      wchar_t c=Char();
      if (c==FEOF) {
        m_chunkbytes=0;
        break;
      }
      unsigned int sextet;
      if (c>='A' && c<='Z')
        sextet=(unsigned int)(c-'A');
      else if (c>='a' && c<='z')
        sextet=(unsigned int)(c-'a'+26);
      else if (c>='0' && c<='9')
        sextet=(unsigned int)(c-'0'+52);
      else if (c=='+')
        sextet=62;
      else if (c=='/')
        sextet=63;
      else if (c=='=') {
        // padding: only whole bytes gathered so far are valid
        if (i==6)
          m_chunkbytes=1;
        else if (i==0)
          m_chunkbytes=2;
        else
          m_chunkbytes=0; // misplaced padding carries no complete byte
        // force eof
        m_fragtop=m_fragptr;
        m_fragbuf.setsize(0);
        break;
      } else
        continue; // silently ignore all other chars
      m_chunk|=sextet<<i;
      i-=6;
    } while (i>=0);
    if (!m_chunkbytes) // EOF
      break;
  } while (ds<de);
  return (int)(ds-dest);
}
static void *my_malloc(void *priv,size_t size) {
return HeapAlloc((HANDLE)priv,HEAP_NO_SERIALIZE,size);
}
static void *my_realloc(void *priv,void *ptr,size_t size) {
return HeapReAlloc((HANDLE)priv,HEAP_NO_SERIALIZE,ptr,size);
}
static void my_free(void *priv,void *ptr) {
HeapFree((HANDLE)priv,HEAP_NO_SERIALIZE,ptr);
}
// Copies `len` characters from `s` into `dest` (capacity `dlen`) with
// whitespace normalized: leading and trailing whitespace is dropped and every
// interior run of whitespace becomes a single space.  Any character with a
// code of 32 or less counts as whitespace.
//
// Output is truncated silently when `dest` is full.  No terminator is written
// and the number of characters produced is not reported.
static void normalize_space(wchar_t *dest,int dlen,const wchar_t *s,int len)
{
  wchar_t *const out_end=dest+dlen;
  const wchar_t *const in_end=s+len;

  // drop leading whitespace
  for (;s<in_end && *s<=32;++s)
    ;

  // first word, not preceded by a separator
  while (s<in_end && dest<out_end && *s>32)
    *dest++=*s++;

  // every following word is preceded by exactly one space
  while (s<in_end && dest<out_end) {
    for (;s<in_end && *s<=32;++s)
      ;
    if (s<in_end)
      *dest++=L' ';
    while (s<in_end && dest<out_end && *s>32)
      *dest++=*s++;
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -