📄 xmlparser.cpp
字号:
// Returns the number of characters the first len characters of s occupy
// after whitespace normalization: leading and trailing runs of characters
// <= 32 are dropped and every interior run counts as one character.
static int normalized_length(const wchar_t *s,int len) {
  const wchar_t *const stop = s + len;
  int count = 0;
  bool pending_gap = false; // whitespace seen after at least one counted char

  for (const wchar_t *p = s; p < stop; ++p) {
    if (*p <= 32) {
      // leading whitespace never opens a gap
      if (count)
        pending_gap = true;
      continue;
    }
    // a gap is only paid for once a following non-space character shows up,
    // so trailing whitespace costs nothing
    if (pending_gap) {
      ++count;
      pending_gap = false;
    }
    ++count;
  }
  return count;
}
// True when each of the first len characters of text is CR, LF, space or
// TAB; an empty range is reported as whitespace.
static bool iswhitespace(const wchar_t *text,int len) {
  for (int i = 0; i < len; ++i) {
    switch (text[i]) {
      case 0x09: // TAB
      case 0x0a: // LF
      case 0x0d: // CR
      case 0x20: // space
        continue;
      default:
        return false;
    }
  }
  return true;
}
// Table of element format records, indexed by the values stored in
// g_elements; LookupElem hands out entry 0 as the default.
static XMLParser::FmtArray g_eformat;
// Name lookup used by LookupElem. It is not allocated in this part of the
// file; as a static pointer it starts out null.
static WMap *g_elements;
// File-scope helper object: its destructor deletes g_elements when static
// objects are destroyed.
static struct Cleaner {
~Cleaner() { delete g_elements; }
} g_cleaner;
// Finds the format record registered for an element name.
//   name - key looked up in g_elements; may be NULL
// Returns a pointer into g_eformat: the entry whose index is stored as the
// map value for name, or entry 0 when name is NULL or has no mapping.
static XMLParser::ElemFmt *LookupElem(const wchar_t *name) {
  void *data;
  if (name && g_elements->Lookup(name,data)) {
    // The map value is an index carried inside a pointer. Convert through a
    // pointer-sized integer first: a direct pointer-to-int cast loses bits
    // and is rejected by some compilers where pointers are wider than int.
    const size_t index = reinterpret_cast<size_t>(data);
    return &g_eformat[static_cast<int>(index)];
  }
  return &g_eformat[0]; // default
}
// Returns the part of name that follows the first '|', or name itself when
// it contains no '|'. The result points into the caller's string.
static const wchar_t *LocalName(const wchar_t *name) {
  const wchar_t *bar = wcschr(name, L'|');
  if (!bar)
    return name;
  return bar + 1;
}
// Calls PushA() and then overlays onto cfmt every field of *e that is not
// set to the NOCHG marker.
//   e    - format record to apply
//   nest - when non-zero, the font size becomes e->fsz-nest, but not less
//          than 1
void XMLParserImp::ParseState::ApplyFmt(ElemFmt *e,int nest) {
  PushA();

  // --- character attributes ---
  if (e->fsz!=XMLParser::ElemFmt::NOCHG) {
    if (nest)
      cfmt.attr.fsize=max(e->fsz-nest,1);
    else
      cfmt.attr.fsize=e->fsz;
  }
  if (e->bold!=XMLParser::ElemFmt::NOCHG)
    cfmt.attr.bold=e->bold;
  if (e->italic!=XMLParser::ElemFmt::NOCHG)
    cfmt.attr.italic=e->italic;
  if (e->underline!=XMLParser::ElemFmt::NOCHG)
    cfmt.attr.underline=e->underline;
  if (e->color!=XMLParser::ElemFmt::NOCHG)
    cfmt.attr.color=e->color;

  // --- paragraph alignment: only the align_mask bits are replaced ---
  if (e->align!=XMLParser::ElemFmt::NOCHG)
    cfmt.flags=(cfmt.flags & ~Paragraph::align_mask) | (e->align & Paragraph::align_mask);

  // --- indents: a negative value is relative (its magnitude is added to
  //     the current indent), any other value replaces the indent ---
  if (e->lindent!=XMLParser::ElemFmt::NOCHG)
    cfmt.lindent=(e->lindent<0) ? cfmt.lindent-e->lindent : e->lindent;
  if (e->rindent!=XMLParser::ElemFmt::NOCHG)
    cfmt.rindent=(e->rindent<0) ? cfmt.rindent-e->rindent : e->rindent;
  if (e->findent!=XMLParser::ElemFmt::NOCHG)
    cfmt.findent=(e->findent<0) ? cfmt.findent-e->findent : e->findent;
}
// Looks up an attribute value in a name/value list.
//   attr - NULL-terminated array of alternating names and values
//          (name0, value0, name1, value1, ..., NULL); may itself be NULL
//   name - attribute name to search for (exact, case-sensitive match)
//   def  - returned when the attribute is not present
// Returns the value stored right after the first matching name, or def.
const wchar_t *GetAttr(const wchar_t **attr,const wchar_t *name,const wchar_t *def=NULL)
{
  // A missing list (or a missing name) simply means "not found"; without
  // this guard a NULL attr would be dereferenced below.
  if (!attr || !name)
    return def;
  for (; *attr; attr+=2) {
    if (!wcscmp(name,*attr))
      return attr[1];
  }
  return def;
}
// Start-tag handler. Looks up the format record for ns_name and performs
// every action selected by that record's flag bits: document and section
// bookkeeping, formatting pushes, stylesheet mode, links, paragraph start,
// description items, binaries and images.
//   ns_name - element name; the root check below compares it against FB_NS
//             followed by "|FictionBook"
//   attr    - NULL-terminated array of alternating attribute names and values
// The first element seen must be the FictionBook root; otherwise the handler
// leaves through longjmp(m_ps->jout,ERR_NOTFB2).
void XMLParserImp::StartElement(const wchar_t *ns_name,const wchar_t **attr) {
if (!m_ps->root_element) { // special case for root
const wchar_t *p = ns_name;
const wchar_t *q = FB_NS;
const wchar_t *e = q + FB_NS_LEN - 1;
// ns_name must agree with FB_NS for up to FB_NS_LEN-1 characters
while (*p && q < e)
if (*p++ != *q++)
longjmp(m_ps->jout,ERR_NOTFB2);
// one further character of ns_name is accepted without comparison; what
// follows it must be exactly "|FictionBook"
if (!p[0] || wcscmp(p+1,L"|FictionBook")!=0)
longjmp(m_ps->jout,ERR_NOTFB2);
m_ps->root_element=true;
}
ElemFmt *elem=LookupElem(ns_name);
if (elem->flags&ElemFmt::DOCUMENT) {
// only ahead of the very first document: AddQ at the current position
if (m_docs.GetSize()==0)
AddQ(m_ps->start);
// register a new document beginning at the current end of m_pp
Document d;
d.start=m_pp.GetSize();
d.length=0;
d.name=GetAttr(attr,L"name",L"");
m_docs.Add(d);
// positions restart from zero inside each document
m_ps->start=0;
// add a cover page if it was present
if (m_docs.GetSize()==1 && m_cover.GetLength()>0) {
AddImage(m_cover,m_ps->start,m_ps->cfmt);
m_ps->start+=ImageLoader::IMAGE_VSIZE;
}
}
if (elem->flags&ElemFmt::ENABLE)
++m_ps->enable;
if (elem->flags&ElemFmt::SECTION)
m_ps->section_nest++;
if (elem->flags&ElemFmt::ELINE)
AddQ(m_ps->start);
if (elem->flags&ElemFmt::FMT) // apply formatting
m_ps->ApplyFmt(elem);
if (elem->flags&ElemFmt::STYLE) {
// named style: apply its format when the name is known, otherwise push
// the current format unchanged so the PopA in EndElement stays balanced
const wchar_t *style=GetAttr(attr,L"name");
void *val;
// NOTE(review): (int)val narrows a pointer to int; on targets where
// pointers are wider than int this loses bits -- confirm target platforms
if (style && m_ps->stylemap->Lookup(style,val))
m_ps->ApplyFmt(&m_ps->styles->operator[]((int)val));
else
m_ps->PushA();
}
if (elem->flags&ElemFmt::HEADER) {
m_ps->PushA();
m_ps->cfmt.flags |= Paragraph::header;
}
if (elem->flags&ElemFmt::STYLESHEET) {
const wchar_t *type=GetAttr(attr,L"type");
// enter (or nest deeper into) stylesheet mode when already inside one or
// when type equals HR_STYLE; m_sps->Init() runs only when the counter
// leaves zero
if (m_ps->in_stylesheet || (type && !wcscmp(type,HR_STYLE)))
if (!m_ps->in_stylesheet++)
m_sps->Init();
}
if (elem->flags&ElemFmt::TITLE) // start a toc entry
m_ps->title_start=m_pp.GetSize();
if (elem->flags&ElemFmt::LINKDEST) { // link destination
const wchar_t *id=GetAttr(attr,L"id");
if (id) {
// keep a terminated copy of the id and map it to the current m_pp size
// NOTE(review): the size is stored by casting an integer to void* --
// readers of m_references must convert it back the same way
wchar_t *copy=m_buffer.Append(id,wcslen(id)+1);
m_references.Add(copy,(void*)m_pp.GetSize());
}
}
if (elem->flags&ElemFmt::LINK) { // link
const wchar_t *dest=GetAttr(attr,XLINK_NS L"|href");
const wchar_t *type=GetAttr(attr,L"type");
ElemFmt *linkformat;
// type="note" selects the "|>footnote" format, anything else "|>link"
if (type && !wcscmp(type,L"note"))
linkformat=LookupElem(L"|>footnote");
else
linkformat=LookupElem(L"|>link");
m_ps->ApplyFmt(linkformat);
if (dest) {
// remember where the link text begins and keep a terminated copy of
// the target
m_ps->link_start=m_ps->len;
m_ps->link_name=m_buffer.Append(dest,wcslen(dest)+1);
} else
m_ps->link_name=NULL;
}
if ((m_ps->enable && elem->flags&ElemFmt::PARA) ||
elem->flags&ElemFmt::DESCCAT)
{ // start a new paragraph
// reset the per-paragraph accumulation state
m_ps->attr&=~(LEADSP|TRAILSP);
m_ps->acch_lev++;
m_ps->pf_start=m_frags.GetSize();
m_ps->pl_start=m_links.GetSize();
m_ps->numfrags=0;
m_ps->len=0;
m_ps->last_frag_trailsp=false;
m_ps->last_frag_fmt=m_ps->cfmt.attr;
}
if (elem->flags&(ElemFmt::DESCCAT|ElemFmt::DESCITEM)) {
// description elements emit their own name (and, for items, their
// attributes) as text fragments
Frag f;
if (elem->flags&ElemFmt::DESCITEM) { // add a few nbsps
f.attr=m_ps->Att()|REALLYLOCAL;
f.len=2;
f.local[0]=f.local[1]=0xa0; // nbsp
m_frags.Add(f);
++m_ps->numfrags;
m_ps->len+=f.len;
}
// add an element name
// NOTE(review): this key is ">keyword", while the link formats above are
// looked up as "|>link" and "|>footnote" -- confirm the key is registered
// without the leading '|'
m_ps->ApplyFmt(LookupElem(L">keyword"));
const wchar_t *ln=LocalName(ns_name);
f.len=wcslen(ln);
f.attr=m_ps->Att()|LOCAL;
f.str=m_buffer.Append(ln,f.len);
m_frags.Add(f);
++m_ps->numfrags;
m_ps->len+=f.len;
m_ps->PopA();
if (elem->flags&ElemFmt::DESCITEM && attr && *attr) { // print attributes as well
// add " ("
f.attr=m_ps->Att()|REALLYLOCAL;
f.len=2;
f.local[0]=L' '; f.local[1]=L'(';
m_frags.Add(f);
++m_ps->numfrags;
m_ps->len+=f.len;
// iterate over attributes; each one is emitted as name, "=", value with
// no separator between consecutive attributes
while (*attr) {
//attr name
f.attr=m_ps->Att()|LOCAL;
ln=LocalName(*attr);
f.len=wcslen(ln);
f.str=m_buffer.Append(ln,f.len);
m_frags.Add(f);
++m_ps->numfrags;
m_ps->len+=f.len;
// "="
f.attr=m_ps->Att()|REALLYLOCAL;
f.len=1;
f.local[0]=L'=';
m_frags.Add(f);
++m_ps->numfrags;
++m_ps->len;
// value
f.attr=m_ps->Att()|LOCAL;
f.len=wcslen(attr[1]);
f.str=m_buffer.Append(attr[1],f.len);
m_frags.Add(f);
++m_ps->numfrags;
m_ps->len+=f.len;
attr+=2;
}
// add ")"
f.attr=m_ps->Att()|REALLYLOCAL;
f.len=1;
f.local[0]=L')';
m_frags.Add(f);
++m_ps->numfrags;
m_ps->len+=f.len;
}
if (elem->flags&ElemFmt::DESCCAT) {
// a category forms a complete paragraph of its own: close it right here
// if last frag is a trailsp, then discard the last space
if (m_ps->last_frag_trailsp) {
m_ps->len--;
m_frags[m_frags.GetSize()-1].attr&=~TRAILSP;
}
AddP(m_ps->pf_start,m_ps->pl_start,m_ps->start,m_ps->len,m_ps->cfmt);
m_ps->start+=m_ps->len;
m_ps->acch_lev--;
} else { // add a ": "
f.attr=m_ps->Att()|REALLYLOCAL;
f.len=2;
f.local[0]=L':'; f.local[1]=L' ';
m_frags.Add(f);
++m_ps->numfrags;
m_ps->len+=f.len;
}
}
if (elem->flags&ElemFmt::BINARY) {
const wchar_t *id=GetAttr(attr,L"id");
const wchar_t *type=GetAttr(attr,L"content-type");
// a binary is recorded only when both id and content-type are present
if (id && type) {
m_ps->binary=1;
Binary *b=m_binarystorage.Get();
b->id=m_buffer.Append(id,wcslen(id)+1);
b->type=m_buffer.Append(type,wcslen(type)+1);
// fragments added from here on are counted for this binary when its
// end tag arrives
b->startfrag=m_frags.GetSize();
b->numfrags=0;
m_binaries.Add(b->id,(void*)b);
}
}
if (elem->flags&ElemFmt::IMAGE) {
const wchar_t *href=GetAttr(attr,XLINK_NS L"|href");
// only hrefs of the form "#name" are handled
if (href && href[0]==L'#') {
if (m_ps->acch_lev) { // inline image
// wcslen(href) equals the length of href+1 plus one, so the copy
// includes the terminator
const wchar_t *hcopy=m_buffer.Append(href+1,wcslen(href));
int index=m_inline_images.GetSize();
m_inline_images.Add(hcopy);
PushWS();
if (m_ps->attr&LEADSP)
++m_ps->len;
Frag f;
// the img bit is set only while Att() is evaluated for this fragment
m_ps->cfmt.attr.img=1;
f.attr=m_ps->Att()|PE_IMAGE;
m_ps->cfmt.attr.img=0;
f.len=1;
// the image index is split across the two local slots: index itself
// and index>>16
f.local[0]=index;
f.local[1]=index>>16;
m_frags.Add(f);
++m_ps->numfrags;
++m_ps->len;
m_ps->last_frag_trailsp=0;
m_ps->attr&=~(LEADSP|TRAILSP);
m_ps->last_frag_fmt=m_ps->cfmt.attr;
} else {
AddImage(href+1,m_ps->start,m_ps->cfmt);
// image virtual size is always the same
m_ps->start+=ImageLoader::IMAGE_VSIZE;
// an image met before any document exists is remembered as the cover
if (m_docs.GetSize() == 0)
m_cover = href+1;
}
}
}
}
// End-tag handler. Passes the parser's current byte index to ProgSetCur,
// looks up the format record for ns_name and finishes or undoes whatever
// that record's flag bits stand for: closes the paragraph, pops pushed
// formats, records the link, adds a TOC entry, closes the document and the
// binary.
//   ns_name - element name, used as the key for LookupElem
void XMLParserImp::EndElement(const wchar_t *ns_name) {
ProgSetCur(XML_GetCurrentByteIndex(m_parser));
ElemFmt *elem=LookupElem(ns_name);
if (m_ps->enable && elem->flags&ElemFmt::PARA) { // end a paragraph
// if last frag is a trailsp, then discard the last space
if (m_ps->last_frag_trailsp) {
m_ps->len--;
m_frags[m_frags.GetSize()-1].attr&=~TRAILSP;
}
AddP(m_ps->pf_start,m_ps->pl_start,m_ps->start,m_ps->len,m_ps->cfmt);
m_ps->start+=m_ps->len;
m_ps->acch_lev--;
}
// each PopA below pairs with a PushA/ApplyFmt issued for the same flag in
// StartElement
if (elem->flags&ElemFmt::HEADER)
m_ps->PopA();
if (elem->flags&ElemFmt::STYLE)
m_ps->PopA();
if (elem->flags&ElemFmt::STYLESHEET && m_ps->in_stylesheet)
--m_ps->in_stylesheet;
if (elem->flags&ElemFmt::FMT) // undo the element's formatting
m_ps->PopA();
if (elem->flags&ElemFmt::LINK) { // link
// a link is recorded only when the start tag supplied a target; it spans
// the paragraph text accumulated since link_start
if (m_ps->link_name) {
XMLParserImp::Link link;
link.start=m_ps->link_start;
link.length=m_ps->len-m_ps->link_start;
link.target=m_ps->link_name;
m_links.Add(link);
}
m_ps->PopA();
}
if (elem->flags&ElemFmt::SECTION) // leaving a section: one nesting level less
m_ps->section_nest--;
if (elem->flags&ElemFmt::SPACE) // add spaces after this element
m_ps->attr|=LEADSP;
if (elem->flags&ElemFmt::TITLE) {
// TOC entries are added only inside a document and inside a section.
// The position is title_start relative to the current document's start,
// the length is the number of m_pp entries added since title_start, and
// the last FilePos argument is the index of the current document.
if (m_docs.GetSize()>0 && m_ps->section_nest)
AddToc(
FilePos(
m_ps->title_start-m_docs[m_docs.GetSize()-1].start,
m_pp.GetSize()-m_ps->title_start,
m_docs.GetSize()-1
),
m_ps->section_nest
);
}
if (elem->flags&ElemFmt::AELINE) // add an empty line after the element
AddQ(m_ps->start);
if (elem->flags&ElemFmt::ENABLE)
--m_ps->enable;
if (elem->flags&ElemFmt::DOCUMENT) {
// the document's length is the number of m_pp entries added since its start
if (m_docs.GetSize()>0)
m_docs[m_docs.GetSize()-1].length=
m_pp.GetSize()-m_docs[m_docs.GetSize()-1].start;
AddQ(m_ps->start);
}
if (elem->flags&ElemFmt::BINARY && m_ps->binary) {
// close the binary: the last entry of m_binarystorage receives the number
// of fragments added since its startfrag
m_ps->binary=0;
int idx=m_binarystorage.GetSize()-1;
m_binarystorage[idx].numfrags=m_frags.GetSize()-m_binarystorage[idx].startfrag;
}
}
void XMLParserImp::CharData(const wchar_t *text,int len) {
if (m_ps->acch_lev && len) {
int l=normalized_length(text,len);
if (!l) { // whitespace frag, try to add a leading space to the next frag
if (m_ps->numfrags) {
m_ps->attr|=LEADSP;
m_ps->last_frag_fmt=m_ps->cfmt.attr;
}
return;
}
PushWS();
if (*text<=32 && m_ps->numfrags)
m_ps->attr|=LEADSP;
if (text[len-1]<=32) {
m_ps->attr|=TRAILSP;
m_ps->len++;
}
m_ps->len+=l;
if (m_ps->attr&LEADSP)
m_ps->len++;
// here we check the previous frag and if it has TRAILSP and this has a LEADSP,
// and their charformats are the same, then we can discard current LEADSP
if (m_ps->numfrags && m_ps->last_frag_trailsp && m_ps->attr&LEADSP &&
m_ps->last_frag_fmt==m_ps->cfmt.attr)
{
m_ps->attr&=~LEADSP;
m_ps->len--;
}
Frag f;
f.attr=m_ps->Att();
if (l<5 || XML_IsExpanding(m_parser)) { // cache short fragments
wchar_t *buf;
if (l <= sizeof(f.local)/sizeof(wchar_t)) {
f.attr|=REALLYLOCAL;
buf=f.local;
} else {
f.attr|=LOCAL;
buf=m_buffer.Get(l);
}
f.str=buf;
f.len=l;
normalize_space(buf,l,text,len);
} else {
f.len=XML_GetCurrentByteCount(m_parser);
f.fpos=XML_GetCurrentByteIndex(m_parser);
}
m_frags.Add(f);
++m_ps->numfrags;
m_ps->last_frag_trailsp=(m_ps->attr&TRAILSP)!=0;
m_ps->attr&=~(LEADSP|TRAILSP);
m_ps->last_frag_fmt=m_ps->cfmt.attr;
}
if (m_ps->in_stylesheet && len)
ParseStylesheet(text,len);
if (m_ps->binary && len && !iswhitespace(text,len)) {
Frag f;
f.attr=m_ps->attr&CDATA; // text attrs are not needed in this context
if (XML_IsExpanding(m_parser)) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -