⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xmlparser.cpp

📁 俄罗斯人开发的大名鼎鼎的Pocket Pc 阅读器haaliread的源代码,visual c
💻 CPP
📖 第 1 页 / 共 4 页
字号:
// Returns the number of characters the first `len` characters of `s` occupy
// after whitespace normalization: leading and trailing whitespace is dropped
// and every interior run of whitespace counts as a single character.
// Any character with a code <= 32 is treated as whitespace.
static int    normalized_length(const wchar_t *s,int len) {
  const wchar_t *const stop=s+len;
  int  count=0;
  bool seen_word=false;   // at least one non-space character was counted
  bool gap_pending=false; // whitespace seen since the last counted character

  for (;s<stop;++s) {
    if (*s<=32) {
      // a gap only matters once a word precedes it (leading space is dropped)
      gap_pending=seen_word;
      continue;
    }
    if (gap_pending) {
      // the gap is followed by text, so it collapses to exactly one space
      ++count;
      gap_pending=false;
    }
    ++count;
    seen_word=true;
  }
  // a gap still pending here is trailing whitespace and is not counted
  return count;
}

// Returns true when each of the first `len` characters of `text` is one of
// CR (0x0d), LF (0x0a), space (0x20) or TAB (0x09). An empty range yields true.
static bool iswhitespace(const wchar_t *text,int len) {
  for (int i=0;i<len;++i) {
    switch (text[i]) {
    case 0x09:
    case 0x0a:
    case 0x0d:
    case 0x20:
      continue; // still whitespace, keep scanning
    default:
      return false;
    }
  }
  return true;
}

// Table of element formats; LookupElem() indexes it with an int and uses
// entry 0 as the default format.
static XMLParser::FmtArray	g_eformat;
// Map from element name to an index into g_eformat (the index is stored in
// the map's void* value, see LookupElem()). Where the map is allocated is not
// visible in this part of the file.
static WMap			*g_elements;

// Static helper object whose only job is to delete g_elements when static
// objects are destroyed.
static struct Cleaner {
  ~Cleaner() { delete g_elements; }
} g_cleaner;

// Finds the format record for the element called `name`.
// Returns a pointer into g_eformat: the entry whose index g_elements stores
// for `name`, or entry 0 (the default) when `name` is NULL or not in the map.
static XMLParser::ElemFmt    *LookupElem(const wchar_t *name) {
  if (name) {
    void *slot;
    // the map's void* payload carries the g_eformat index
    if (g_elements->Lookup(name,slot))
      return &g_eformat[(int)slot];
  }
  return &g_eformat[0]; // default
}

// Returns the part of `name` that follows the first L'|' character, or `name`
// itself when it contains no L'|'. The result points into `name`.
static const wchar_t	      *LocalName(const wchar_t *name) {
  if (const wchar_t *sep=wcschr(name,L'|'))
    return sep+1;
  return name;
}

// Calls PushA() and then overlays the format record `e` on the current
// format `cfmt`. (PushA() presumably saves the state that a later PopA()
// restores -- its body is not visible here.)
//
// Every field of `e` equal to ElemFmt::NOCHG leaves the matching part of
// `cfmt` untouched. For the remaining fields:
//   - fsz:   copied to cfmt.attr.fsize; when `nest` is non-zero the size is
//            reduced by `nest` but never below 1.
//   - bold, italic, underline, color: copied as is.
//   - align: replaces only the Paragraph::align_mask bits of cfmt.flags.
//   - lindent, rindent, findent: a non-negative value replaces the current
//            indent; a negative value is subtracted from it, i.e. the indent
//            grows by the value's magnitude.
void XMLParserImp::ParseState::ApplyFmt(ElemFmt *e,int nest) {
  typedef XMLParser::ElemFmt EF;

  PushA();

  // character attributes
  if (e->fsz!=EF::NOCHG)
    cfmt.attr.fsize=nest==0 ? e->fsz : max(e->fsz-nest,1);
  if (e->bold!=EF::NOCHG)
    cfmt.attr.bold=e->bold;
  if (e->italic!=EF::NOCHG)
    cfmt.attr.italic=e->italic;
  if (e->underline!=EF::NOCHG)
    cfmt.attr.underline=e->underline;
  if (e->color!=EF::NOCHG)
    cfmt.attr.color=e->color;

  // paragraph alignment: clear the old alignment bits, then set the new ones
  if (e->align!=EF::NOCHG) {
    cfmt.flags&=~Paragraph::align_mask;
    cfmt.flags|=e->align & Paragraph::align_mask;
  }

  // indents: absolute when >=0, relative (added magnitude) when <0
  if (e->lindent!=EF::NOCHG) {
    if (e->lindent>=0)
      cfmt.lindent=e->lindent;
    else
      cfmt.lindent-=e->lindent;
  }
  if (e->rindent!=EF::NOCHG) {
    if (e->rindent>=0)
      cfmt.rindent=e->rindent;
    else
      cfmt.rindent-=e->rindent;
  }
  if (e->findent!=EF::NOCHG) {
    if (e->findent>=0)
      cfmt.findent=e->findent;
    else
      cfmt.findent-=e->findent;
  }
}

// Looks up an attribute value in a flat attribute array.
//
// attr - array laid out as name0, value0, name1, value1, ..., terminated by
//        a NULL name. May itself be NULL, which is treated as "no attributes".
// name - attribute name to search for (compared with wcscmp, so it must
//        match the stored name exactly, including any namespace prefix).
// def  - value returned when the attribute is absent.
//
// Returns the value paired with the first matching name, or `def`. The
// returned pointer is borrowed from `attr` (or is `def`); nothing is copied.
const wchar_t *GetAttr(const wchar_t **attr,const wchar_t *name,const wchar_t *def=NULL)
{
  // StartElement checks `attr` for NULL before iterating it, so a NULL array
  // is a possible input; do not dereference it here either.
  if (!attr)
    return def;
  for (;*attr;attr+=2) {
    if (!wcscmp(name,*attr))
      return attr[1];
  }
  return def;
}

// Start-tag handler.
//
// ns_name - element name; the root check and LocalName() below treat it as
//           "<namespace>|<local name>".
// attr    - attribute array laid out as name, value, name, value, ...,
//           terminated by a NULL name (this is how GetAttr() and the
//           attribute loop below walk it).
//
// The element's format record is fetched with LookupElem(); each bit set in
// its `flags` triggers one independent action below, in the order written.
// The parser state lives in *m_ps. If the very first element is not the
// expected FictionBook root, the function does not return: it longjmp()s to
// m_ps->jout with ERR_NOTFB2.
void XMLParserImp::StartElement(const wchar_t *ns_name,const wchar_t **attr) {
  if (!m_ps->root_element) { // special case for root
    // The name must start with the first FB_NS_LEN-1 characters of FB_NS,
    // followed by exactly one more character (not checked) and then
    // "|FictionBook".
    const wchar_t *p = ns_name;
    const wchar_t *q = FB_NS;
    const wchar_t *e = q + FB_NS_LEN - 1;
    while (*p && q < e)
      if (*p++ != *q++)
	longjmp(m_ps->jout,ERR_NOTFB2);
    // a name that ended early, or has the wrong tail, is rejected as well
    if (!p[0] || wcscmp(p+1,L"|FictionBook")!=0)
      longjmp(m_ps->jout,ERR_NOTFB2);
    m_ps->root_element=true;
  }
  ElemFmt	*elem=LookupElem(ns_name);
  if (elem->flags&ElemFmt::DOCUMENT) {
    // a new document begins at the current end of m_pp
    if (m_docs.GetSize()==0)
      AddQ(m_ps->start);
    Document  d;
    d.start=m_pp.GetSize();
    d.length=0; // filled in by EndElement
    d.name=GetAttr(attr,L"name",L"");
    m_docs.Add(d);
    m_ps->start=0;
    // add a cover page if it was present
    if (m_docs.GetSize()==1 && m_cover.GetLength()>0) {
      AddImage(m_cover,m_ps->start,m_ps->cfmt);
      m_ps->start+=ImageLoader::IMAGE_VSIZE;
    }
  }
  if (elem->flags&ElemFmt::ENABLE)
    ++m_ps->enable; // PARA elements only open paragraphs while enable!=0
  if (elem->flags&ElemFmt::SECTION)
    m_ps->section_nest++;
  if (elem->flags&ElemFmt::ELINE)
    AddQ(m_ps->start);
  if (elem->flags&ElemFmt::FMT) // apply formatting
    m_ps->ApplyFmt(elem);
  if (elem->flags&ElemFmt::STYLE) {
    // named style: apply it when known, otherwise just call PushA() so the
    // PopA() in EndElement stays balanced
    const wchar_t   *style=GetAttr(attr,L"name");
    void	    *val;
    if (style && m_ps->stylemap->Lookup(style,val))
      m_ps->ApplyFmt(&m_ps->styles->operator[]((int)val));
    else
      m_ps->PushA();
  }
  if (elem->flags&ElemFmt::HEADER) {
    m_ps->PushA();
    m_ps->cfmt.flags |= Paragraph::header;
  }
  if (elem->flags&ElemFmt::STYLESHEET) {
    // in_stylesheet counts nesting; it is only entered for a stylesheet whose
    // "type" attribute equals HR_STYLE, and m_sps is initialized on the
    // outermost entry
    const wchar_t *type=GetAttr(attr,L"type");
    if (m_ps->in_stylesheet || (type && !wcscmp(type,HR_STYLE)))
      if (!m_ps->in_stylesheet++)
	m_sps->Init();
  }
  if (elem->flags&ElemFmt::TITLE) // start a toc entry
    m_ps->title_start=m_pp.GetSize();
  if (elem->flags&ElemFmt::LINKDEST) { // link destination
    // remember id -> current size of m_pp; the id is copied (with its
    // terminating NUL) into m_buffer
    const wchar_t *id=GetAttr(attr,L"id");
    if (id) {
      wchar_t *copy=m_buffer.Append(id,wcslen(id)+1);
      m_references.Add(copy,(void*)m_pp.GetSize());
    }
  }
  if (elem->flags&ElemFmt::LINK) { // link
    const wchar_t   *dest=GetAttr(attr,XLINK_NS L"|href");
    const wchar_t   *type=GetAttr(attr,L"type");
    ElemFmt	    *linkformat;
    // type="note" links use the footnote format, all others the link format
    if (type && !wcscmp(type,L"note"))
      linkformat=LookupElem(L"|>footnote");
    else
      linkformat=LookupElem(L"|>link");
    m_ps->ApplyFmt(linkformat);
    if (dest) {
      // the link covers the paragraph text from the current length up to
      // the length at the matching EndElement
      m_ps->link_start=m_ps->len;
      m_ps->link_name=m_buffer.Append(dest,wcslen(dest)+1);
    } else
      m_ps->link_name=NULL; // no target: EndElement records no link
  }
  if ((m_ps->enable && elem->flags&ElemFmt::PARA) ||
      elem->flags&ElemFmt::DESCCAT)
  { // start a new paragraph
    m_ps->attr&=~(LEADSP|TRAILSP);
    m_ps->acch_lev++; // CharData only collects text while this is non-zero
    // remember where this paragraph's fragments and links begin
    m_ps->pf_start=m_frags.GetSize();
    m_ps->pl_start=m_links.GetSize();
    m_ps->numfrags=0;
    m_ps->len=0;
    m_ps->last_frag_trailsp=false;
    m_ps->last_frag_fmt=m_ps->cfmt.attr;
  }
  if (elem->flags&(ElemFmt::DESCCAT|ElemFmt::DESCITEM)) {
    // These elements print their own name (and, for DESCITEM, their
    // attributes) as text. REALLYLOCAL fragments keep their characters in
    // f.local; LOCAL fragments point into m_buffer through f.str.
    Frag	  f;
    if (elem->flags&ElemFmt::DESCITEM) { // add a few nbsps
      f.attr=m_ps->Att()|REALLYLOCAL;
      f.len=2;
      f.local[0]=f.local[1]=0xa0; // nbsp
      m_frags.Add(f);
      ++m_ps->numfrags;
      m_ps->len+=f.len;
    }
    // add an element name
    m_ps->ApplyFmt(LookupElem(L">keyword"));
    const wchar_t   *ln=LocalName(ns_name);
    f.len=wcslen(ln);
    f.attr=m_ps->Att()|LOCAL;
    f.str=m_buffer.Append(ln,f.len);
    m_frags.Add(f);
    ++m_ps->numfrags;
    m_ps->len+=f.len;
    m_ps->PopA();
    if (elem->flags&ElemFmt::DESCITEM && attr && *attr) { // print attributes as well
      // add " ("
      f.attr=m_ps->Att()|REALLYLOCAL;
      f.len=2;
      f.local[0]=L' '; f.local[1]=L'(';
      m_frags.Add(f);
      ++m_ps->numfrags;
      m_ps->len+=f.len;
      // iterate over attributes
      while (*attr) {
	// attr name
	f.attr=m_ps->Att()|LOCAL;
	ln=LocalName(*attr);
	f.len=wcslen(ln);
	f.str=m_buffer.Append(ln,f.len);
	m_frags.Add(f);
	++m_ps->numfrags;
	m_ps->len+=f.len;
	// "="
	f.attr=m_ps->Att()|REALLYLOCAL;
	f.len=1;
	f.local[0]=L'=';
	m_frags.Add(f);
	++m_ps->numfrags;
	++m_ps->len;
	// value
	f.attr=m_ps->Att()|LOCAL;
	f.len=wcslen(attr[1]);
	f.str=m_buffer.Append(attr[1],f.len);
	m_frags.Add(f);
	++m_ps->numfrags;
	m_ps->len+=f.len;
	attr+=2;
      }
      // add ")"
      f.attr=m_ps->Att()|REALLYLOCAL;
      f.len=1;
      f.local[0]=L')';
      m_frags.Add(f);
      ++m_ps->numfrags;
      m_ps->len+=f.len;
    }
    if (elem->flags&ElemFmt::DESCCAT) {
      // a category name is a complete paragraph of its own: emit it now
      // if last frag is a trailsp, then discard the last space
      if (m_ps->last_frag_trailsp) {
	m_ps->len--;
	m_frags[m_frags.GetSize()-1].attr&=~TRAILSP;
      }
      AddP(m_ps->pf_start,m_ps->pl_start,m_ps->start,m_ps->len,m_ps->cfmt);
      m_ps->start+=m_ps->len;
      m_ps->acch_lev--;
    } else { // add a ": "
      f.attr=m_ps->Att()|REALLYLOCAL;
      f.len=2;
      f.local[0]=L':'; f.local[1]=L' ';
      m_frags.Add(f);
      ++m_ps->numfrags;
      m_ps->len+=f.len;
    }
  }
  if (elem->flags&ElemFmt::BINARY) {
    // register a binary object under its id; numfrags is filled in by
    // EndElement. Requires both "id" and "content-type".
    const wchar_t *id=GetAttr(attr,L"id");
    const wchar_t *type=GetAttr(attr,L"content-type");
    if (id && type) {
      m_ps->binary=1;
      Binary  *b=m_binarystorage.Get();
      b->id=m_buffer.Append(id,wcslen(id)+1);
      b->type=m_buffer.Append(type,wcslen(type)+1);
      b->startfrag=m_frags.GetSize();
      b->numfrags=0;
      m_binaries.Add(b->id,(void*)b);
    }
  }
  if (elem->flags&ElemFmt::IMAGE) {
    // only hrefs of the form "#name" are handled; the '#' is dropped
    const wchar_t *href=GetAttr(attr,XLINK_NS L"|href");
    if (href && href[0]==L'#') {
      if (m_ps->acch_lev) { // inline image
	// wcslen(href) is the length of href+1 plus its terminating NUL
	const wchar_t *hcopy=m_buffer.Append(href+1,wcslen(href));
	int	      index=m_inline_images.GetSize();
	m_inline_images.Add(hcopy);
	// NOTE(review): PushWS() is defined elsewhere; presumably it emits
	// pending whitespace -- confirm
	PushWS();
	if (m_ps->attr&LEADSP)
	  ++m_ps->len;
	Frag    f;
	// the img attribute bit is set only while computing this fragment's
	// attributes
	m_ps->cfmt.attr.img=1;
	f.attr=m_ps->Att()|PE_IMAGE;
	m_ps->cfmt.attr.img=0;
	// the image occupies one character; its index into m_inline_images is
	// stored in f.local (index in local[0], index>>16 in local[1])
	f.len=1;
	f.local[0]=index;
	f.local[1]=index>>16;
	m_frags.Add(f);
	++m_ps->numfrags;
	++m_ps->len;
	m_ps->last_frag_trailsp=0;
	m_ps->attr&=~(LEADSP|TRAILSP);
	m_ps->last_frag_fmt=m_ps->cfmt.attr;
      } else {
	AddImage(href+1,m_ps->start,m_ps->cfmt);
	// image virtual size is always the same
	m_ps->start+=ImageLoader::IMAGE_VSIZE;

	// an image seen before any document starts is remembered as the cover
	if (m_docs.GetSize() == 0)
	  m_cover = href+1;
      }
    }
  }
}

// End-tag handler.
//
// ns_name - name of the element being closed; its format record is fetched
//           with LookupElem() and each bit set in `flags` undoes or completes
//           the matching action taken in StartElement.
//
// Also passes the parser's current byte index to ProgSetCur() on every call
// (presumably progress reporting -- ProgSetCur is defined elsewhere).
void XMLParserImp::EndElement(const wchar_t *ns_name) {
  ProgSetCur(XML_GetCurrentByteIndex(m_parser));
  ElemFmt   *elem=LookupElem(ns_name);

  if (m_ps->enable && elem->flags&ElemFmt::PARA) { // end a paragraph
    // if last frag is a trailsp, then discard the last space
    if (m_ps->last_frag_trailsp) {
      m_ps->len--;
      m_frags[m_frags.GetSize()-1].attr&=~TRAILSP;
    }
    // emit the paragraph collected since StartElement and advance the
    // running position by its length
    AddP(m_ps->pf_start,m_ps->pl_start,m_ps->start,m_ps->len,m_ps->cfmt);
    m_ps->start+=m_ps->len;
    m_ps->acch_lev--;
  }
  // The next blocks each call PopA() once for every PushA()/ApplyFmt() that
  // StartElement issued for the same flag.
  if (elem->flags&ElemFmt::HEADER)
    m_ps->PopA();
  if (elem->flags&ElemFmt::STYLE)
    m_ps->PopA();
  if (elem->flags&ElemFmt::STYLESHEET && m_ps->in_stylesheet)
      --m_ps->in_stylesheet;
  if (elem->flags&ElemFmt::FMT) // pop the formatting applied in StartElement
    m_ps->PopA();
  if (elem->flags&ElemFmt::LINK) { // link
    // record the link only when StartElement found a target for it
    if (m_ps->link_name) {
      XMLParserImp::Link	link;
      link.start=m_ps->link_start;
      link.length=m_ps->len-m_ps->link_start;
      link.target=m_ps->link_name;
      m_links.Add(link);
    }
    m_ps->PopA();
  }
  if (elem->flags&ElemFmt::SECTION) // leave one section nesting level
    m_ps->section_nest--;
  if (elem->flags&ElemFmt::SPACE) // add spaces after this element
    m_ps->attr|=LEADSP;
  if (elem->flags&ElemFmt::TITLE) {
    // add a toc entry for titles inside a section of a started document;
    // the FilePos arguments are: title start relative to the document start,
    // the amount added to m_pp since the title began, and the document index
    if (m_docs.GetSize()>0 && m_ps->section_nest)
      AddToc(
	FilePos(
	  m_ps->title_start-m_docs[m_docs.GetSize()-1].start,
	  m_pp.GetSize()-m_ps->title_start,
	  m_docs.GetSize()-1
	),
	m_ps->section_nest
      );
  }
  if (elem->flags&ElemFmt::AELINE) // add an empty line after the element
    AddQ(m_ps->start);
  if (elem->flags&ElemFmt::ENABLE)
    --m_ps->enable;
  if (elem->flags&ElemFmt::DOCUMENT) {
    // close the current document: its length is what was added to m_pp
    // since the document started
    if (m_docs.GetSize()>0)
      m_docs[m_docs.GetSize()-1].length=
	m_pp.GetSize()-m_docs[m_docs.GetSize()-1].start;
    AddQ(m_ps->start);
  }
  if (elem->flags&ElemFmt::BINARY && m_ps->binary) {
    // finish the binary opened by StartElement (the last entry of
    // m_binarystorage): count the fragments added since it started
    m_ps->binary=0;
    int	  idx=m_binarystorage.GetSize()-1;
    m_binarystorage[idx].numfrags=m_frags.GetSize()-m_binarystorage[idx].startfrag;
  }
}

void XMLParserImp::CharData(const wchar_t *text,int len) {
  if (m_ps->acch_lev && len) {
    int l=normalized_length(text,len);
    if (!l) { // whitespace frag, try to add a leading space to the next frag
      if (m_ps->numfrags) {
	m_ps->attr|=LEADSP;
	m_ps->last_frag_fmt=m_ps->cfmt.attr;
      }
      return;
    }

    PushWS();

    if (*text<=32 && m_ps->numfrags)
      m_ps->attr|=LEADSP;
    if (text[len-1]<=32) {
      m_ps->attr|=TRAILSP;
      m_ps->len++;
    }
    m_ps->len+=l;
    if (m_ps->attr&LEADSP)
      m_ps->len++;
    // here we check the previous frag and if it has TRAILSP and this has a LEADSP,
    // and their charformats are the same, then we can discard current LEADSP
    if (m_ps->numfrags && m_ps->last_frag_trailsp && m_ps->attr&LEADSP &&
	m_ps->last_frag_fmt==m_ps->cfmt.attr)
    {
      m_ps->attr&=~LEADSP;
      m_ps->len--;
    }
    Frag    f;
    f.attr=m_ps->Att();
    if (l<5 || XML_IsExpanding(m_parser)) { // cache short fragments
      wchar_t	*buf;
      if (l <= sizeof(f.local)/sizeof(wchar_t)) {
	f.attr|=REALLYLOCAL;
	buf=f.local;
      } else {
	f.attr|=LOCAL;
	buf=m_buffer.Get(l);
      }
      f.str=buf;
      f.len=l;
      normalize_space(buf,l,text,len);
    } else {
      f.len=XML_GetCurrentByteCount(m_parser);
      f.fpos=XML_GetCurrentByteIndex(m_parser);
    }
    m_frags.Add(f);
    ++m_ps->numfrags;
    m_ps->last_frag_trailsp=(m_ps->attr&TRAILSP)!=0;
    m_ps->attr&=~(LEADSP|TRAILSP);
    m_ps->last_frag_fmt=m_ps->cfmt.attr;
  }
  if (m_ps->in_stylesheet && len)
    ParseStylesheet(text,len);
  if (m_ps->binary && len && !iswhitespace(text,len)) {
    Frag  f;
    f.attr=m_ps->attr&CDATA; // text attrs are not needed in this context
    if (XML_IsExpanding(m_parser)) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -