⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xmlparser.cpp

📁 俄罗斯人开发的大名鼎鼎的 Pocket PC 阅读器 Haali Reader 的源代码,Visual C++
💻 CPP
📖 第 1 页 / 共 4 页
字号:
/*
 * Copyright (c) 2001,2002,2003 Mike Matsnev.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    Mike Matsnev.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 * $Id: XMLParser.cpp,v 1.106.2.12 2004/10/13 13:50:55 mike Exp $
 * 
 */

#include <afxwin.h>
#include <afxtempl.h>
#include <setjmp.h>

#include "FastArray.h"
#include "TextParser.h"
#include "XMLParser.h"
#include "TextViewNG.h"
#include "Unicode.h"
#include "StrBuf.h"
#include "WMap.h"
#include "Image.h"

#include "xscanf.h"

#include "expat.h"

// XML namespace URI that identifies FictionBook 2.0 elements.
#define	FB_NS	  L"http://www.gribuser.ru/xml/fictionbook/2.0"
// Length of FB_NS in wchar_t units, not counting the terminating NUL.
#define	FB_NS_LEN (sizeof(FB_NS)/sizeof(wchar_t)-1)
// XML namespace URI of XLink.
// NOTE(review): presumably matched against link/href attributes - confirm at the use sites.
#define	XLINK_NS  L"http://www.w3.org/1999/xlink"
// MIME type string for CSS.
// NOTE(review): presumably the only stylesheet type the parser accepts - confirm.
#define	HR_STYLE  L"text/css"

// Single-bit flags living in the upper bits of a 32-bit word.
// NOTE(review): the first group is presumably OR-ed into Frag::attr /
// ParseState::attr next to the character formatting bits - confirm against
// the code that builds fragments.
enum {
  CDATA=0x1000000,
  LEADSP=0x2000000,
  LOCAL=0x4000000,
  REALLYLOCAL = 0x8000000,
  TRAILSP = 0x10000000,

  // PE flags
  // PE_IMAGE is tested against PE::flags in GetParagraphImp to select the
  // image-paragraph path.
  PE_IMAGE=0x80000000,
};
// NOTE(review): presumably the maximum length, in characters, of a table of
// contents entry - confirm against the code that uses it.
enum { MAX_CONTENTS_LEN=80 };

// Parser error codes.
// NOTE(review): ERR_NOTFB2 presumably reports that the input is not a
// FictionBook 2 document (possibly passed through ParseState::jout) - confirm.
enum {
  ERR_NOTFB2=1
};

// Current formatting snapshot; ParseState keeps one live copy (cfmt) and a
// stack of saved copies (attr_stack) that PushA()/PopA() save and restore.
struct CFMT {
  Attr	  attr;     // character attributes; its .wa member is OR-ed into fragment attributes by ParseState::Att()
  int	  lindent;  // presumably the left indent - TODO confirm units
  int	  rindent;  // presumably the right indent - TODO confirm units
  int	  findent;  // presumably the first-line indent - TODO confirm units
  BYTE	  flags;    // NOTE(review): looks like paragraph-level flags - confirm
};

// One node of the singly linked list managed by ImageCache.
//
// The node holds a GDI bitmap handle together with the name it was looked up
// under. The name pointer is stored as given and is never freed here, so the
// string has to outlive the node.
struct CachedImage {
  CachedImage	      *next;      // next node in the cache list, NULL at the tail
  HBITMAP	      hBmp;       // bitmap handle, NULL while nothing is loaded
  int		      width;      // NOTE(review): presumably the bitmap size - confirm
  int		      height;
  int		      maxwidth;   // NOTE(review): presumably the limits the bitmap was rendered for - confirm
  int		      maxheight;
  int		      rotation;
  const wchar_t     *name;      // lookup key, not owned

  // Every member is given a defined value, listed in declaration order
  // (which is the order the compiler initializes them in anyway).
  CachedImage() :
    next(NULL), hBmp(NULL), width(0), height(0), maxwidth(0), maxheight(0),
    rotation(0), name(NULL) { }

  // Frees the bitmap (if any) and forgets the name. The node itself stays
  // allocated and linked, so it can be reused.
  void	      Release() { if (hBmp) DeleteObject(hBmp); name=NULL; hBmp=NULL; }
};

class ImageCache {
  CachedImage	      *m_head,*m_tail;
  int		      m_count;
  int		      m_max;
public:
  ImageCache(int max=4) : m_head(NULL), m_tail(NULL), m_count(0), m_max(max) { }
  ~ImageCache() { RemoveAll(); }

  CachedImage	      *Lookup(const wchar_t *name,bool& alloc);
  void		      Remove(CachedImage *img);
  void		      RemoveAll() {
    while (m_head) {
      CachedImage *next=m_head->next;
      m_head->Release();
      delete m_head;
      m_head=next;
    }
    m_count=0;
    m_head=m_tail=NULL;
  }
};

// Returns the cache entry for name.
//
// alloc is set to false when an existing entry was found and to true when
// the returned entry is new (or recycled) and still has to be filled in by
// the caller. The name pointer is stored as is, not copied.
CachedImage   *ImageCache::Lookup(const wchar_t *name,bool& alloc) {
  // already cached?
  CachedImage	*cur=m_head;
  while (cur) {
    if (wcscmp(name,cur->name)==0) {
      alloc=false;
      return cur;
    }
    cur=cur->next;
  }

  alloc=true;

  if (m_count>=m_max) {
    // cache is full: recycle the head node and move it to the tail
    ASSERT(m_head!=NULL);
    CachedImage	*victim=m_head;
    const bool	only_node=(m_head==m_tail);
    if (!only_node)
      m_head=victim->next;
    victim->Release();
    victim->name=name;
    victim->next=NULL;
    if (!only_node)
      m_tail->next=victim;
    m_tail=victim;
    return victim;
  }

  // still room: append a fresh node
  CachedImage	*fresh=new CachedImage;
  ++m_count;
  fresh->name=name;
  if (m_tail)
    m_tail->next=fresh;
  else
    m_head=fresh;
  m_tail=fresh;
  return fresh;
}

// Unlinks img from the cache, frees its bitmap and deletes the node.
//
// A NULL pointer is ignored. A node that is not part of this cache triggers
// a debug assertion and is otherwise left untouched; the list walk used to
// run off the end and dereference NULL in that case.
void  ImageCache::Remove(CachedImage *img) {
  if (img==NULL)
    return;

  CachedImage	*prev=NULL; // stays NULL when img is the head
  if (img==m_head)
    m_head=img->next;
  else { // tough, will have to traverse the list
    for (prev=m_head;prev && prev->next!=img;prev=prev->next)
      ;
    if (prev==NULL) {
      ASSERT(FALSE); // img does not belong to this cache
      return;
    }
    prev->next=img->next;
  }
  if (img==m_tail)
    m_tail=prev;

  // release only after the node is known to be ours and is unlinked
  img->Release();
  delete img;
  --m_count;
}

// XMLParser implementation built on an expat XML_Parser handle (m_parser).
// The static *CB members are the expat callbacks; the paragraph, fragment
// and link tables filled while parsing are kept in the FastArray members.
class XMLParserImp: public XMLParser {
public:

  // State of the stylesheet scanner (see ParseStylesheet and m_sps).
  struct SP_State {
    enum {
      START,
      NAME,
      FLAGS,
      FM,LM,RM,SIZE,COLOR
    };
    wchar_t				stylename[128];
    int					stylenameptr; // next free slot in stylename
    ElemFmt				format;
    int					state; // one of the enum values above
    int					num;
    bool				sign;
    // Back to the initial state with an empty name and a cleared format.
    void				Init() { stylenameptr=0; format.Clear(); state=START; }
    // Appends ch to stylename; characters that would not fit are dropped.
    // The last array element is never written by this function.
    void				NAdd(wchar_t ch) {
      if (stylenameptr<sizeof(stylename)/sizeof(wchar_t)-1)
	stylename[stylenameptr++]=ch;
    }
  };

  // Working state of one parse run (see m_ps).
  struct ParseState {
    enum { MAX_NEST=64 };
    int					len; // current len
    int					start;
    DWORD				attr;
    Attr				last_frag_fmt;
    bool				last_frag_trailsp;
    bool				root_element;
    CFMT				cfmt; // formatting currently in effect
    CFMT				attr_stack[MAX_NEST]; // saved copies of cfmt
    int					attr_stack_ptr; // number of used attr_stack entries
    int					acch_lev,in_stylesheet;
    int					enable;
    int					section_nest;
    int					title_start;
    int					link_start;
    const wchar_t			*link_name;
    int					pf_start,pl_start,numfrags;
    int					binary;
    FmtArray				*styles;
    WMap				*stylemap;
    jmp_buf				jout;

    // Saves cfmt on the stack; silently does nothing once MAX_NEST entries
    // are in use.
    void PushA() {
      if (attr_stack_ptr<MAX_NEST)
	attr_stack[attr_stack_ptr++]=cfmt;
    }
    // Restores the most recently saved cfmt; no-op on an empty stack.
    void PopA() {
      if (attr_stack_ptr>0)
	cfmt=attr_stack[--attr_stack_ptr];
    }
    // Fragment attribute word: attr combined with the current format bits.
    DWORD Att() { return attr|cfmt.attr.wa; }
    void  ApplyFmt(ElemFmt *e,int nest=0);
  };

  struct Frag {    // smallest element - character data
    union {
      DWORD	    fpos;    // offset into the file
      const wchar_t *str;    // pointer to a cached value
      wchar_t	    local[2]; // cached right here
    };
    DWORD   len;    // raw char count
    DWORD   attr;   // attributes of this run
  };
  struct PE { // paragraph
    enum {
      FRAGBITS=10,
      MAXFRAGS=1<<FRAGBITS,
      FRAGSHIFT=32-FRAGBITS,
      IDXMASK=(1<<FRAGSHIFT)-1,
      // the three indents are packed into 'indent' as 10-bit fields
      INDENTBITS=10,
      INDENTMASK=(1<<INDENTBITS)-1
    };
    union {
      DWORD		  idx_nf; // offset into m_frags
      const wchar_t	  *name; // or an image name
    };
    int		  start; // start of parsed paragraph
    DWORD	  linkidx_nl; // offset into m_links
    DWORD	  indent; // left, right and first line indentation
    DWORD	  flags;

    // fragment count (top FRAGBITS bits) and first fragment index (the rest)
    DWORD	  nfrags() { return idx_nf>>FRAGSHIFT; }
    DWORD	  idx() { return idx_nf&IDXMASK; }

    // link count and first link index, packed the same way
    DWORD	  nlinks() { return linkidx_nl>>FRAGSHIFT; }
    DWORD	  lidx() { return linkidx_nl&IDXMASK; }

    // Indent accessors. The mask has to cover all 10 bits of a field; the
    // value 0x2ff used before lost bit 8, so an indent of 256..511 or
    // 768..1023 came back 256 too small.
    DWORD	  li() { return (indent>>INDENTBITS)&INDENTMASK; }
    DWORD	  ri() { return indent&INDENTMASK; }
    DWORD	  fi() { return (indent>>(2*INDENTBITS))&INDENTMASK; }

    void	  setidx_nf(DWORD idx,DWORD nf) { idx_nf=(idx&IDXMASK)|(nf<<FRAGSHIFT); }
    void	  setidx_nl(DWORD idx,DWORD nf) { linkidx_nl=(idx&IDXMASK)|(nf<<FRAGSHIFT); }
    // Packs first-line (bits 20-29), left (bits 10-19) and right (bits 0-9)
    // indents; each value is truncated to 10 bits.
    void	  setindent(DWORD l,DWORD r,DWORD f) {
      indent=((f&INDENTMASK)<<(2*INDENTBITS))|((l&INDENTMASK)<<INDENTBITS)|(r&INDENTMASK);
    }
    void	  Zero() { memset(this,0,sizeof(*this)); }
  };

  struct Document { // subdocument
    int	    start; // start paragraph
    int	    length; // length in paragraphs
    CString name;
  };

  struct Link {
    int		  start;
    int		  length;
    const wchar_t *target;
  };

  struct Binary {
    wchar_t	      *id;
    wchar_t	      *type;
    int		      numfrags;
    int		      startfrag;
  };

  friend class Base64BinReader;

  FastArray<Frag>	m_frags;
  FastArray<PE>		m_pp;
  FastArray<Link>	m_links;
  FastArray<Binary>	m_binarystorage;
  FastArray<const wchar_t *> m_inline_images;
  StrBuf		m_buffer;
  XML_Parser		m_parser; // expat handle, freed in the destructor
  CArray<Document,Document&>	m_docs;
  WMap			m_references;
  WMap			m_binaries;
  ParseState		*m_ps;
  SP_State		*m_sps;
  CString		m_cover;
  ImageCache		m_imcache;

  // idx is an index into m_pp, not a (docid,para) pair
  Paragraph		GetParagraphImp(int idx);

  // paragraphs
  virtual Paragraph	GetParagraph(int docid,int para);
  virtual int		Length(int docid); // in paragraphs
  virtual int		GetPLength(int docid,int para);
  virtual int		GetPStart(int docid,int para);
  virtual int		GetTotalLength(int docid);
  virtual int		LookupParagraph(int docid,int charpos);

  // documents
  virtual int		GetSubDocCount() { return m_docs.GetSize(); }
  virtual CString	GetSubDocName(int docid);

  // links
  virtual bool		LookupReference(const wchar_t *name,FilePos& dest);

  // images
  virtual bool		GetImage(const wchar_t *name,HDC hDC,int maxwidth,
    int maxheight,int rotation,Image& img);
  virtual void		InvalidateImageCache() { m_imcache.RemoveAll(); }

  // construction and destruction
			XMLParserImp(Meter *m,CBufFile *fp,Bookmarks *bmk,
				     HANDLE heap);
  virtual		~XMLParserImp();

  virtual bool		ParseFile(int encoding);

  // paragraphs
  void			AddP(int pstart,int lstart,int start,int len,CFMT& fmt);
  void			AddImage(const wchar_t *href,int start,CFMT& fmt);
  void			AddQ(int start);
  void			AddToc(FilePos pos,int level);
  void			PushWS(); // check for leading spaces/format flags

  // stylesheet
  void			ParseStylesheet(const wchar_t *text,int len);

  // callbacks
  void			StartElement(const wchar_t *name,const wchar_t **attr);
  void			EndElement(const wchar_t *name);
  void			CharData(const wchar_t *text,int len);

  // expat callbacks
  static void		StartElementCB(void *udata,const wchar_t *name,
				       const wchar_t **attr);
  static void		EndElementCB(void *udata,const wchar_t *name);
  static void		CharDataCB(void *udata,const wchar_t *text,int len);
  static int		UnknownEncodingCB(void *data,const wchar_t *name,
					  XML_Encoding *info);
  static void		StartCDataCB(void *udata);
  static void		EndCDataCB(void *udata);

  // binary access
  ImageLoader::BinReader *OpenBinary(const wchar_t *name,const wchar_t **type,
    const wchar_t **vname);
};

// Definition of the static flag_names member of XMLParser::ElemFmt.
// NOTE(review): presumably one letter per element-format flag, in flag
// order - confirm against the ElemFmt declaration and the stylesheet parser.
const TCHAR   *XMLParser::ElemFmt::flag_names=_T("apofestcdlqxrivbgh");


// Builds an idle parser: no expat instance yet, no parse state attached, and
// every heap-backed container bound to the caller supplied heap.
//
// Initializers are listed in member declaration order, which is the order
// they run in regardless of how they are written. m_ps and m_sps start out
// as NULL instead of holding indeterminate values.
XMLParserImp::XMLParserImp(Meter *m,CBufFile *fp,Bookmarks *bmk,HANDLE heap) :
  XMLParser(m,fp,heap,bmk),
  m_frags(heap), m_pp(heap), m_links(heap), m_binarystorage(heap),
  m_inline_images(heap), m_buffer(heap), m_parser(NULL),
  m_references(heap), m_binaries(heap), m_ps(NULL), m_sps(NULL)
{
}

// Frees the expat parser if one was created. The image cache needs no
// explicit call here: m_imcache empties itself in its own destructor.
XMLParserImp::~XMLParserImp() {
  if (m_parser!=NULL)
    XML_ParserFree(m_parser);
}

// Number of paragraphs in sub-document docid, or 0 for an unknown id.
int	  XMLParserImp::Length(int docid) {
  if (docid>=0 && docid<m_docs.GetSize())
    return m_docs[docid].length;
  return 0;
}

// 0xAD is the code point of the Unicode SOFT HYPHEN character (U+00AD).
#define	  SHY 0xAD

// Returns paragraph number para of sub-document docid. An unknown document
// or an out of range paragraph yields a default-constructed Paragraph.
Paragraph XMLParserImp::GetParagraph(int docid,int para) {
  const bool  doc_ok=docid>=0 && docid<m_docs.GetSize();
  if (!doc_ok)
    return Paragraph();

  const Document&  doc=m_docs[docid];
  if (para<0 || para>=doc.length)
    return Paragraph();

  // translate the document-relative number into an index into m_pp
  return GetParagraphImp(doc.start+para);
}

// Name of sub-document docid; an empty string for an unknown id. An unnamed
// first sub-document is reported as "Main".
CString	XMLParserImp::GetSubDocName(int docid) {
  const bool  known=docid>=0 && docid<m_docs.GetSize();
  if (!known)
    return CString();

  const CString&  title=m_docs[docid].name;
  if (docid==0 && title.GetLength()==0)
    return _T("Main");
  return title;
}

// Limits v to the range [min,max]. The lower bound is checked first, so it
// takes precedence should the bounds ever be passed in inverted.
static int  RClamp(int v,int min,int max) {
  return v<min ? min : (v>max ? max : v);
}

Paragraph XMLParserImp::GetParagraphImp(int idx) {
  if (m_pp[idx].flags&PE_IMAGE) { // a very special case
    Paragraph	p(ImageLoader::IMAGE_VSIZE);
    p.flags=(BYTE)m_pp[idx].flags;
    p.lindent=m_pp[idx].li();
    p.rindent=m_pp[idx].ri();
    p.findent=m_pp[idx].fi();
    for (int i=0;i<ImageLoader::IMAGE_VSIZE;++i) {
      p.str[i]=L' ';
      p.cflags[i].wa=0;
    }
    // abuse links for image href
    p.links=Buffer<Paragraph::Link>(1);
    p.links[0].off=0;
    p.links[0].len=ImageLoader::IMAGE_VSIZE;
    p.links[0].target=m_pp[idx].name;
    p.flags|=Paragraph::image;
    return p;
  }
  // here we have to read the paragraphs from file
  int	      len=m_pp[idx+1].start-m_pp[idx].start,np=m_pp[idx].nfrags();
  int	      fragbase=m_pp[idx].idx();
  Paragraph   p(len);
  p.flags=(BYTE)m_pp[idx].flags;
  p.lindent=m_pp[idx].li();
  p.rindent=m_pp[idx].ri();
  p.findent=m_pp[idx].fi();

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -