📄 xml.cpp

📁 Using the UnderC Tokenizer Class: It's often necessary to parse complex text files, where standard…

💻 CPP

字号:

/* XML.CPP.
* demonstrates a very simple XML parser based on the Tokenizer class
 */
#include <cctype>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>
using namespace std;

#include <uc/tokens.h>

/* some useful wrappers around calls to Tokenizer; they throw an
   exception if there's a problem */
/* Exception type thrown by the parsing helpers in this file.
 *
 * Only the message POINTER is stored, never a copy of the text, so the
 * caller must pass a string that stays alive until the exception has been
 * handled (a string literal or a global buffer; not a local array or the
 * c_str() of a local std::string).
 */
class TokenException {
private:
   const char* m_err;   // non-owning pointer to the message text
public:
  // Accepts const char* so that string literals and other read-only
  // messages can be used; char* arguments still convert implicitly.
  TokenException(const char* msg) : m_err(msg) {}
  // Same name and shape as std::exception::what(), but this class does not
  // derive from std::exception. Never returns a null pointer, so the result
  // can always be streamed safely.
  const char* what() const
  { return m_err ? m_err : ""; }
};

/* Shared scratch buffer that holds the text of the most recent error.
   TokenException keeps only a pointer to its message, so the text has to
   live somewhere that survives stack unwinding; this global is that place. */
char buff[80];

/* Throws a TokenException describing what the parser expected.
 *
 * msg  Either a short description of the expected item (e.g. "string"),
 *      which is formatted into buff as "expecting <msg>", or buff itself
 *      when the caller has already written the complete message there
 *      (in which case it is thrown unchanged).
 *
 * The description is always copied into buff (truncated to fit) before
 * throwing, so callers may pass short-lived strings such as the c_str() of
 * a local std::string. msg must not point into the middle of buff.
 *
 * Never returns normally.
 */
void error_expecting(const char* msg)
{
  if (msg != buff) {
    const char* expected = msg ? msg : "(unspecified)";
    buff[0] = '\0';
    // strncat always terminates the result and never writes more than the
    // given count plus the terminator, so buff cannot overflow.
    std::strncat(buff, "expecting ", sizeof(buff) - 1);
    std::strncat(buff, expected, sizeof(buff) - 1 - std::strlen(buff));
  }
  throw TokenException(buff);
}

/* Consumes the next token from tok and insists that it is the single
 * character ch. On a mismatch, the message "expecting '<ch>'" is written
 * into the shared buff and handed to error_expecting(), which throws a
 * TokenException.
 */
void must_be(Tokenizer& tok, char ch)
{
  const TokenType got = tok.next();
  if (static_cast<char>(got) == ch)
    return;   // the token is the one we wanted

  sprintf(buff,"expecting '%c'\n",ch);
  error_expecting(buff);
}

/* Consumes the next token from tok and insists that it is a T_STRING
 * token; otherwise error_expecting("string") is called, which throws a
 * TokenException. Returns whatever tok.get_str() yields for that token
 * (presumably a pointer into storage owned by the tokenizer; confirm
 * against uc/tokens.h before holding on to it).
 */
char* must_be_string(Tokenizer& tok)
{
  const TokenType kind = tok.next();
  const bool is_string = ((char)kind == T_STRING);
  if (!is_string)
    error_expecting("string");
  return tok.get_str();
}

/* Reports whether the NUL-terminated string p contains at least one
 * character that is not whitespace (as classified by isspace).
 *
 * p  string to inspect; it is only read. A null pointer is treated as
 *    blank.
 *
 * Returns true as soon as a non-whitespace character is found, false for
 * an empty, all-whitespace or null string.
 */
bool non_blank(const char *p)
{
  if (!p) return false;
  for (; *p != '\0'; ++p) {
    // isspace() requires a value representable as unsigned char; a plain
    // char holding a byte >= 0x80 may be negative, which is undefined
    // behaviour unless converted first.
    if (!std::isspace(static_cast<unsigned char>(*p)))
      return true;
  }
  return false;
}

/* Parses one XML element and prints what it finds to cout.
 *
 * On entry the tokenizer's current token is the element name (the opening
 * '<' has already been consumed by the caller). The function prints
 *   "elem <name>"            for the element,
 *   "attrib <name>=<value>"  for each attribute, and
 *   "data <text>"            for the element's character data, if it is
 *                            not all whitespace,
 * and calls itself recursively for every subelement.
 *
 * Character data is collected in a fixed 512-byte buffer; text beyond 511
 * characters is dropped rather than written past the end of the buffer.
 *
 * Returns true when the element was closed, either by "/>" or by an end
 * tag ("</>" or "</name>" are both accepted). Returns false when
 * tok.getch() yields 0 before an end tag was seen (presumably end of
 * input). Malformed input is reported through error_expecting() /
 * must_be() / must_be_string(), which throw TokenException.
 */
bool parse_element(Tokenizer& tok)
{
  char data[512];
  string ename,name,val;
  ename = tok.get_str();
  cout << "elem " << ename << endl;
  TokenType t = tok.next();
  // pick up element attributes, if any
  while (t != '/' && t != '>') {
    if (t != T_TOKEN) error_expecting("word");
    name = tok.get_str();
    must_be(tok,'=');
    val  = must_be_string(tok);
    cout << "attrib " << name << '=' << val << endl;
    t = tok.next();
  }
  // "<name ... />": an empty element, nothing more to read
  if (t == '/') { must_be(tok,'>'); return true; }
  // pick up the element data
  char *p = data;
  char *const last = data + sizeof(data) - 1;  // slot reserved for the '\0'
  char ch;
  while ((ch = tok.getch()) != 0) {
    if (ch == '<') { // we hit a subelement
      t = tok.next();
      if (t == '/') { // element end tag
         *p = 0;
         p = data;
         if (non_blank(data))
           cout << "data " << data << endl;
         t = tok.next();
         if (t == '>') return true;
         if (t == T_TOKEN && ename == tok.get_str()) {
             if (tok.next() == '>') return true;
         }
         // wrong or unterminated end tag; error_expecting throws
         error_expecting(ename.c_str());
      } else {
         // the subelement's result is ignored; if it ran out of input the
         // loop ends on the next getch()
         parse_element(tok);
      }
    } else if (p < last) {
      // bounded append: never write past the end of data
      *p++ = ch;
    }
  }
  return false;
}

bool parse_xml(const char* file)
{
  Tokenizer tok(file);
  tok.set_flags(C_IDEN);
  TokenType t = tok.next();
  try {
    while (t == '<') {
      t = tok.next();
      /* skip meta stuff and commentary */
      if (t == '?' || t == '!') {
         do { t = tok.next(); } while (t != '>');
         t = tok.next(); // move to next '<'
      }
      else return parse_element(tok);
    }
  } catch(TokenException& e) {
    cerr << e.what() << endl;
  }
  return false;
}

int main(int argc, char**argv)
{
  parse_xml(argv[1]);
}

💿 文件大小 17 K

👤 上传用户 anlan001

📂 所属分类编译器/解释器

🏷️ 相关标签

#Tokenizer #necessary #standard #complex

⌨️ 快捷键说明

复制代码 Ctrl + C

搜索代码 Ctrl + F

全屏模式 F11

切换主题 Ctrl + Shift + D

显示快捷键 ?

增大字号 Ctrl + =

减小字号 Ctrl + -