/* xmlpullparseres.c */
#include "XmlPullParserES.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
/* String constants (attribute type and XML-declaration property names). */
#define AttributeType "CDATA"
#define PROPERTY_XMLDECL_VERSION "xmldecl-version"
#define PROPERTY_XMLDECL_STANDALONE "xmldecl-standalone"
#define PROPERTY_XMLDECL_CONTENT "xmldecl-content"
//char *str=NULL;
/* Holds the two characters backslash + 'n' followed by a NUL terminator. */
char print_char[3]="\\n";
/*
 * "extend" scratch pointers.
 * NOTE(review): presumably temporaries used while growing the stacks
 * declared below; the code that uses them is not in this part of the
 * file -- confirm.
 */
char **arr;
char **arr2;
char **arr3;
char **carr;
int *iarr1;
int *iarr2;
int *iarr;
char** newNamespacePrefix;
char** newNamespaceUri ;
char**newEntityName;
char** newEntityNameBuf;
char** newEntityReplacement ;
char** newEntityReplacementBuf;
// features are not resettable and typically default to true ...
int Namespacesprocess=true;
// global parser state
int lineNumber;
int columnNumber;
int seenRoot;            /* set by parseProlog() once the root start tag is found */
int reachedEnd;          /* when set, parseEpilog() reports END_DOCUMENT */
enum EventTypes eventType; /* last event returned by the parse functions */
int emptyElementTag;     /* when set, the next nextImpl() call returns END_TAG */
// element stack
int level;               /* nextImpl() parses prolog/epilog while level < 1 */
char **elRawName;
int *elRawNameEnd;
int * elRawNameLine;
char **elName;
char **elPrefix;
char **elUri;
int elStackSize;
int *elNamespaceCount;   /* nextImpl() restores namespaceEnd from this on pop */
// attribute stack
int attributeCount;
int attrPosSize;
char **attributeName;
char **attributePrefix;
char **attributeUri;
char **attributeValue;
// namespace stack
int namespaceSize;
int namespaceEnd;
char **namespacePrefix;
char **namespaceUri;
// entity replacement stack
int entityEnd;
int entitySize;
char **entityName;
char **entityReplacement;
char **entityNameBuf;
char **entityReplacementBuf;
// input buffer management
char *reader_ptr;
int reader_size;
char *inputEncoding;
int bufLoadFactor;
char *buf;               /* input buffer; buf[pos - 1] is re-read as the current character */
int bufSoftLimit;
int preventBufferCompaction;
int bufAbsoluteStart;    /* added to posStart/posEnd in nextImpl() to save them across parseEntityRef() */
int bufStart;
int bufEnd;
int pos;
int posStart;            /* start index in buf of the current event */
int posEnd;              /* end index in buf of the current event */
char *pc;                /* secondary character buffer used when usePC is set */
int pcStart;
int pcEnd;               /* next write index into pc */
// parsing state
int usePC;               /* nonzero when event text is being collected in pc */
/* The seenXxx flags below are set by nextImpl() when it returns TEXT early,
   so that the following call resumes with the pending construct. */
int seenStartTag;
int seenEndTag;
int pastEndTag;          /* when set, nextImpl() pops the element stack first */
int seenAmpersand;
int seenMarkup;
int seenDocdecl;         /* parseProlog() rejects a second docdecl when set */
// transient variable set during each call to next/Token()
int tokenize;            /* false via next(), true via nextToken() */
char *text;              /* reset to NULL at the start of nextImpl() */
char *entityRefName;
char *xmlDeclVersion;
char *xmlDeclContent;
int xmlDeclStandalone;
char *charRefOneCharBuf;
/*
 * Keyword strings.
 * NOTE(review): NCODING and TANDALONE lack their first letter -- presumably
 * they are matched after that letter has already been consumed; confirm
 * against the XML declaration parser.
 */
const static char emptyString[]= "";
const static char VERSION[]= "version";
const static char NCODING[] = "ncoding";
const static char TANDALONE[] = "tandalone";
const static char YES[] = "yes";
const static char NO[] = "no";
static const int READ_CHUNK_SIZE = 6 * 1024;
/*
 * Copy len_ bytes from src_ + src_position_ to dst_ + dst_position_.
 *
 * The two regions may overlap (for example when shifting data inside a
 * single buffer), so memmove is used: memcpy has undefined behaviour for
 * overlapping regions.
 *
 * A NULL pointer or a non-positive length is treated as "nothing to copy".
 * The caller must guarantee that both regions are large enough.
 */
void arraycopy(char* src_, int src_position_, char* dst_, int
dst_position_, int len_)
{
    if (src_ == NULL || dst_ == NULL || len_ <= 0) {
        return;
    }
    memmove(dst_ + dst_position_, src_ + src_position_, (size_t)len_);
}
/*
 * Copy len_ bytes from src_ + src_position_ to dst_ + dst_position_ and
 * write a terminating NUL right after them, so that dst_ becomes
 * "first dst_position_ bytes of dst_" + "copied bytes".
 *
 * dst_ must have room for dst_position_ + len_ + 1 bytes.
 * The bytes before dst_position_ are left untouched (the previous version
 * copied them onto themselves with memcpy, which is undefined for
 * overlapping regions and had no effect). memmove is used for the payload
 * so that src_ and dst_ may refer to the same buffer.
 *
 * A NULL pointer or a negative dst_position_ makes the call a no-op;
 * a non-positive len_ only writes the terminator.
 */
void stringcopy(char* src_, int src_position_, char* dst_, int
dst_position_, int len_)
{
    char *out;

    if (src_ == NULL || dst_ == NULL || dst_position_ < 0) {
        return;
    }
    out = dst_ + dst_position_;
    if (len_ > 0) {
        memmove(out, src_ + src_position_, (size_t)len_);
        out += len_;
    }
    *out = '\0';
}
/*
 * Core parsing step shared by next() and nextToken().
 *
 * Resets the per-event state, finishes any pending end-tag bookkeeping,
 * then either delegates to parseProlog()/parseEpilog() (level < 1) or
 * parses element content. The global `tokenize` selects the mode: when it
 * is set, comments, CDATA sections, processing instructions and entity
 * references are returned as separate events; otherwise they are merged
 * into, or skipped around, the surrounding character data.
 *
 * Returns the event type (also stored in the global eventType on the
 * paths that assign it here), or -1 after printing a message to stderr
 * when unexpected markup or an unresolvable entity is encountered.
 */
int nextImpl()
{
    char ch;
    int hadCharData = false;
    int needsMerging = false;
    int cdStart, cdEnd, cdLen, oldStart, oldEnd, i;
    int resolvedLen;
    char* resolvedEntity;

    text = NULL;
    pcEnd = pcStart = 0;
    usePC = false;
    bufStart = posEnd;
    if (pastEndTag) {
        /* the previous event was an END_TAG: pop the element stack now */
        pastEndTag = false;
        elRawName[level]=NULL;
        namespaceEnd = elNamespaceCount[level];
        --level;
    }
    if (emptyElementTag) {
        /* report the implicit end tag of the pending empty element */
        emptyElementTag = false;
        pastEndTag = true;
        return eventType = END_TAG;
    }
    // document ::= prolog element Misc*
    if (level <1) {
        if (seenRoot) {
            return parseEpilog();
        } else {
            return parseProlog();
        }
    } else { /* parse the content of the root element (level >= 1) */
        if (seenStartTag) {
            seenStartTag = false;
            return eventType = parseStartTag();
        }
        if (seenEndTag) {
            seenEndTag = false;
            return eventType = parseEndTag();
        }
        /* ASSUMPTION: on first character of content or markup! */
        /* content ::= CharData? ((element | Reference | CDSect | PI | Comment
           ) CharData?)* */
        if (seenMarkup) {
            seenMarkup = false;
            ch = '<';
        } else if (seenAmpersand) {
            seenAmpersand = false;
            ch = '&';
        } else {
            ch = more();
        }
        posStart = pos - 1; /* this is correct start of event! */
    LOOP:
        while (true) {
            // work on MARKUP
            switch (ch ) {
            case '<' : {
                if (hadCharData && tokenize) {
                    /* hand out the text first; resume at '<' on the next call */
                    seenMarkup = true;
                    return eventType = TEXT;
                }
                ch = more();
                switch (ch ) {
                case '/': {
                    if (!tokenize && hadCharData) {
                        seenEndTag = true;
                        return eventType = TEXT;
                    }
                    return eventType = parseEndTag();
                }
                case '!' : {
                    ch = more();
                    if (ch == '-') {
                        parseComment();
                        if (tokenize) return eventType = COMMENT;
                        if (!usePC && hadCharData) {
                            needsMerging = true;
                        } else {
                            posStart = pos; // completely ignore comment
                        }
                    } else if (ch == '[') {
                        /* must remember previous posStart/End as it merges with content of CDATA */
                        parseCDSect(hadCharData);
                        if (tokenize) return eventType = CDSECT;
                        cdStart = posStart;
                        cdEnd = posEnd;
                        cdLen = cdEnd - cdStart;
                        if (cdLen > 0) { /* was there anything inside CDATA section? */
                            if (!usePC) {
                                hadCharData = true;
                                needsMerging = true;
                            }
                        }
                    } else {
                        fprintf(stderr,"unexpected character in markup %s", printable(ch));
                        return -1;
                    }
                    break;
                }
                case '?': {
                    parsePI();
                    if (tokenize) return eventType = PROCESSING_INSTRUCTION;
                    if (!usePC && hadCharData) {
                        needsMerging = true;
                    } else {
                        posStart = pos; // completely ignore PI
                    }
                    break;
                }
                default:{
                    if (isNameStartChar(ch)) {
                        if (!tokenize && hadCharData) {
                            seenStartTag = true;
                            return eventType = TEXT;
                        }
                        return eventType = parseStartTag();
                    } else {
                        fprintf(stderr,"unexpected character in markup %s", printable(ch));
                        return -1;
                    }
                }
                }
                break;
            }
            case '&': { // work on ENTITY
                if (tokenize && hadCharData) {
                    seenAmpersand = true;
                    return eventType = TEXT;
                }
                /* save the event bounds as absolute offsets across parseEntityRef() */
                oldStart = posStart + bufAbsoluteStart;
                oldEnd = posEnd +bufAbsoluteStart;
                resolvedEntity = parseEntityRef();
                if (tokenize) return eventType = ENTITY_REF;
                // check if replacement text can be resolved !
                if (resolvedEntity == NULL) {
                    if (entityRefName == NULL) {
                        /* +1: stringcopy() stores a terminating NUL after the
                           copied bytes, so the name needs one extra byte */
                        entityRefName=(char*)malloc(posEnd - posStart + 1);
                        if (entityRefName != NULL) {
                            stringcopy(buf,posStart,entityRefName,0,posEnd - posStart);
                        }
                    }
                    /* never hand a NULL pointer to %s (allocation may have failed) */
                    fprintf(stderr,"could not resolve entity named '%s'",
                            entityRefName != NULL ? entityRefName : "");
                    return -1;
                }
                posStart = oldStart - bufAbsoluteStart;
                posEnd = oldEnd - bufAbsoluteStart;
                if (!usePC) {
                    if (hadCharData) {
                        joinPC();
                        needsMerging = false;
                    } else {
                        usePC = true;
                        pcStart = pcEnd = 0;
                    }
                }
                /* write into PC replacement text - do merge for replacement text! */
                /* NOTE(review): strlen(pc) is used as the capacity of pc here;
                   that only holds if pc is kept NUL-terminated at its
                   capacity -- confirm against ensurePC(). */
                resolvedLen = (int)strlen(resolvedEntity);
                for (i = 0; i < resolvedLen; i++) {
                    if (pcEnd >= strlen(pc)) ensurePC(pcEnd);
                    pc[pcEnd++] = resolvedEntity[i];
                }
                break;
            }
            default: { /* work on character data */
                if (needsMerging) {
                    joinPC();
                    needsMerging = false;
                }
                /* CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
                hadCharData = true;
                do {
                    nomalizeInput() ;
                    ch = more();
                } while (ch != '<' && ch != '&');
                posEnd = pos - 1;
                goto LOOP; // skip ch = more() from below
            }
            }
            ch = more();
        } // endless while(true)
    }
}
/*
 * Advance to the next event in non-tokenizing mode: clears the global
 * `tokenize` flag and runs one nextImpl() step.
 * Returns whatever nextImpl() returns.
 */
int next()
{
    int event;

    tokenize = false;
    event = nextImpl();
    return event;
}
/*
 * Advance to the next event in tokenizing mode: sets the global
 * `tokenize` flag and runs one nextImpl() step.
 * Returns whatever nextImpl() returns.
 */
int nextToken()
{
    int event;

    tokenize = true;
    event = nextImpl();
    return event;
}
void nomalizeInput()
{/* deal with normalization issues ...*/
char ch;
int normalizedCR = false;
if (ch == '\r') {
normalizedCR = true;
posEnd = pos - 1;
if (!usePC) {
if (posEnd > posStart) {
joinPC();
} else {
usePC = true;
pcStart = pcEnd = 0;
}
} //assert usePC == true;
if ((unsigned)pcEnd >= strlen(pc)) ensurePC(pcEnd);
pc[pcEnd++] = '\n';
} else if (ch == '\n') {
if (!normalizedCR && usePC) {
if ((unsigned)pcEnd >= strlen(pc)) ensurePC(pcEnd);
pc[pcEnd++] = '\n';
}
normalizedCR = false;
} else {
if (usePC) {
if ((unsigned)pcEnd >= strlen(pc)) ensurePC(pcEnd);
pc[pcEnd++] = ch;
}
}
}
/*
 * Parse the document prolog and stop at the root element.
 *
 *   prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
 *   Misc   ::= Comment | PI | S
 *
 * In tokenizing mode (global `tokenize` set) whitespace, processing
 * instructions, the docdecl and comments are each returned as an event;
 * otherwise they are consumed silently until the root start tag is found.
 *
 * Returns the event type, the result of parseStartTag() for the root
 * element, or -1 after printing a message to stderr on malformed input.
 */
int parseProlog()
{
    char cur;
    int sawSpace;

    /* resume on the already-consumed '<' if the previous event stopped there */
    if (!seenMarkup) {
        cur = more();
    } else {
        cur = buf[pos - 1];
    }
    seenMarkup = false;
    posStart = pos - 1;
    sawSpace = false;

    for (;;) {
        if (cur != '<') {
            /* outside markup only whitespace is allowed in the prolog */
            if (!isS(cur)) {
                fprintf(stderr, "expected start tag name and not %s", printable(cur));
                return -1;
            }
            sawSpace = true;
            if (tokenize) {
                nomalizeInput();
            }
            cur = more();
            continue;
        }

        /* markup starts here: report pending whitespace first when tokenizing */
        if (sawSpace && tokenize) {
            posEnd = pos - 1;
            seenMarkup = true;
            return eventType = IGNORABLE_WHITESPACE;
        }

        cur = more();
        if (cur == '?') {
            /* processing instruction, or the XML declaration */
            if (!parsePI()) {
                /* skip over it and keep scanning */
                posStart = pos;
                sawSpace = false;
            } else if (tokenize) {
                return eventType = PROCESSING_INSTRUCTION;
            }
        } else if (cur == '!') {
            cur = more();
            if (cur == 'D') {
                if (seenDocdecl) {
                    fprintf(stderr,"only one docdecl allowed in XML document");
                    return -1;
                }
                seenDocdecl = true;
                parseDocdecl();
                if (tokenize) {
                    return eventType = DOCDECL;
                }
            } else if (cur == '-') {
                parseComment();
                if (tokenize) {
                    return eventType = COMMENT;
                }
            } else {
                fprintf(stderr, "unexpected markup <!%s", printable(cur));
                return -1;
            }
        } else if (cur != '/' && isNameStartChar(cur)) {
            /* first start tag of the document: this is the root element */
            seenRoot = true;
            return parseStartTag();
        } else {
            /* an end tag ('/') or any other character cannot open the root */
            fprintf(stderr, "expected start tag name and not %s", printable(cur));
            return -1;
        }
        cur = more();
    }
}
int parseEpilog()
{ // epilog: Misc*
int gotS = false;
char ch;
if (eventType == END_DOCUMENT) {
fprintf(stderr, "already reached end of XML input");
return -1;
}
if (reachedEnd) {
return eventType = END_DOCUMENT;
}
if (seenMarkup) {
ch = buf[pos - 1];
} else {
ch = more();
}
seenMarkup = false;
posStart = pos - 1;
while (true) {
// deal with Misc
// Misc ::= Comment | PI | S
if (ch == '<') {
if (gotS && tokenize) {
posEnd = pos - 1;
seenMarkup = true;
return eventType = IGNORABLE_WHITESPACE;
}
ch = more();
switch (ch ){
case '?': {/* check if it is 'xml'--deal with XMLDecl*/
parsePI();
if (tokenize) return eventType = PROCESSING_INSTRUCTION;
break;
}
case '!':{
ch = more();
if (ch == 'D') {
parseDocdecl();
if (tokenize) return eventType = DOCDECL;
/* [listing truncated here: the remainder of parseEpilog() is missing from this copy of the file] */