⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xmlpullparseres.c

📁 xml解析器(在niosII环境下用C语言开发的)
💻 C
📖 第 1 页 / 共 5 页
字号:
#include  "XmlPullParserES.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>

/* Attribute type string; where it is reported is not visible in this part of the file. */
#define AttributeType "CDATA"
/* String keys naming the XML-declaration properties (version / standalone / content);
 * presumably consumed by a property lookup defined elsewhere -- TODO confirm. */
#define PROPERTY_XMLDECL_VERSION  "xmldecl-version"
#define PROPERTY_XMLDECL_STANDALONE "xmldecl-standalone"
#define PROPERTY_XMLDECL_CONTENT "xmldecl-content"
//char *str=NULL;
/* Three bytes: a backslash, the letter 'n' and the terminating NUL
 * (the literal is an escaped backslash followed by 'n', not a newline). */
char print_char[3]="\\n";
//extend
/* Scratch pointers. None of them is referenced in the visible part of the file;
 * presumably temporaries used while growing the stacks declared below -- TODO confirm. */
  char **arr;
  char **arr2;
  char **arr3; 
  char **carr;
  int *iarr1;
  int *iarr2;
  int *iarr;
  char** newNamespacePrefix;
  char** newNamespaceUri ;
  char**newEntityName;
   char** newEntityNameBuf;
   char** newEntityReplacement ;
   char** newEntityReplacementBuf;
// features are not resettable and typically default to true ...
	int	Namespacesprocess=true;
// global parser state
	int	  lineNumber;
	int	  columnNumber;
	/* Set by parseProlog() when the root start tag is found; while level < 1,
	 * nextImpl() uses it to choose parseEpilog() over parseProlog(). */
	int	  seenRoot;
	/* When set, parseEpilog() reports END_DOCUMENT. */
	int   reachedEnd;
	/* Most recently reported event type. */
  enum EventTypes eventType;
	/* When set, the next nextImpl() call reports END_TAG and sets pastEndTag. */
	int	  emptyElementTag;
	
	// element stack
	/* Current element nesting depth; nextImpl() treats level < 1 as "outside the root element". */
	int	   level;
	/* Indexed by level; nextImpl() clears elRawName[level] when it pops an element. */
	char	 **elRawName;
  int    *elRawNameEnd;
  int	   * elRawNameLine;
  
	char   **elName; 
  char   **elPrefix;
  char   **elUri;
	int	      elStackSize;
	/* Indexed by level; nextImpl() restores namespaceEnd from elNamespaceCount[level] on pop. */
	int	    *elNamespaceCount;
  
	// attribute stack
	int		  attributeCount;
	int		  attrPosSize;
 char    **attributeName;
 char    **attributePrefix;
 char    **attributeUri;
 char    **attributeValue;

	// namespace stack
	int		namespaceSize; 
	int		namespaceEnd;
  char    **namespacePrefix;
  char    **namespaceUri;

	// entity replacement stack
	int		entityEnd;
	int		entitySize;
	char  **entityName;
	char    **entityReplacement;
	char	   **entityNameBuf;
	char	   **entityReplacementBuf;
	
	// input buffer management
	char 	*reader_ptr;
	int            reader_size;
	char 	*inputEncoding;
	int		  bufLoadFactor;
	char	       *buf;
	int		  bufSoftLimit; 
	int	         preventBufferCompaction;

	int		bufAbsoluteStart; 
	int		bufStart;
	int		bufEnd;
	int		pos;
	int		posStart;
	int		posEnd;

	char      *pc;
	int		pcStart;
	int		pcEnd;

	// parsing state
	int	usePC;
	int	seenStartTag;
	int	seenEndTag;
	int	pastEndTag;
	int	seenAmpersand;
	int	seenMarkup;
	int	seenDocdecl;

	// transient variable set during each call to next/Token()
	int 	  tokenize;
	char 	*text;
	char 	*entityRefName;
	char 	*xmlDeclVersion;
	char 	*xmlDeclContent;
	int	   xmlDeclStandalone;
	char	*charRefOneCharBuf;
 
const static char emptyString[]= "";
const static char VERSION[]= "version";
const static char NCODING[] = "ncoding";
const static char TANDALONE[] = "tandalone";
const static char YES[] = "yes";
const static char NO[] = "no";

static const int READ_CHUNK_SIZE = 6 * 1024; 

/*
 * Copy len_ bytes from src_ + src_position_ to dst_ + dst_position_.
 *
 * The two ranges may overlap (for example when shifting data inside one
 * buffer), so memmove() is used; memcpy() is undefined for overlapping
 * regions. A NULL pointer or a non-positive length makes the call a no-op
 * instead of passing a huge size_t to the copy routine.
 *
 * No bounds checking is possible here: the caller must guarantee that both
 * ranges lie inside their buffers.
 */
void arraycopy(char* src_, int src_position_, char* dst_, int dst_position_, int len_)
{
	if (src_ == NULL || dst_ == NULL || len_ <= 0) {
		return;
	}
	memmove(dst_ + dst_position_, src_ + src_position_, (size_t)len_);
}

/*
 * Copy len_ bytes from src_ + src_position_ to dst_ + dst_position_ and
 * append a terminating NUL, leaving the first dst_position_ bytes of dst_
 * untouched.
 *
 * dst_ must therefore have room for dst_position_ + len_ + 1 bytes.
 * memmove() is used so that src_ and dst_ may refer to the same buffer.
 * A NULL dst_ is ignored; a NULL src_ or a non-positive len_ copies nothing
 * and only writes the terminator at dst_ + dst_position_.
 */
void stringcopy(char* src_, int src_position_, char* dst_, int dst_position_, int len_)
{
	char *out;

	if (dst_ == NULL) {
		return;
	}
	out = dst_ + dst_position_;
	if (src_ != NULL && len_ > 0) {
		memmove(out, src_ + src_position_, (size_t)len_);
		out += len_;
	}
	*out = '\0';
}


int nextImpl()
{ char *str=NULL;
	char  ch;
	int  hadCharData = false;
	int  needsMerging = false;
	int  cdStart,  cdEnd,  cdLen, oldStart, oldEnd, i;
	char*  resolvedEntity;
	text = NULL;
	pcEnd = pcStart = 0;
	usePC = false;
	bufStart = posEnd;
	if (pastEndTag) {
		pastEndTag = false;
		
    elRawName[level]=NULL;
     namespaceEnd = elNamespaceCount[level]; 
      --level;

 	}
	if (emptyElementTag) {
		emptyElementTag = false;
		pastEndTag = true;
		return eventType = END_TAG;
	}
	// document ::= prolog element Misc*
	if (level <1) {		
		if (seenRoot) {
			return parseEpilog();
		} else {
			return parseProlog();
		}		
	} else {/*解析根元素部分(level >=1)*/

		if (seenStartTag) {
			seenStartTag = false;
			return eventType = parseStartTag();
		}
		if (seenEndTag) {
			seenEndTag = false;
			return eventType = parseEndTag();
		}

		/* ASSUMPTION: on first character of content or markup!*/
		/* content ::= CharData? ((element | Reference | CDSect | PI | Comment
) CharData?)*   */
		if (seenMarkup) {   
			seenMarkup = false;
			ch = '<';
		} else if (seenAmpersand) {
			seenAmpersand = false;
			ch = '&';
		} else {
			ch = more();
		}
		posStart = pos - 1; /* this is correct start of event! */
		LOOP:
		while (true) {
			// work on MARKUP
			switch (ch ) {
				  case  '<' : {
					if (hadCharData && tokenize) {
						seenMarkup = true;
						return eventType = TEXT;
					}
					ch = more();
					switch (ch ) {
				         	case  '/': {
							if (!tokenize && hadCharData) {
							seenEndTag = true;
							return eventType = TEXT;
							}
							return eventType = parseEndTag();
				         	}
						case  '!' : {
							ch = more();
							if (ch == '-') {
								parseComment();
								if (tokenize) return eventType = COMMENT;
								if (!usePC && hadCharData) {
									needsMerging = true;
								} else {
									posStart = pos;  //completely ignore comment
								}
							} else if (ch == '[') {
						/* must remeber previous posStart/End as it merges with content of CDATA*/
									parseCDSect(hadCharData);
									if (tokenize) return eventType = CDSECT;
									cdStart = posStart;
									cdEnd = posEnd;
									cdLen = cdEnd - cdStart;
									if (cdLen > 0) { /* was there anything inside CDATA section?*/
										if (!usePC) {
											hadCharData = true;
											needsMerging = true;
										}
									}
							} else {                
								fprintf(stderr,"unexpected character in markup %s", printable(ch));
						 		return -1;
								}
               break;
						} 
						case  '?': {
							parsePI();
							if (tokenize) return eventType = PROCESSING_INSTRUCTION;
							if (!usePC && hadCharData) {
								needsMerging = true;
							} else {
								posStart = pos;  //completely ignore PI
							}
              break;
						} 
						default:{
							if (isNameStartChar(ch)) {
								if (!tokenize && hadCharData) {
									seenStartTag = true;
									return eventType = TEXT;
								}
								return eventType = parseStartTag();
							} else {                
							 	fprintf(stderr,"unexpected character in markup %s", printable(ch));
								 return -1;
							}
						}
					}
           break;
				  }
			case  '&':  {//work on ENTITY
				if (tokenize && hadCharData) {
					seenAmpersand = true;
					return eventType = TEXT;
				}
				oldStart = posStart + bufAbsoluteStart;
				oldEnd = posEnd +bufAbsoluteStart;
				resolvedEntity = parseEntityRef();
				if (tokenize) return eventType = ENTITY_REF;
				// check if replacement text can be resolved !
				if (resolvedEntity == NULL) {
					if (entityRefName == NULL) {
            entityRefName=(char*)malloc(posEnd - posStart);
            stringcopy(buf,posStart,entityRefName,0,posEnd - posStart);
            }
					fprintf(stderr,"could not resolve entity named '%s'", entityRefName);
					 return -1;
				}
				posStart = oldStart - bufAbsoluteStart;
				posEnd = oldEnd - bufAbsoluteStart;
				if (!usePC) {
					if (hadCharData) {
						joinPC(); 
						needsMerging = false;
					} else {
						usePC = true;
						pcStart = pcEnd = 0;
					}
				}
				/* write into PC replacement text - do merge for replacement text!*/
       			 for (i = 0; i < strlen(resolvedEntity); i++) {         			
						if (pcEnd >= strlen(pc)) ensurePC(pcEnd);
						pc[pcEnd++] = resolvedEntity[i];
				}
        break;
			} 
			default: {/*work on character data */
				if (needsMerging) {
					joinPC();  
					needsMerging = false;
				}
				/* CharData ::=   [^<&]* - ([^<&]* ']]>' [^<&]*)*/
				hadCharData = true;
				do {
					nomalizeInput() ;
					ch = more();
				} while (ch != '<' && ch != '&');
				posEnd = pos - 1;
				goto LOOP;  // skip ch = more() from below 
			}
		}
			ch = more();
		} // endless while(true)
	} 
}	

/*
 * Fetch the next event with tokenizing switched off.
 * Returns whatever nextImpl() returns.
 */
int next()
{
	int event;

	tokenize = false;
	event = nextImpl();
	return event;
}

/*
 * Fetch the next event with tokenizing switched on.
 * Returns whatever nextImpl() returns.
 */
int nextToken()
{
	int event;

	tokenize = true;
	event = nextImpl();
	return event;
}

void nomalizeInput()
{/* deal with normalization issues ...*/
  char ch;
	int  normalizedCR = false;
	if (ch == '\r') {
		normalizedCR = true;
		posEnd = pos - 1;		
		if (!usePC) {
			if (posEnd > posStart) {
				joinPC();
			} else {
				usePC = true;
				pcStart = pcEnd = 0;
			}
		} //assert usePC == true;
		if ((unsigned)pcEnd >= strlen(pc)) ensurePC(pcEnd); 
            	pc[pcEnd++] = '\n';
	} else if (ch == '\n') {
			if (!normalizedCR && usePC) {
				if ((unsigned)pcEnd >= strlen(pc)) ensurePC(pcEnd);
				pc[pcEnd++] = '\n';
			}
			normalizedCR = false;
	} else {
			if (usePC) {
				if ((unsigned)pcEnd >= strlen(pc)) ensurePC(pcEnd);
				pc[pcEnd++] = ch;
			}
	}
}

/*
 * Parse the document prolog and locate the root element.
 *
 *   prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
 *   Misc   ::= Comment | PI | S
 *
 * Returns the event type of what was found (whitespace, PI, DOCDECL and
 * COMMENT are only reported when tokenize is set), the result of
 * parseStartTag() for the root element, or -1 after printing a diagnostic
 * to stderr.
 */
int parseProlog()
{
	char cur;
	int sawSpace = false;

	/* Resume on the '<' left behind by the previous call, if any. */
	cur = seenMarkup ? buf[pos - 1] : more();
	seenMarkup = false;
	posStart = pos - 1;

	for (;;) {
		if (cur != '<') {
			/* Only whitespace may appear between prolog items. */
			if (!isS(cur)) {
				fprintf(stderr, "expected start tag name and not %s", printable(cur));
				return -1;
			}
			sawSpace = true;
			if (tokenize) {
				nomalizeInput();
			}
			cur = more();
			continue;
		}

		/* cur == '<': report pending whitespace first when tokenizing. */
		if (sawSpace && tokenize) {
			posEnd = pos - 1;
			seenMarkup = true;
			return eventType = IGNORABLE_WHITESPACE;
		}

		cur = more();
		if (cur == '?') {
			/* parsePI() result decides between reporting the PI and skipping it. */
			if (!parsePI()) {
				/* skip over - continue tokenizing */
				posStart = pos;
				sawSpace = false;
			} else if (tokenize) {
				return eventType = PROCESSING_INSTRUCTION;
			}
		} else if (cur == '!') {
			cur = more();
			if (cur == 'D') {
				if (seenDocdecl) {
					fprintf(stderr,"only one docdecl allowed in XML document");
					return -1;
				}
				seenDocdecl = true;
				parseDocdecl();
				if (tokenize) {
					return eventType = DOCDECL;
				}
			} else if (cur == '-') {
				parseComment();
				if (tokenize) {
					return eventType = COMMENT;
				}
			} else {
				fprintf(stderr, "unexpected markup <!%s", printable(cur));
				return -1;
			}
		} else if (cur != '/' && isNameStartChar(cur)) {
			/* Start of the root element. */
			seenRoot = true;
			return parseStartTag();
		} else {
			/* An end tag or any other character cannot start the root element. */
			fprintf(stderr, "expected start tag name and not %s", printable(cur));
			return -1;
		}

		cur = more();
	}
}

int parseEpilog()
{ // epilog: Misc*
	int gotS = false;	
	char ch;
	if (eventType == END_DOCUMENT) {
		fprintf(stderr, "already reached end of XML input");
		return -1;
	}
	if (reachedEnd) {
		return eventType = END_DOCUMENT;
	}
	if (seenMarkup) {
		ch = buf[pos - 1];
	} else {
		ch = more();
	}
	seenMarkup = false;
	posStart = pos - 1;
	while (true) {
		// deal with Misc
		// Misc ::= Comment | PI | S
		if (ch == '<') {
			if (gotS && tokenize) {
				posEnd = pos - 1;
				seenMarkup = true;
				return eventType = IGNORABLE_WHITESPACE;
			}
			ch = more();
			switch (ch ){
				case '?': {/* check if it is 'xml'--deal with XMLDecl*/
					parsePI();
					if (tokenize) return eventType = PROCESSING_INSTRUCTION;
          break;
				} 
				case  '!':{
					ch = more();
					if (ch == 'D') {
						parseDocdecl();
						if (tokenize) return eventType = DOCDECL;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -