⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xmlpullparseres.c

📁 xml解析器(在niosII环境下用C语言开发的)
💻 C
📖 第 1 页 / 共 5 页
字号:
			if (strcmp(name,namespacePrefix[i])==0) {		
        			if (name != NULL)      { tempStr=name;}
				fprintf(stderr, "duplicated namespace declaration for %s prefix", tempStr);
				return -1;
			}
		}
   
		++namespaceEnd;
    
	} else {
		if (!usePC) {
      attributeValue[attributeCount]=(char*)malloc(pos - 1 -posStart);
     stringcopy(buf, posStart,attributeValue[attributeCount],0,pos - 1 -posStart);
     //printf("\nattributeValue[%d]=%s",attributeCount,attributeValue[attributeCount]);
		} else {
      attributeValue[attributeCount]=(char*)malloc(pcEnd - pcStart);
     stringcopy(pc, pcStart,attributeValue[attributeCount],0,pcEnd - pcStart);
    }
		++attributeCount;
    
	}
  //printf("\nname=%s",name);

//for(i=0;i<namespaceEnd;i++) printf("\n----namespacePrefix[%d]=%s",i,namespacePrefix[i]);
	posStart = prevPosStart - bufAbsoluteStart;
	return ch;
}

/*
 * Consume the rest of an XML comment.
 *
 * Precondition: the caller has already read "<!-"; the next character
 * must be the second '-' of the opening "<!--".
 *
 * When tokenize is set, posStart/posEnd are set around the comment text
 * and, if usePC is set, pcEnd is moved back by two.
 *
 * Returns true once the closing "-->" has been read, or -1 (after
 * writing a message to stderr) when the opening dash is missing or
 * "--" is followed by anything other than '>'.
 */
int parseComment()
{
	int dashRun = 0;	/* consecutive '-' just seen: 0, 1 or 2 */
	char c = more();
	int startLine = lineNumber;
	int startColumn = columnNumber;

	if (c != '-') {
		fprintf(stderr,"expected <!-- for comment start");
		return -1;
	}
	if (tokenize) {
		posStart = pos;
	}

	for (;;) {
		c = more();

		/* "--" inside a comment is only legal as part of the closing "-->" */
		if (dashRun == 2 && c != '>') {
			fprintf(stderr,"in comment after two dashes (--) next character must be > not %s", printable(c));
			fprintf(stderr,"comment started on line %d and column %d was not closed",startLine, startColumn);
			return -1;
		}

		if (c == '-') {
			++dashRun;
		} else {
			if (c == '>' && dashRun == 2) {
				break;	/* reached the terminating "-->" */
			}
			dashRun = 0;
		}

		if (tokenize) {
			nomalizeInput();
		}
	}

	if (tokenize) {
		posEnd = pos - 3;
		if (usePC) {
			pcEnd -= 2;
		}
	}
	return true;
}

/*
 * Parse a processing instruction, or the XML declaration when the
 * target is "xml" (in any letter case).
 *
 * Precondition: the caller has already read "<?".
 *
 * Returns true when an ordinary processing instruction was consumed up
 * to and including "?>". Returns false when the XML declaration was
 * consumed (its text after "<?xml" is stored in xmlDeclContent as a
 * NUL-terminated string), and also false on every error path: reserved
 * target outside the document start, target not in lowercase, failure
 * reported by parseXmlDecl, or allocation failure. A message is written
 * to stderr on the error paths detected here.
 */
int parsePI()
{	//PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
	//ASSUMPTION: seen <?
	const int curLine = lineNumber;
	const int curColumn = columnNumber;
	/* Stored as an absolute position (pos + bufAbsoluteStart); it is
	   converted back to a buf index at the point of use.
	   NOTE(review): presumably because more() can shift buf - confirm. */
	int piTargetStart = pos + bufAbsoluteStart;
	int piTargetEnd = -1;	/* -1 until the first whitespace after the target is seen */
	int seenQ = false;
	int off , len , target;
	char ch;
	if (tokenize) posStart = pos;
	while (true) {
		// scan until it hits ?>
		ch = more();
		if (ch == '?') {
			seenQ = true;
		} else if (ch == '>') {
			if (seenQ) {
				break;  // found end sequence
			}
			seenQ = false;
		} else {
			if (piTargetEnd == -1 && isS(ch)) {
				piTargetEnd = pos - 1 + bufAbsoluteStart;
				/* PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))*/
				if ((piTargetEnd - piTargetStart) == 3) {
					/* buf must be indexed with the buffer-relative offset,
					   not with the absolute position */
					target = piTargetStart - bufAbsoluteStart;
					if ((buf[target] == 'x' || buf[target] == 'X')
						&& (buf[target + 1] == 'm' || buf[target + 1] == 'M')
						&& (buf[target + 2] == 'l' || buf[target + 2] == 'L')
					) {
						if (piTargetStart > 3) {
							/*<?xml is allowed as first characters in input ...*/
							fprintf(stderr,"processing instruction can not have PITarget with reserveld xml name");
							fprintf(stderr,"processing instruction started on line %d and column %d not closed",curLine,curColumn);
							return false;
						}
						/* every letter has to be lowercase, so reject as soon
						   as any single one differs */
						if (buf[target] != 'x'
							|| buf[target + 1] != 'm'
							|| buf[target + 2] != 'l') {
							fprintf(stderr,	"XMLDecl must have xml name in lowercase");
							fprintf(stderr, "processing instruction started on line %d and column %d was not closed",curLine,curColumn);
							return false;
						}
						if (parseXmlDecl(ch) < 0) {
							/* the declaration was malformed; do not copy its text */
							return false;
						}
						if (tokenize) posEnd = pos-2;//pos-1
						off = (piTargetStart-bufAbsoluteStart+3);
						len = (pos-2-off);
						if (len < 0) len = 0;
						/* one extra byte so the stored text is a proper C string */
						xmlDeclContent=(char*)malloc(len + 1);
						if (xmlDeclContent == NULL) {
							fprintf(stderr,"out of memory while storing XML declaration content");
							return false;
						}
						stringcopy(buf, off,xmlDeclContent,0,len);
						xmlDeclContent[len] = '\0';
						return false;
					}
				}
			}
			seenQ = false;
		}
		if (tokenize)  nomalizeInput();
	}
	if (tokenize) {
		posEnd = pos - 2;
		--pcEnd;
	}
	return true;
}

/*
 * Parse the VersionInfo part of an XML declaration and hand the rest of
 * the declaration to parseXmlDeclWithVersion().
 *
 * ch: the character most recently read by the caller (the first
 *     whitespace after "<?xml").
 *
 * Buffer compaction is disabled while the declaration is parsed so that
 * positions recorded here stay valid; it is re-enabled on every exit
 * path, including the error ones.
 *
 * Returns true on success, or -1 when the declaration is malformed
 * (message written to stderr here or by parseXmlDeclWithVersion).
 */
int  parseXmlDecl(char ch)
{
	int versionEnd, versionStart;
	char quotChar;
	int result = -1;

	//XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
	//make sure that relative positions will stay correct!
	preventBufferCompaction = true;
	bufStart = 0;

	// --- parse VersionInfo
	/* VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum'"') */
	ch = skipS(ch);
	ch = requireInput(ch, VERSION);
	// Eq ::= S? '=' S?
	ch = skipS(ch);
	if (ch != '=') {
		fprintf(stderr, "expected equals sign (=) after version and not %s",printable(ch));
		goto done;
	}
	ch = more();
	ch = skipS(ch);
	if (ch != '\'' && ch != '"') {
		fprintf(stderr,"expected apostrophe (') or quotation mark (\") after version and not %s",printable(ch));
		goto done;
	}
	quotChar = ch;
	versionStart = pos;
	ch = more();
	//VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
	while (ch != quotChar) {
		if ((ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z') && (ch < '0' ||
		     ch > '9')&& ch != '_' && ch != '.' && ch != ':' && ch != '-') {
			fprintf(stderr,"<?xml version value expected to be in ([a-zA-Z0-9_.:] | '-') not %s",printable(ch));
			goto done;
		}
		ch = more();
	}
	versionEnd = pos - 1;

	/* propagate a failure in the remainder of the declaration instead of
	   reporting success unconditionally */
	if (parseXmlDeclWithVersion(versionStart, versionEnd) < 0) {
		goto done;
	}
	result = true;

done:
	// allow buffer compaction again - pos MAY change
	preventBufferCompaction = false;
	return result;
}

/*
 * Parse the part of an XML declaration that follows the version value:
 * optional EncodingDecl, optional SDDecl, optional whitespace and the
 * closing "?>".
 *
 * versionStart_ / versionEnd_: indices into buf delimiting the version
 * value (end exclusive).
 *
 * On success xmlDeclVersion holds a NUL-terminated copy of the version;
 * inputEncoding is set when an encoding is declared and
 * xmlDeclStandalone when standalone is declared.
 *
 * Returns true on success, or -1 after writing a message to stderr when
 * the declaration is malformed or memory cannot be allocated.
 *
 * NOTE(review): only the version string "1.1" is accepted, so documents
 * declaring version "1.0" are rejected - confirm this is intended.
 */
int parseXmlDeclWithVersion(int versionStart_, int versionEnd_)
{
	char quotChar,  ch ;
	int encodingStart, encodingEnd, encodingLen;
	int versionLen = versionEnd_ - versionStart_;

	// check version is "1.1"
	if ((versionLen != 3)
		|| buf[versionStart_] != '1'
		|| buf[versionStart_ + 1] != '.'
		|| buf[versionStart_ + 2] != '1') {
		/* precision is clamped because a negative one would print up to the NUL */
		fprintf(stderr,"only 1.1 is supported as <?xml version not '%.*s'",
			versionLen > 0 ? versionLen : 0, buf + versionStart_);
		return -1;
	}
	/* one extra byte so the stored version is a proper C string */
	xmlDeclVersion=(char*)malloc(versionLen + 1);
	if (xmlDeclVersion == NULL) {
		fprintf(stderr,"out of memory while storing <?xml version");
		return -1;
	}
	stringcopy(buf, versionStart_,xmlDeclVersion,0,versionLen);
	xmlDeclVersion[versionLen] = '\0';

	/*EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )*/
	ch = more();
	ch = skipS(ch);
	if (ch == 'e') {
		ch = more();
		ch = requireInput(ch, NCODING);
		ch = skipS(ch);
		if (ch != '=') {
			fprintf(stderr,"expected equals sign (=) after encoding and not %s",printable(ch));
			return -1;
		}
		ch = more();
		ch = skipS(ch);
		if (ch != '\'' && ch != '"') {
			fprintf(stderr,"expected (') or(\") after encoding and not %s",printable(ch));
			return -1;
		}
		quotChar = ch;
		encodingStart = pos;
		ch = more();
		/* EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*   */
		if ((ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z')) {
			fprintf(stderr,"<?xml encoding name expected to start with [A-Za-z] not %s",printable(ch));
			return -1;
		}
		ch = more();
		while (ch != quotChar) {
			if ((ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z') && (ch < '0'
			     || ch > '9')&& ch != '.' && ch != '_' && ch != '-') {
				fprintf(stderr,"<?xml encoding value expected to be in ([A-Za-z0-9._] |'-') not %s",printable(ch));
				return -1;
			}
			ch = more();
		}
		encodingEnd = pos - 1;
		encodingLen = encodingEnd - encodingStart;
		if (encodingLen < 0) encodingLen = 0;
		/*reconcile with setInput encodingName  */
		inputEncoding=(char*)malloc(encodingLen + 1);
		if (inputEncoding == NULL) {
			fprintf(stderr,"out of memory while storing <?xml encoding");
			return -1;
		}
		stringcopy(buf, encodingStart,inputEncoding,0,encodingLen);
		inputEncoding[encodingLen] = '\0';
		ch = more();
	}
	ch = skipS(ch);
	/* SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes'| 'no') '"'))*/
	if (ch == 's') {
		ch = more();
		ch = requireInput(ch, TANDALONE);
		ch = skipS(ch);
		if (ch != '=') {
			fprintf(stderr,"expected equals sign (=) after standalone and not %s",printable(ch));
			return -1;
		}
		ch = more();
		ch = skipS(ch);
		if (ch != '\'' && ch != '"') {
			fprintf(stderr,"expected apostrophe (') or quotation mark (\") after standalone and not %s",printable(ch));
			return -1;
		}
		quotChar = ch;
		ch = more();
		if (ch == 'y') {
			ch = requireInput(ch, YES);
			xmlDeclStandalone = true;
		} else if (ch == 'n') {
			ch = requireInput(ch, NO);
			xmlDeclStandalone = false;
		} else {
			fprintf(stderr,"expected 'yes' or 'no' after standalone and not %s",printable(ch));
			return -1;
		}
		if (ch != quotChar) {
			fprintf(stderr,"expected %c after standalone value not %s", quotChar, printable(ch));
			return -1;
		}
		ch = more();
	}
	ch = skipS(ch);
	if (ch != '?') {
		fprintf(stderr,"expected ?> as last part of <?xml not %s",printable(ch));
		return -1;
	}
	ch = more();
	if (ch != '>') {
		fprintf(stderr,"expected ?> as last part of <?xml not %s",printable(ch));
		return -1;
	}
	return true;
}

/*
 * Skip over a document type declaration.
 *
 * Precondition: the caller has already read "<!D"; the remaining
 * letters "OCTYPE" are verified here.
 *
 * The body is not interpreted: characters are read until a '>' is found
 * outside any '[' ... ']' pair. posStart is set just after the keyword
 * and posEnd just before the closing '>'.
 *
 * Returns true on success, or -1 (after writing a message to stderr)
 * when the keyword is not "DOCTYPE".
 */
int parseDocdecl()
{
	//ASSUMPTION: seen <!D
	static const char keywordRest[] = "OCTYPE";
	int depth = 0;		/* nesting level of '[' ... ']' */
	int k;
	char c;

	for (k = 0; keywordRest[k] != '\0'; ++k) {
		c = more();
		if (c != keywordRest[k]) {
			fprintf(stderr,"expected <!DOCTYPE");
			return -1;
		}
	}

	posStart = pos;
	/* doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('['
	                   (markupdecl | DeclSep)* ']' S?)? '>'      */
	for (;;) {
		c = more();
		switch (c) {
		case '[':
			++depth;
			break;
		case ']':
			--depth;
			break;
		default:
			break;
		}
		if (depth == 0 && c == '>') {
			break;	/* end of the declaration */
		}
		if (tokenize) {
			nomalizeInput();
		}
	}
	posEnd = pos - 1;
	return true;
}

/*
 * Parse a CDATA section.
 *
 * Precondition: the caller has already read "<![" ; the remaining
 * "CDATA[" is verified here.
 *
 * hadCharData: non-zero when character data precedes this section; with
 * tokenize set and usePC clear, that data is either joined via joinPC()
 * or usePC is switched on with an empty pc range.
 *
 * On success posStart/posEnd delimit the section content in buf.
 *
 * Returns true once "]]>" has been read, or -1 (after writing a message
 * to stderr) when the section start is malformed.
 */
int parseCDSect(int hadCharData)
{	// CDSect ::= CDStart CData CDEnd
	// CDStart ::=  '<![CDATA['
	// CData ::= (Char* - (Char* ']]>' Char*))
	// CDEnd ::= ']]>'
	//ASSUMPTION: seen <![
	static const char startRest[] = "CDATA[";
	int seenBracket = false;
	int seenBracketBracket = false;
	int cdStart , curpos;
	int i;
	char ch;

	for (i = 0; startRest[i] != '\0'; i++) {
		ch = more();
		if (ch != startRest[i]) {
			fprintf(stderr,"expected <![CDATA[ for CDATA section start");
			return -1;
		}
	}

	cdStart = pos + bufAbsoluteStart;
	/* defined value even though the loop below always assigns it before
	   it can exit */
	curpos = pos;

	if (tokenize && hadCharData && !usePC) {
		// posEnd is correct already!!!
		if (posEnd > posStart) {
			joinPC();
		} else {
			usePC = true;
			pcStart = pcEnd = 0;
		}
	}

	while (true) {
		// scan until it hits "]]>"
		ch = more();
		if (ch == ']') {
			if (!seenBracket) {
				seenBracket = true;
			} else {
				seenBracketBracket = true;
			}
		} else if (ch == '>') {
			if (seenBracketBracket) {
				break;  // found end sequence!
			}
			seenBracket = false;
		} else {
			/* any other character interrupts a pending "]]"; without
			   clearing both flags, "]]" followed later by an unrelated
			   '>' would end the section early */
			seenBracket = false;
			seenBracketBracket = false;
		}
		curpos=pos;  //save current pos changed!
		if (tokenize) {
			// deal with normalization issues ...
			if (ch == '\r') { pos++;
			             posStart = cdStart - bufAbsoluteStart;}
			nomalizeInput();
		}
	}
	if (tokenize) {
		if (usePC) {
			pcEnd = pcEnd - 2;
		}
	}
	/* NOTE(review): pos is reset to the value saved in the last completed
	   iteration, i.e. before the final more(); presumably this compensates
	   for nomalizeInput() moving pos - confirm against that helper. */
	pos=curpos;
	posStart = cdStart - bufAbsoluteStart;
	posEnd = pos - 3;
	return true;
}

int fillBuf() 
{
	int compact ,expand = false;
	int newSize= 2 * strlen(buf);
  int len, ret, i;
	char * tagName;
	char newBuf[newSize];
	char expectedTagStack[128];
	
	if (reader_ptr == NULL){
		fprintf(stderr,"reader must be set before parsing is started");
		return -1;
	}
	// see if we are in compaction area
	if (bufEnd > bufSoftLimit) {
		// expand buffer it makes sense!
		compact = bufStart > bufSoftLimit;		
		if (preventBufferCompaction) {
			compact = false;
			expand = true;
		} else if (!compact) {
			//freeSpace
			if ((unsigned)bufStart < strlen(buf) / 2) {
				// less then half buffer available for compactin --> expand instead!
				expand = true;
			} else {
				// at least half of buffer can be reclaimed .
				compact = true;
			}
		}
		// if buffer almost full then compact it
		if (compact) {
			//assert bufStart > 0
			arraycopy(buf, bufStart, buf, 0, bufEnd - bufStart);
		} else if (expand) {
			newSize = 2 * strlen(buf);			
			arraycopy(buf, bufStart, newBuf, 0, bufEnd - bufStart);
			buf = newBuf;
			if (bufLoadFactor > 0) {
				bufSoftLimit = (bufLoadFactor * strlen(buf)) / 100;
			}
		} else {
				fprintf(stderr,"internal error in fillBuffer()");
				return -1;			
		}
		bufEnd -= bufStart;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -