📄 xmlpullparseres.c
字号:
} else if (ch == '-') {
parseComment();
if (tokenize) return eventType = COMMENT;
} else {
fprintf(stderr, "unexpected markup <!%s", printable(ch));
break;
} break;
}
case '/': {
fprintf(stderr, "end tag not allowed in epilog but got %s", printable(ch));
break;
} break;
default:
if (isNameStartChar(ch)) {
fprintf(stderr, "start tag not allowed in epilog but got %s", printable(ch));
break;
} else {
fprintf(stderr, "in epilog expected ignorable content and not %s", printable(ch));
break;
} break;
}
} else if (isS(ch)) {
gotS = true;
if (tokenize) nomalizeInput();
} else {
fprintf(stderr,"in epilog non whitespace content is not allowed but got %s\n", printable(ch));
break;
}
ch = more();
}
reachedEnd = true;
if (tokenize && gotS) {
posEnd = pos; //LAST available character pos
return eventType = IGNORABLE_WHITESPACE;
}
return eventType = END_DOCUMENT;
}
int parseStartTag()
{
//ASSUMPTION ch is past <T
// STag ::= '<' Name (S Attribute)* S? '>'
// EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
int nameStart = pos - 1 + bufAbsoluteStart;
int colonPos = -1;
char ch = buf[pos - 1];
int elLen,i,j;
char* name = NULL;
char* prefix = NULL;
char* uri=NULL;
char* attrPrefix=NULL;
char* attrUri=NULL;
const char empty[]="";
char *elrawname;
++level;
posStart = pos - 2;
emptyElementTag = false;
attributeCount = 0;
// retrieve name
if (ch == ':' && Namespacesprocess){
fprintf(stderr,"colon can not be at element name start");
return -1;
}
while (true) {
ch = more();
if (!isNameChar(ch)) break;
if (ch == ':' && Namespacesprocess) {
if (colonPos != -1){
fprintf(stderr, "only one colon is allowed in name of element");
return -1;
}
colonPos = pos - 1 + bufAbsoluteStart;
}
}
ensureElementsCapacity();
elLen = (pos - 1) - (nameStart - bufAbsoluteStart);
if (elRawName[level] == NULL ) {
elrawname=(char*)malloc(elLen);
stringcopy(buf,(nameStart - bufAbsoluteStart),elrawname,0,elLen);
elRawName[level]=elrawname;
// printf("\nelRawName2[%d]=%s",level,elRawName2[level]);
}
// elRawNameEnd[level]=(int*)malloc(sizeof(int*));
elRawNameEnd[level] = elLen;
// printf("\n***elRawNameEnd[%d]=%d",level,elRawNameEnd[level]);
elRawNameLine[level] = lineNumber;
if (Namespacesprocess) {
if (colonPos != -1) {
prefix=(char*)malloc(colonPos - nameStart);
stringcopy(buf, (nameStart - bufAbsoluteStart),prefix,0,(colonPos - nameStart));
//elPrefix[level]=prefix;
elPrefix[level]=(char*)malloc(colonPos - nameStart);
strcpy(elPrefix[level],prefix);
name=(char*)malloc(pos - 2 - (colonPos - bufAbsoluteStart));
stringcopy(buf, colonPos + 1 - bufAbsoluteStart,name,0,pos - 2 - (colonPos - bufAbsoluteStart));
elName[level]=(char*)malloc(sizeof(char)*strlen(name));
strcpy(elName[level] ,name);
//elName[level] = name;
} else {
prefix =namespacePrefix[level]= NULL;
elPrefix[level]=prefix;
name=(char*)malloc(sizeof(char)*elLen);
stringcopy(buf,nameStart - bufAbsoluteStart,name,0,elLen);
elName[level]=(char*)malloc(sizeof(char)*strlen(name));
strcpy(elName[level] ,name);
}
} else {
name=(char*)malloc(sizeof(char)*elLen);
stringcopy(buf,nameStart - bufAbsoluteStart,name,0,elLen);
elName[level]=(char*)malloc(sizeof(char)*strlen(name));
strcpy(elName[level] ,name);
}
while (true) {
while (isS(ch)) {
ch = more();
} // skip additional white spaces
if (ch == '>') {
break;
} else if (ch == '/') {
if (emptyElementTag){
fprintf(stderr,"repeated / in tag declaration");
return -1;
}
emptyElementTag = true;
ch = more();
if (ch != '>')
{
fprintf(stderr,"expected > to end empty tag not %s", printable(ch));
return -1;
}
break;
} else if (isNameStartChar(ch)) {
ch = parseAttribute();
ch = more();
continue;
} else {
fprintf(stderr, "start tag unexpected character %s", printable(ch));
return -1;
}
}
/*when namespaces were declared we resolve them*/
if (Namespacesprocess) {
uri = getNamespace_from_pre(prefix);
if (*uri == NULL) {
if (*prefix == NULL) { // no prefix and no uri => use default namespace
uri = NO_NAMESPACE;
} else {
fprintf(stderr,"could not determine namespace bound to element prefix %s", prefix);
return -1;
}
}
elUri[level]=(char*)malloc(sizeof(char)*strlen(uri));
strcpy(elUri[level],uri);
//elUri[level] =uri;
// printf("\nelUri[%d]=%s",level,elUri[level]);
/* resolve attribute namespaces*/
for (i = 0; i< attributeCount; i++) {
attrPrefix = attributePrefix[i];
if (*attrPrefix != NULL) {
attrUri = getNamespace_from_pre(attrPrefix);
if (*attrUri == NULL) {
fprintf(stderr,"could not determine namespace for %s", attrPrefix);
return -1;
}
attributeUri[i] = (char*)attrUri;
} else {
attributeUri[i] = (char*)NO_NAMESPACE;
}
}
// check namespaced attribute uniqueness contraint!
for (i = 1; i < attributeCount; i++) {
for (j = 0; j < i; j++) {
if ((strcmp(attributeUri[j],attributeUri[i])==0)&&(strcmp(attributeName
[j],attributeName[i])==0)) {
fprintf(stderr, "duplicated attributes %s:%s and %s:%s",
(attributeUri[j])?attributeUri[j]:empty, attributeName[j],
(attributeUri[i])?attributeUri[i]:empty, attributeName[i]);
return -1;
}
}
} } else { // ! Namespacesprocess
// check raw attribute uniqueness contraint!
for (i = 1; i < attributeCount; i++) {
for (j = 0; j < i; j++) {
if (strcmp(attributeName[j],attributeName[i])==0) {
fprintf(stderr,"duplicated attributes %s and %s", attributeName[j],
attributeName[i]);
return -1;
}
}
}
}
elNamespaceCount[level] = namespaceEnd;
posEnd = pos;
return eventType = START_TAG;
}
/*
 * Parses an end tag once "</" has been consumed and checks that its name is
 * identical to the raw start-tag name recorded in elRawName[level]
 * (length elRawNameEnd[level]).
 *
 * Sets posStart/posEnd and pastEndTag. Returns END_TAG (also stored in
 * eventType) on success, or -1 after printing a diagnostic to stderr.
 */
int parseEndTag()
{
    //ASSUMPTION ch is past "</"
    //ETag ::= '</' Name S? '>'
    int nameStart, off, len, startLen, i;
    int mismatch;
    char *cbuf;
    char ch;

    ch = more();
    if (!isNameStartChar(ch)) {
        fprintf(stderr,"expected name start and not %s", printable(ch));
        return -1;
    }
    posStart = pos - 3;
    nameStart = pos - 1 + bufAbsoluteStart;
    do {
        ch = more();
    } while (isNameChar(ch));

    // check that end tag name is the same as start tag
    off = nameStart - bufAbsoluteStart;
    len = (pos - 1) - off;
    cbuf = elRawName[level];
    // a missing start-tag name is handled as an empty name, which can never
    // match because len is at least 1 here
    startLen = (cbuf != NULL) ? (int)elRawNameEnd[level] : 0;

    mismatch = (startLen != len);
    for (i = 0; !mismatch && i < len; i++) {
        if (buf[off + i] != cbuf[i]) {
            mismatch = true;
        }
    }
    if (mismatch) {
        // %.*s prints exactly the given number of chars, so neither name has
        // to be NUL-terminated and no temporary copies are allocated
        fprintf(stderr, "end tag name </%.*s> must match start tag name <%.*s> from line %d",
                len, buf + off,
                startLen, (cbuf != NULL) ? cbuf : "",
                elRawNameLine[level]);
        return -1;
    }

    while (isS(ch)) {
        ch = more();
    } // skip additional white spaces
    if (ch != '>') {
        fprintf(stderr,"expected > to finsh end tag not %s from line %d ", printable(ch), elRawNameLine[level]);
        return -1;
    }
    posEnd = pos;
    pastEndTag = true;
    return eventType = END_TAG;
}
char parseAttribute()
{
// parse attribute
// Attribute ::= Name Eq AttValue
int startsWithXmlns ;
int xmlnsPos=0;
int colonPos = -1;
char* resolvedEntity= NULL;
char* ns = NULL;
char* name = NULL;
char* prefix = NULL;
char delimit;
int normalizedCR=false;
char *tempStr="default";
int prefixLen,nameLen,i, startNs;
int prevPosStart = posStart + bufAbsoluteStart;
int nameStart = pos - 1 + bufAbsoluteStart;
char ch = buf[pos - 1];
if (ch == ':' && Namespacesprocess){
fprintf(stderr, "colon can not be at attribute name start");
return -1;
}
startsWithXmlns = Namespacesprocess && ch == 'x';
ch = more();
while (isNameChar(ch)) {
if (Namespacesprocess) {
if (startsWithXmlns && xmlnsPos < 5) {
++xmlnsPos;
if (xmlnsPos == 1) {
if (ch != 'm') startsWithXmlns = false;
} else if (xmlnsPos == 2) {
if (ch != 'l') startsWithXmlns = false;
} else if (xmlnsPos == 3) {
if (ch != 'n') startsWithXmlns = false;
} else if (xmlnsPos == 4) {
if (ch != 's') startsWithXmlns = false;
} else if (xmlnsPos == 5) {
if (ch != ':'){
fprintf(stderr,"after xmlns in attribute name must be colon ");
return -1;
}
}
}
if (ch == ':') {
if (colonPos != -1){
fprintf(stderr,"only one colon is allowed in attribute name ");
return -1;
}
colonPos = pos - 1 + bufAbsoluteStart;
}
}
ch = more();
}
ensureAttributesCapacity(attributeCount);
// start processing attributes
// work on prefixes and namespace URI
if (Namespacesprocess) {
if (xmlnsPos < 4) startsWithXmlns = false;
if (startsWithXmlns) {
if (colonPos != -1) {
nameLen = pos - 2 - (colonPos - bufAbsoluteStart);
if (nameLen == 0) {
fprintf(stderr,"namespace prefix is required after xmlns:");
return -1;
}
name=(char*)malloc(nameLen);
stringcopy(buf, colonPos - bufAbsoluteStart + 1,name,0,nameLen);
}
} else {
if (colonPos != -1) {
prefixLen = colonPos - nameStart;
prefix=(char*)malloc(prefixLen);
stringcopy(buf, nameStart - bufAbsoluteStart,prefix,0,prefixLen);
attributePrefix[attributeCount]=(char*)malloc(strlen(prefix));
strcpy(attributePrefix[attributeCount] ,prefix);
nameLen = pos - 2 - (colonPos - bufAbsoluteStart);
name=(char*)malloc(nameLen);
stringcopy(buf, colonPos - bufAbsoluteStart + 1,name,0,nameLen);
attributeName[attributeCount]=(char*)malloc(strlen(name));
strcpy(attributeName[attributeCount],name);
} else {
prefix = attributePrefix[attributeCount] = NULL;
name=(char*)malloc(pos - 1 - (nameStart - bufAbsoluteStart));
stringcopy(buf, nameStart - bufAbsoluteStart,name,0, pos - 1 - (nameStart - bufAbsoluteStart));
attributeName[attributeCount]=(char*)malloc(strlen(name));
strcpy(attributeName[attributeCount],name);
// attributeName[attributeCount] =name;
// printf("\nattributeName[%d]=%s",attributeCount,attributeName[attributeCount]);
}
}
} else { // retrieve name
name=(char*)malloc(pos - 1 - (nameStart - bufAbsoluteStart));
stringcopy(buf, nameStart - bufAbsoluteStart,name,0, pos - 1 - (nameStart - bufAbsoluteStart));
attributeName[attributeCount]=(char*)malloc(strlen(name));
strcpy(attributeName[attributeCount],name);
//attributeName[attributeCount] =name;
}
//Eq ::= S? '=' S?
while (isS(ch)) {
ch = more();
} // skip additional spaces
if (ch != '='){
fprintf(stderr,"expected = after attribute name");
return -1;
}
ch = more();
while (isS(ch)) {
ch = more();
} // skip additional spaces
/* AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] |
Reference)* "'" */
delimit = ch;
if (delimit != '"' && delimit != '\'')
{
fprintf(stderr, "attribute value must start with quotation or apostrophe not %s", printable(delimit));
return -1;
}
// parse until delimit or < and resolve Reference
// Reference ::= EntityRef | CharRef
usePC = false;
pcStart = pcEnd;
posStart = pos;
while (true) {
ch = more();
if (ch == delimit) {
break;
}
if (ch == '<') {
fprintf(stderr,"markup not allowed inside attribute value - illegal < ");
return -1;
}
if (ch == '&') {// extractEntityRef
posEnd = pos - 1;
if (!usePC) {
if (posEnd > posStart) {
joinPC();
} else {
usePC = true;
pcStart = pcEnd = 0;
}
}//assert usePC == true;
resolvedEntity = parseEntityRef();
// check if replacement text can be resolved !
if (resolvedEntity == NULL) {
if (entityRefName == NULL) {
entityRefName=(char*)malloc(posEnd - posStart);
stringcopy(buf, posStart,entityRefName,0, posEnd - posStart);
}
fprintf(stderr, "could not resolve entity named '%s'", entityRefName );
return -1;
}
/* write into PC replacement text - do merge for replacement text!*/
for (i = 0; i < strlen(resolvedEntity); i++) {
if ((unsigned) pcEnd >= strlen(pc)) ensurePC(pcEnd);
pc[pcEnd++] = resolvedEntity[i];
}
} else if (ch == '\t' || ch == '\n' || ch == '\r') {
// do attribute value normalization
// as described in http://www.w3.org/TR/REC-xml#AVNormalize
// handle EOL normalization ...
if (!usePC) {
posEnd = pos - 1;
if (posEnd > posStart) {
joinPC();
} else {
usePC = true;
pcEnd = pcStart = 0;
}
}
if ((unsigned) pcEnd >= strlen(pc)) ensurePC(pcEnd);
if (ch != '\n' || !normalizedCR) {
pc[pcEnd++] = ' '; //'\n';
}
} else {
if (usePC) {
if ((unsigned)pcEnd >= strlen(pc)) ensurePC(pcEnd);//Note strlen(pc)
pc[pcEnd++] = ch;
}
}
normalizedCR = ch == '\r';
}
if (Namespacesprocess && startsWithXmlns) {
if (!usePC) {
ns=(char*)malloc(pos - 1 - posStart);
stringcopy(buf, posStart,ns,0, pos - 1 - posStart);
} else {
ns=(char*)malloc(pos - 1 - posStart);
stringcopy(pc, pcStart,ns,0, pcEnd - pcStart);
}
ensureNamespacesCapacity(namespaceEnd);
if (colonPos != -1) {
if (strlen(ns) == 0) {
fprintf(stderr,"non-default namespace can not be declared to be empty string");
return -1;
}
// declare new namespace
namespacePrefix[namespaceEnd]=(char*)malloc(sizeof(char)*strlen(name));
strcpy(namespacePrefix[namespaceEnd] ,name);
//namespacePrefix[namespaceEnd] = name;
} else {
// declare new default namespace ...
namespacePrefix[namespaceEnd] = NULL;
}
// printf("\nns=%s",ns);
namespaceUri[namespaceEnd]=(char*)malloc(strlen(ns));
strcpy(namespaceUri[namespaceEnd] ,ns);
// printf("namespaceUri[%d]=%s",namespaceEnd,namespaceUri[namespaceEnd]);
//namespaceUri[namespaceEnd] = ns;
// detect duplicate namespace declarations!!!
startNs = elNamespaceCount[level-1];
for (i = namespaceEnd-1; i >= startNs; --i) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -