📄 ezxml.c
字号:
/* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */ s = strchr(v, q); if(s) *(s++) = '\0'; /* null terminate value */ ent[i + 1] = ezxml_decode(v, pe, '%'); /* set value */ ent[i + 2] = NULL; /* null terminate entity list */ if(!ezxml_ent_ok(n, ent[i + 1], ent)) { /* circular reference */ if(ent[i + 1] != v) free(ent[i + 1]); ezxml_err(root, v, "circular entity declaration &%s", n); break; } else ent[i] = n; /* set entity name */ } else if(!strncmp(s, "<!ATTLIST", 9)) { /* parse default attributes */ t = s + strspn(s + 9, EZXML_WS) + 9; /* skip whitespace separator */ if(!*t) { ezxml_err(root, t, "unclosed <!ATTLIST"); break; } if(*(s = t + strcspn(t, EZXML_WS ">")) == '>') continue; else *s = '\0'; /* null terminate tag name */ for(i = 0; root->attr[i] && strcmp(n, root->attr[i][0]); i++); while(*(n = ++s + strspn(s, EZXML_WS)) && *n != '>') { if(*(s = n + strcspn(n, EZXML_WS))) *s = '\0'; /* attr name */ else { ezxml_err(root, t, "malformed <!ATTLIST"); break; } s += strspn(s + 1, EZXML_WS) + 1; /* find next token */ c = (strncmp(s, "CDATA", 5)) ? "*" : " "; /* is it cdata? */ if(!strncmp(s, "NOTATION", 8)) s += strspn(s + 8, EZXML_WS) + 8; s = (*s == '(') ? strchr(s, ')') : s + strcspn(s, EZXML_WS); if(!s) { ezxml_err(root, t, "malformed <!ATTLIST"); break; } s += strspn(s, EZXML_WS ")"); /* skip white space separator */ if(!strncmp(s, "#FIXED", 6)) s += strspn(s + 6, EZXML_WS) + 6; if(*s == '#') { /* no default value */ s += strcspn(s, EZXML_WS ">") - 1; if(*c == ' ') continue; /* cdata is default, nothing to do */ v = NULL; } else { /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */ s = strchr(v = s + 1, *s); if((*s == '"' || *s == '\'') && /* default value */ s) *s = '\0'; else { ezxml_err(root, t, "malformed <!ATTLIST"); break; } } if(!root->attr[i]) { /* new tag name */ root->attr = (!i) ? malloc(2 * sizeof(char **)) : realloc(root->attr, (i + 2) * sizeof(char **)); root->attr[i] = malloc(2 * sizeof(char *)); root->attr[i][0] = t; /* set tag name */ root->attr[i][1] = (char *)(root->attr[i + 1] = NULL); } for(j = 1; root->attr[i][j]; j += 3); /* find end of list */ root->attr[i] = realloc(root->attr[i], (j + 4) * sizeof(char *)); root->attr[i][j + 3] = NULL; /* null terminate list */ root->attr[i][j + 2] = c; /* is it cdata? */ root->attr[i][j + 1] = (v) ? ezxml_decode(v, root->ent, *c) : NULL; root->attr[i][j] = n; /* attribute name */ } } else if(!strncmp(s, "<!--", 4)) s = strstr(s + 4, "-->"); /* comments */ else if(!strncmp(s, "<?", 2)) { /* processing instructions */ /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */ s = strstr(c = s + 2, "?>"); if(s) ezxml_proc_inst(root, c, s++ - c); } else if(*s == '<') s = strchr(s, '>'); /* skip other declarations */ else if(*(s++) == '%' && !root->standalone) break; } free(pe); return !*root->err;}/* Converts a UTF-16 string to UTF-8. Returns a new string that must be freed *//* or NULL if no conversion was needed. */char *ezxml_str2utf8(char **s, size_t * len){ char *u; size_t l = 0, sl, max = *len; long c, d; int b, be = (**s == '\xFE') ? 1 : (**s == '\xFF') ? 0 : -1; if(be == -1) return NULL; /* not UTF-16 */ u = malloc(max); for(sl = 2; sl < *len - 1; sl += 2) { c = (be) ? (((*s)[sl] & 0xFF) << 8) | ((*s)[sl + 1] & 0xFF) /*UTF-16BE */ : (((*s)[sl + 1] & 0xFF) << 8) | ((*s)[sl] & 0xFF); /*UTF-16LE */ if(c >= 0xD800 && c <= 0xDFFF && (sl += 2) < *len - 1) { /* high-half */ d = (be) ? (((*s)[sl] & 0xFF) << 8) | ((*s)[sl + 1] & 0xFF) : (((*s)[sl + 1] & 0xFF) << 8) | ((*s)[sl] & 0xFF); c = (((c & 0x3FF) << 10) | (d & 0x3FF)) + 0x10000; } while(l + 6 > max) u = realloc(u, max += EZXML_BUFSIZE); if(c < 0x80) u[l++] = c; /* US-ASCII subset */ else { /* multi-byte UTF-8 sequence */ for(b = 0, d = c; d; d /= 2) b++; /* bits in c */ b = (b - 2) / 5; /* bytes in payload */ u[l++] = (0xFF << (7 - b)) | (c >> (6 * b)); /* head */ while(b) u[l++] = 0x80 | ((c >> (6 * --b)) & 0x3F); /* payload */ } } return *s = realloc(u, *len = l);}/* frees a tag attribute list */voidezxml_free_attr(char **attr){ int i = 0; char *m; if(!attr || attr == EZXML_NIL) return; /* nothing to free */ while(attr[i]) i += 2; /* find end of attribute list */ m = attr[i + 1]; /* list of which names and values are malloced */ for(i = 0; m[i]; i++) { if(m[i] & EZXML_NAMEM) free(attr[i * 2]); if(m[i] & EZXML_TXTM) free(attr[(i * 2) + 1]); } free(m); free(attr);}/* parse the given xml string and return an ezxml structure */ezxml_tezxml_parse_str(char *s, size_t len){ ezxml_root_t root = (ezxml_root_t) ezxml_new(NULL); char q, e, *d, **attr, **a = NULL; /* initialize a to avoid compile warning */ int l, i, j; root->m = s; if(!len) return ezxml_err(root, NULL, "root tag missing"); root->u = ezxml_str2utf8(&s, &len); /* convert utf-16 to utf-8 */ root->e = (root->s = s) + len; /* record start and end of work area */ e = s[len - 1]; /* save end char */ s[len - 1] = '\0'; /* turn end char into null terminator */ while(*s && *s != '<') s++; /* find first tag */ if(!*s) return ezxml_err(root, s, "root tag missing"); for(;;) { attr = (char **)EZXML_NIL; d = ++s; if(isalpha(*s) || *s == '_' || *s == ':' || *s < '\0') { /* new tag */ if(!root->cur) return ezxml_err(root, d, "markup outside of root element"); s += strcspn(s, EZXML_WS "/>"); while(isspace(*s)) *(s++) = '\0'; /* null terminate tag name */ if(*s && *s != '/' && *s != '>') { /* find tag in default attr list */ /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */ a = root->attr[0]; for(i = 0; a && strcmp(a[0], d); i++) { a = root->attr[i]; } } for(l = 0; *s && *s != '/' && *s != '>'; l += 2) { /* new attrib */ attr = (l) ? realloc(attr, (l + 4) * sizeof(char *)) : malloc(4 * sizeof(char *)); /* allocate space */ attr[l + 3] = (l) ? realloc(attr[l + 1], (l / 2) + 2) : malloc(2); /* mem for list of maloced vals */ strcpy(attr[l + 3] + (l / 2), " "); /* value is not malloced */ attr[l + 2] = NULL; /* null terminate list */ attr[l + 1] = ""; /* temporary attribute value */ attr[l] = s; /* set attribute name */ s += strcspn(s, EZXML_WS "=/>"); if(*s == '=' || isspace(*s)) { *(s++) = '\0'; /* null terminate tag attribute name */ q = *(s += strspn(s, EZXML_WS "=")); if(q == '"' || q == '\'') { /* attribute value */ attr[l + 1] = ++s; while(*s && *s != q) s++; if(*s) *(s++) = '\0'; /* null terminate attribute val */ else { ezxml_free_attr(attr); return ezxml_err(root, d, "missing %c", q); } for(j = 1; a && a[j] && strcmp(a[j], attr[l]); j += 3); attr[l + 1] = ezxml_decode(attr[l + 1], root->ent, (a && a[j]) ? *a[j + 2] : ' '); if(attr[l + 1] < d || attr[l + 1] > s) attr[l + 3][l / 2] = EZXML_TXTM; /* value malloced */ } } while(isspace(*s)) s++; } if(*s == '/') { /* self closing tag */ *(s++) = '\0'; if((*s && *s != '>') || (!*s && e != '>')) { if(l) ezxml_free_attr(attr); return ezxml_err(root, d, "missing >"); } ezxml_open_tag(root, d, attr); ezxml_close_tag(root, d, s); } else if((q = *s) == '>' || (!*s && e == '>')) { /* open tag */ *s = '\0'; /* temporarily null terminate tag name */ ezxml_open_tag(root, d, attr); *s = q; } else { if(l) ezxml_free_attr(attr); return ezxml_err(root, d, "missing >"); } } else if(*s == '/') { /* close tag */ s += strcspn(d = s + 1, EZXML_WS ">") + 1; /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */ q = *s; if(!q && e != '>') return ezxml_err(root, d, "missing >"); *s = '\0'; /* temporarily null terminate tag name */ if(ezxml_close_tag(root, d, s)) return &root->xml; if(isspace(*s = q)) s += strspn(s, EZXML_WS); } else if(!strncmp(s, "!--", 3)) { /* xml comment */ s = strstr(s + 3, "--"); if(!s || (*(s += 2) != '>' && *s) || (!*s && e != '>')) return ezxml_err(root, d, "unclosed <!--"); } else if(!strncmp(s, "![CDATA[", 8)) { /* cdata */ /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */ s = strstr(s, "]]>"); if(s) ezxml_char_content(root, d + 8, (s += 2) - d - 10, 'c'); else return ezxml_err(root, d, "unclosed <![CDATA["); } else if(!strncmp(s, "!DOCTYPE", 8)) { /* dtd */ for(l = 0; *s && ((!l && *s != '>') || (l && (*s != ']' || *(s + strspn(s + 1, EZXML_WS) + 1) != '>'))); l = (*s == '[') ? 1 : l) s += strcspn(s + 1, "[]>") + 1; if(!*s && e != '>') return ezxml_err(root, d, "unclosed <!DOCTYPE"); d = (l) ? strchr(d, '[') + 1 : d; if(l && !ezxml_internal_dtd(root, d, s++ - d)) return &root->xml; } else if(*s == '?') { /* <?...?> processing instructions */ do { s = strchr(s, '?'); } while(s && *(++s) && *s != '>'); if(!s || (!*s && e != '>')) return ezxml_err(root, d, "unclosed <?"); else ezxml_proc_inst(root, d + 1, s - d - 2); } else return ezxml_err(root, d, "unexpected <"); if(!s || !*s) break; *s = '\0'; d = ++s; if(*s && *s != '<') { /* tag character content */ while(*s && *s != '<') s++; if(*s) ezxml_char_content(root, d, s - d, '&'); else break; } else if(!*s) break; } if(!root->cur) return &root->xml; else if(!root->cur->name) return ezxml_err(root, d, "root tag missing"); else return ezxml_err(root, d, "unclosed tag <%s>", root->cur->name);}/* Wrapper for ezxml_parse_str() that accepts a file stream. Reads the entire *//* stream into memory and then parses it. For xml files, use ezxml_parse_file() *//* or ezxml_parse_fd() */ezxml_tezxml_parse_fp(FILE * fp){ ezxml_root_t root; size_t l, len = 0; char *s; /* Jason Luu, Aug 29, 2007. Removed assignment in conditional statement */ s = malloc(EZXML_BUFSIZE); if(!s) return NULL; do { len += (l = fread((s + len), 1, EZXML_BUFSIZE, fp)); if(l == EZXML_BUFSIZE) s = realloc(s, len + EZXML_BUFSIZE); } while(s && l == EZXML_BUFSIZE); if(!s) return NULL; root = (ezxml_root_t) ezxml_parse_str(s, len); /* Ted Campbell, Aug 14, 2007. Added explicit cast. */ root->len = (size_t) (-1); /* so we know to free s in ezxml_free() */ return &root->xml;}/* A wrapper for ezxml_parse_str() that accepts a file descriptor. First *//* attempts to mem map the file. Failing that, reads the file into memory. *//* Returns NULL on failure. */ezxml_tezxml_parse_fd(int fd){ ezxml_root_t root; struct stat st; size_t l; void *m; if(fd < 0) return NULL; fstat(fd, &st);#ifndef EZXML_NOMMAP l = (st.st_size + sysconf(_SC_PAGESIZE) - 1) & ~(sysconf(_SC_PAGESIZE) - 1); if((m = mmap(NULL, l, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0)) != MAP_FAILED) { madvise(m, l, MADV_SEQUENTIAL); /* optimize for sequential access */ root = (ezxml_root_t) ezxml_parse_str(m, st.st_size); madvise(m, root->len = l, MADV_NORMAL); /* put it back to normal */ } else { /* mmap failed, read file into memory */#endif /* EZXML_NOMMAP */ l = read(fd, m = malloc(st.st_size), st.st_size); root = (ezxml_root_t) ezxml_parse_str(m, l); /* Ted Campbell, Aug 14, 2007. Added explicit cast. */ root->len = (size_t) (-1); /* so we know to free s in ezxml_free() */#ifndef EZXML_NOMMAP }#endif /* EZXML_NOMMAP */ return &root->xml;}/* a wrapper for ezxml_parse_fd that accepts a file name */ezxml_tezxml_parse_file(const char *file){ int fd = open(file, O_RDONLY, 0); ezxml_t xml = ezxml_parse_fd(fd); if(fd >= 0) close(fd); return xml;}/* Encodes ampersand sequences appending the results to *dst, reallocating *dst *//* if length excedes max. a is non-zero for attribute encoding. Returns *dst */char *ezxml_ampencode(const char *s, size_t len, char **dst, size_t * dlen, size_t * max, short a){ const char *e;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -