📄 parser.c
字号:
if ((cur[2] & 0xc0) != 0x80) goto encoding_error; if ((c & 0xf0) == 0xf0) { if (cur[3] == 0) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) goto encoding_error; /* 4-byte code */ ctxt->input->cur += 4; val = (cur[0] & 0x7) << 18; val |= (cur[1] & 0x3f) << 12; val |= (cur[2] & 0x3f) << 6; val |= cur[3] & 0x3f; } else { /* 3-byte code */ ctxt->input->cur += 3; val = (cur[0] & 0xf) << 12; val |= (cur[1] & 0x3f) << 6; val |= cur[2] & 0x3f; } if (((val > 0xd7ff) && (val < 0xe000)) || ((val > 0xfffd) && (val < 0x10000)) || (val >= 0x110000)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Char out of allowed range\n"); ctxt->errNo = XML_ERR_INVALID_ENCODING; ctxt->wellFormed = 0; ctxt->disableSAX = 1; } } else /* 2-byte code */ ctxt->input->cur += 2; } else /* 1-byte code */ ctxt->input->cur++; } else { /* * Assume it's a fixed lenght encoding (1) with * a compatibke encoding for the ASCII set, since * XML constructs only use < 128 chars */ ctxt->input->cur++; } ctxt->nbChars++; if (*ctxt->input->cur == 0) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); } } if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); if ((*ctxt->input->cur == 0) && (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) xmlPopInput(ctxt); return;encoding_error: /* * If we detect an UTF8 error that probably mean that the * input encoding didn't get properly advertized in the * declaration header. Report the error and switch the encoding * to ISO-Latin-1 (if you don't like this policy, just declare the * encoding !) */ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Input is not proper UTF-8, indicate encoding !\n"); ctxt->errNo = XML_ERR_INVALID_ENCODING; ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1"); ctxt->input->cur++; return;}/** * xmlCurrentChar: * @ctxt: the XML parser context * @len: pointer to the length of the char read * * The current char value, if using UTF-8 this may actaully span multiple * bytes in the input buffer. Implement the end of line normalization: * 2.11 End-of-Line Handling * Wherever an external parsed entity or the literal entity value * of an internal parsed entity contains either the literal two-character * sequence "#xD#xA" or a standalone literal #xD, an XML processor * must pass to the application the single character #xA. * This behavior can conveniently be produced by normalizing all * line breaks to #xA on input, before parsing.) * * Returns the current char value and its lenght */intxmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { if (ctxt->token != 0) { *len = 0; return(ctxt->token); } if (ctxt->encoding == NULL) { /* * We are supposed to handle UTF8, check it's valid * From rfc2044: encoding of the Unicode values on UTF-8: * * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * * Check for the 0x110000 limit too */ const unsigned char *cur = ctxt->input->cur; unsigned char c; unsigned int val; c = *cur; if (c & 0x80) { if (cur[1] == 0) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); if ((cur[1] & 0xc0) != 0x80) goto encoding_error; if ((c & 0xe0) == 0xe0) { if (cur[2] == 0) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); if ((cur[2] & 0xc0) != 0x80) goto encoding_error; if ((c & 0xf0) == 0xf0) { if (cur[3] == 0) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) goto encoding_error; /* 4-byte code */ *len = 4; val = (cur[0] & 0x7) << 18; val |= (cur[1] & 0x3f) << 12; val |= (cur[2] & 0x3f) << 6; val |= cur[3] & 0x3f; } else { /* 3-byte code */ *len = 3; val = (cur[0] & 0xf) << 12; val |= (cur[1] & 0x3f) << 6; val |= cur[2] & 0x3f; } } else { /* 2-byte code */ *len = 2; val = (cur[0] & 0x1f) << 6; val |= cur[1] & 0x3f; } if (!IS_CHAR(val)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Char out of allowed range\n"); ctxt->errNo = XML_ERR_INVALID_ENCODING; ctxt->wellFormed = 0; ctxt->disableSAX = 1; } return(val); } else { /* 1-byte code */ *len = 1; if (*ctxt->input->cur == 0xD) { if (ctxt->input->cur[1] == 0xA) { ctxt->nbChars++; ctxt->input->cur++; } return(0xA); } return((int) *ctxt->input->cur); } } /* * Assume it's a fixed lenght encoding (1) with * a compatibke encoding for the ASCII set, since * XML constructs only use < 128 chars */ *len = 1; if (*ctxt->input->cur == 0xD) { if (ctxt->input->cur[1] == 0xA) { ctxt->nbChars++; ctxt->input->cur++; } return(0xA); } return((int) *ctxt->input->cur);encoding_error: /* * If we detect an UTF8 error that probably mean that the * input encoding didn't get properly advertized in the * declaration header. Report the error and switch the encoding * to ISO-Latin-1 (if you don't like this policy, just declare the * encoding !) */ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Input is not proper UTF-8, indicate encoding !\n"); ctxt->errNo = XML_ERR_INVALID_ENCODING; ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1"); *len = 1; return((int) *ctxt->input->cur);}/** * xmlStringCurrentChar: * @ctxt: the XML parser context * @cur: pointer to the beginning of the char * @len: pointer to the length of the char read * * The current char value, if using UTF-8 this may actaully span multiple * bytes in the input buffer. * * Returns the current char value and its lenght */intxmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) { if (ctxt->encoding == NULL) { /* * We are supposed to handle UTF8, check it's valid * From rfc2044: encoding of the Unicode values on UTF-8: * * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * * Check for the 0x110000 limit too */ unsigned char c; unsigned int val; c = *cur; if (c & 0x80) { if ((cur[1] & 0xc0) != 0x80) goto encoding_error; if ((c & 0xe0) == 0xe0) { if ((cur[2] & 0xc0) != 0x80) goto encoding_error; if ((c & 0xf0) == 0xf0) { if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) goto encoding_error; /* 4-byte code */ *len = 4; val = (cur[0] & 0x7) << 18; val |= (cur[1] & 0x3f) << 12; val |= (cur[2] & 0x3f) << 6; val |= cur[3] & 0x3f; } else { /* 3-byte code */ *len = 3; val = (cur[0] & 0xf) << 12; val |= (cur[1] & 0x3f) << 6; val |= cur[2] & 0x3f; } } else { /* 2-byte code */ *len = 2; val = (cur[0] & 0x1f) << 6; val |= cur[2] & 0x3f; } if (!IS_CHAR(val)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Char out of allowed range\n"); ctxt->errNo = XML_ERR_INVALID_ENCODING; ctxt->wellFormed = 0; ctxt->disableSAX = 1; } return(val); } else { /* 1-byte code */ *len = 1; return((int) *cur); } } /* * Assume it's a fixed lenght encoding (1) with * a compatibke encoding for the ASCII set, since * XML constructs only use < 128 chars */ *len = 1; return((int) *cur);encoding_error: /* * If we detect an UTF8 error that probably mean that the * input encoding didn't get properly advertized in the * declaration header. Report the error and switch the encoding * to ISO-Latin-1 (if you don't like this policy, just declare the * encoding !) */ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Input is not proper UTF-8, indicate encoding !\n"); ctxt->errNo = XML_ERR_INVALID_ENCODING; *len = 1; return((int) *cur);}/** * xmlCopyChar: * @len: pointer to the length of the char read (or zero) * @array: pointer to an arry of xmlChar * @val: the char value * * append the char value in the array * * Returns the number of xmlChar written */intxmlCopyChar(int len, xmlChar *out, int val) { /* * We are supposed to handle UTF8, check it's valid * From rfc2044: encoding of the Unicode values on UTF-8: * * UCS-4 range (hex.) UTF-8 octet sequence (binary) * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx */ if (len == 0) { if (val < 0) len = 0; else if (val < 0x80) len = 1; else if (val < 0x800) len = 2; else if (val < 0x10000) len = 3; else if (val < 0x110000) len = 4; if (len == 0) { fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n", val); return(0); } } if (len > 1) { int bits; if (val < 0x80) { *out++= val; bits= -6; } else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; } else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; } else { *out++= (val >> 18) | 0xF0; bits= 12; } for ( ; bits >= 0; bits-= 6) *out++= ((val >> bits) & 0x3F) | 0x80 ; return(len); } *out = (xmlChar) val; return(1);}/** * xmlSkipBlankChars: * @ctxt: the XML parser context * * skip all blanks character found at that point in the input streams. * It pops up finished entities in the process if allowable at that point. * * Returns the number of space chars skipped */intxmlSkipBlankChars(xmlParserCtxtPtr ctxt) { int cur, res = 0; do { cur = CUR; while (IS_BLANK(cur)) { NEXT; cur = CUR; res++; } while ((cur == 0) && (ctxt->inputNr > 1) && (ctxt->instate != XML_PARSER_COMMENT)) { xmlPopInput(ctxt); cur = CUR; } if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); } while (IS_BLANK(cur)); return(res);}/************************************************************************ * * * Commodity functions to handle entities processing * * * ************************************************************************//** * xmlPopInput: * @ctxt: an XML parser context * * xmlPopInput: the current input pointed by ctxt->input came to an end * pop it and return the next char. * * Returns the current xmlChar in the parser context */xmlCharxmlPopInput(xmlParserCtxtPtr ctxt) { if (ctxt->inputNr == 1) return(0); /* End of main Input */ xmlFreeInputStream(inputPop(ctxt)); if ((*ctxt->input->cur == 0) && (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) return(xmlPopInput(ctxt)); return(CUR);}/** * xmlPushInput: * @ctxt: an XML parser context * @input: an XML parser input fragment (entity, XML fragment ...). * * xmlPushInput: switch to a new input stream which is stacked on top * of the previous one(s). */voidxmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { if (input == NULL) return; inputPush(ctxt, input); GROW;}/** * xmlFreeInputStream: * @input: an xmlParserInputPtr * * Free up an input stream. */voidxmlFreeInputStream(xmlParserInputPtr input) { if (input == NULL) return; if (input->filename != NULL) xmlFree((char *) input->filename); if (input->directory != NULL) xmlFree((char *) input->directory); if (input->encoding != NULL) xmlFree((char *) input->encoding); if (input->version != NULL) xmlFree((char *) input->version); if ((input->free != NULL) && (input->base != NULL)) input->free((xmlChar *) input->base); if (input->buf != NULL) xmlFreeParserInputBuffer(input->buf); memset(input, -1, sizeof(xmlParserInput)); xmlFree(input);}/** * xmlNewInputStream: * @ctxt: an XML parser context * * Create a new input stream structure * Returns the new input stream or NULL
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -