📄 ospxmlutil.c
字号:
/**########################################################################
*########################################################################
*########################################################################
*
*   COPYRIGHT (c) 1998, 1999 by TransNexus, LLC
*
*   This software contains proprietary and confidential information
*   of TransNexus, LLC. Except as may be set forth in the license
*   agreement under which this software is supplied, use, disclosure,
*   or reproduction is prohibited without the prior, express, written
*   consent of TransNexus, LLC.
*
*******#########################################################################
*#########################################################################
*#########################################################################
*/

/*
 * ospxmlutil.c - Utility functions for parsing and encoding XML documents.
 */

#include "osp.h"
#include "ospbfr.h"
#include "ospxmltype.h"
#include "ospxmldoc.h"

/* pre-defined entity names */
const OSPTXMLDOCENTITY OSPVXMLDocEntities[] =
{
    { '<' ,  (unsigned char *)"lt"   },
    { '>' ,  (unsigned char *)"gt"   },
    { '&' ,  (unsigned char *)"amp"  },
    { '\'' , (unsigned char *)"apos" },
    { '"' ,  (unsigned char *)"quot" }
};
const unsigned OSPVXMLDocEntitiesSize = sizeof(OSPVXMLDocEntities)/sizeof(OSPTXMLDOCENTITY);

/**/
/*-----------------------------------------------------------------------*
 * OSPPXMLDocIsMatch() - does the document match a given string
 *
 * Peeks at the next ospvStringLen characters of the document through
 * OSPPXMLDocPeekCharN() and compares them with ospvString.
 *
 * ospvScratch receives the peeked characters. When all ospvStringLen
 * characters could be peeked, a terminating zero is written at
 * ospvScratch[ospvStringLen], so the caller must supply at least
 * ospvStringLen + 1 bytes of scratch space.
 *
 * Returns OSPC_ERR_NO_ERROR unless an argument is invalid. Failing to
 * peek enough characters is not an error: *ospvIsMatch simply stays
 * OSPC_FALSE.
 *-----------------------------------------------------------------------*/
unsigned                              /* returns error code */
OSPPXMLDocIsMatch(
    OSPTBFR            **ospvBfrAddr,   /* buffer containing document */
    OSPTXMLENC           ospvEncoding,  /* character encoding */
    const unsigned char *ospvString,    /* string to check for match */
    unsigned             ospvStringLen, /* length of match string */
    unsigned char        ospvScratch[], /* place to store characters */
    unsigned            *ospvIsMatch    /* place to put answer */
)
{
    unsigned ospvErrCode = OSPC_ERR_NO_ERROR;
    int      tmpErrCode  = OSPC_ERR_NO_ERROR;
    unsigned cnt = 0;

    /*
     * The two buffer checks are combined so that *ospvBfrAddr is only
     * evaluated once ospvBfrAddr itself is known to be non-null.
     */
    if ((ospvBfrAddr == OSPC_OSNULL) || (*ospvBfrAddr == OSPC_OSNULL))
    {
        ospvErrCode = OSPC_ERR_BUF_EMPTY;
    }
    if (ospvEncoding == ospeXMLEncUnknown)
    {
        ospvErrCode = OSPC_ERR_XML_BAD_ENC;
    }
    if (ospvString == OSPC_OSNULL)
    {
        ospvErrCode = OSPC_ERR_XML_INVALID_ARGS;
    }
    if (ospvScratch == OSPC_OSNULL)
    {
        ospvErrCode = OSPC_ERR_XML_INVALID_ARGS;
    }
    if (ospvIsMatch == OSPC_OSNULL)
    {
        ospvErrCode = OSPC_ERR_XML_INVALID_ARGS;
    }

    if (ospvErrCode == OSPC_ERR_NO_ERROR)
    {
        /* assume it's not a match for safety (in case of error break) */
        *ospvIsMatch = OSPC_FALSE;

        /*
         * In the following loop we "peek" at enough characters to
         * compare against the string. The "peeked" characters are
         * stored in the scratch array supplied by the caller, who is
         * responsible for sizing it (match length plus one byte for
         * the terminator written below).
         */
        for (cnt = 0; cnt < ospvStringLen; cnt++)
        {
            OSPPXMLDocPeekCharN(ospvBfrAddr, ospvEncoding, cnt,
                &ospvScratch[cnt], &tmpErrCode);
            if (tmpErrCode != OSPC_ERR_NO_ERROR)
            {
                break;
            }
        }

        /*
         * If we didn't get enough characters to compare, that's not
         * an error; but it's certainly not a match either. We've
         * already set the default return to false.
         */
        if (cnt == ospvStringLen)
        {
            /* be sure to null terminate the scratch string */
            ospvScratch[cnt] = 0;

            /* now that we've extracted the characters, see what we've got */
            if (OSPM_MEMCMP((const char *)ospvScratch,
                    (const char *)ospvString, ospvStringLen) == 0)
            {
                *ospvIsMatch = OSPC_TRUE;
            }
        }
    }

    return(ospvErrCode);
}

/**/
/*-----------------------------------------------------------------------*
 * OSPPXMLDocSkipPast() - skip past a string in the document
 *
 * Reads characters with OSPPXMLDocReadChar() until the most recently
 * read characters equal ospvString, or until a read fails.
 *
 * ospvScratch is used as a sliding window over the characters read. A
 * terminating zero is written after the pre-loaded characters, so the
 * caller must supply at least strlen(ospvString) + 1 bytes.
 *
 * Returns OSPC_ERR_NO_ERROR once the string has been found, otherwise
 * the argument-validation error or the error from the failed read.
 *-----------------------------------------------------------------------*/
unsigned                              /* returns error code */
OSPPXMLDocSkipPast(
    OSPTBFR            **ospvBfrAddr,   /* buffer containing document */
    OSPTXMLENC           ospvEncoding,  /* character encoding */
    const unsigned char *ospvString,    /* string to skip past */
    unsigned char        ospvScratch[]  /* place to store characters */
)
{
    unsigned ospvErrCode = OSPC_ERR_NO_ERROR;
    unsigned cnt;
    unsigned len = 0;

    /* test the handle before looking through it */
    if ((ospvBfrAddr == OSPC_OSNULL) || (*ospvBfrAddr == OSPC_OSNULL))
    {
        ospvErrCode = OSPC_ERR_BUF_EMPTY;
    }
    if (ospvEncoding == ospeXMLEncUnknown)
    {
        ospvErrCode = OSPC_ERR_XML_BAD_ENC;
    }
    if (ospvString == OSPC_OSNULL)
    {
        ospvErrCode = OSPC_ERR_XML_INVALID_ARGS;
    }
    if (ospvScratch == OSPC_OSNULL)
    {
        ospvErrCode = OSPC_ERR_XML_INVALID_ARGS;
    }

    /*
     * In the following loop we pre-load the scratch buffer with enough
     * characters to compare against the string. The characters are
     * stored in the scratch array supplied by the caller, who is
     * responsible for sizing it (string length plus one byte for the
     * terminator written below).
     */
    if (ospvErrCode == OSPC_ERR_NO_ERROR)
    {
        for (cnt = 0, len = OSPM_STRLEN((const char *)ospvString);
             ((cnt < len) && (ospvErrCode == OSPC_ERR_NO_ERROR));
             cnt++)
        {
            ospvErrCode = OSPPXMLDocReadChar(ospvBfrAddr, ospvEncoding,
                &ospvScratch[cnt]);
        }
        /* be sure to null terminate the scratch string */
        ospvScratch[cnt] = 0;
    }

    /*
     * Now we start the main loop of the function. Here we're checking
     * for a match and we continue reading characters until we find one.
     */
    while (ospvErrCode == OSPC_ERR_NO_ERROR)
    {
        if (OSPM_MEMCMP((const char *)ospvScratch,
                (const char *)ospvString, len) == 0)
        {
            /* we've found the match, so we're done */
            break;
        }

        /*
         * No match yet, so we keep looking. First we need to shift
         * the scratch buffer to make room for the new character.
         * (An empty search string never gets here: comparing zero
         * bytes above always reports a match, so len-1 cannot wrap.)
         */
        for (cnt = 0; cnt < (len-1); cnt++)
        {
            ospvScratch[cnt] = ospvScratch[cnt+1];
        }
        ospvErrCode = OSPPXMLDocReadChar(ospvBfrAddr, ospvEncoding,
            &ospvScratch[len-1]);
    }

    return(ospvErrCode);
}

/**/
/*-----------------------------------------------------------------------*
 * OSPPXMLDocSkipPastChar() - skip XML until just past a single character
 *
 * Reads characters with OSPPXMLDocReadChar() until one equal to
 * ospvChar has been read, or until a read fails.
 *
 * Returns OSPC_ERR_NO_ERROR when the character was found, otherwise
 * the argument-validation error or the error from the failed read.
 *-----------------------------------------------------------------------*/
unsigned                          /* returns error code */
OSPPXMLDocSkipPastChar(
    OSPTBFR     **ospvBfrAddr,    /* buffer containing document */
    OSPTXMLENC    ospvEncoding,   /* character encoding for the document */
    unsigned char ospvChar        /* character to skip past */
)
{
    unsigned      ospvErrCode = OSPC_ERR_NO_ERROR;
    unsigned char readChar = '\0';

    /* test the handle before looking through it */
    if ((ospvBfrAddr == OSPC_OSNULL) || (*ospvBfrAddr == OSPC_OSNULL))
    {
        ospvErrCode = OSPC_ERR_BUF_EMPTY;
    }
    if (ospvEncoding == ospeXMLEncUnknown)
    {
        ospvErrCode = OSPC_ERR_XML_BAD_ENC;
    }

    /* loop looking for the character */
    while (ospvErrCode == OSPC_ERR_NO_ERROR)
    {
        ospvErrCode = OSPPXMLDocReadChar(ospvBfrAddr, ospvEncoding, &readChar);

        /* only trust readChar when the read actually succeeded */
        if ((ospvErrCode == OSPC_ERR_NO_ERROR) && (readChar == ospvChar))
        {
            break;
        }
    }

    return(ospvErrCode);
}

/**/
/*-----------------------------------------------------------------------*
 * OSPPXMLDocGetEncoding() - deduce the document's character encoding
 *
 * Peeks at the first two bytes of the buffer and classifies the
 * document as UTF-8 or big/little-endian UTF-16.
 *
 * *ospvEncoding is set to ospeXMLEncUnknown on entry and is only
 * replaced when both bytes could be peeked.
 *
 * Returns OSPC_ERR_NO_ERROR on success, OSPC_ERR_BUF_EMPTY for a null
 * buffer, OSPC_ERR_XML_INVALID_ARGS for a null ospvEncoding,
 * OSPC_ERR_BUF_INCOMPLETE when the first byte is unavailable and
 * OSPC_ERR_XML_INCOMPLETE when the second byte is unavailable.
 *-----------------------------------------------------------------------*/
unsigned                          /* returns error code */
OSPPXMLDocGetEncoding(
    OSPTBFR    **ospvBfrAddr,     /* buffer containing document */
    OSPTXMLENC  *ospvEncoding     /* place to store encoding type */
)
{
    unsigned      ospvErrCode = OSPC_ERR_NO_ERROR;
    int           readChar;
    unsigned char char1 = '\0';
    unsigned char char2 = '\0';

    /* test the handle before looking through it */
    if ((ospvBfrAddr == OSPC_OSNULL) || (*ospvBfrAddr == OSPC_OSNULL))
    {
        ospvErrCode = OSPC_ERR_BUF_EMPTY;
    }

    /* the result pointer must be valid before anything is stored in it */
    if (ospvEncoding == OSPC_OSNULL)
    {
        ospvErrCode = OSPC_ERR_XML_INVALID_ARGS;
    }
    else
    {
        *ospvEncoding = ospeXMLEncUnknown;
    }

    /*
     * First have a look at the first two bytes in the buffer. From
     * these, we can deduce the character encoding.
     */
    if (ospvErrCode == OSPC_ERR_NO_ERROR)
    {
        readChar = OSPPBfrPeekByte(*ospvBfrAddr);
        if (readChar != -1)
        {
            char1 = (unsigned char)readChar;
        }
        else
        {
            ospvErrCode = OSPC_ERR_BUF_INCOMPLETE;
        }
    }
    if (ospvErrCode == OSPC_ERR_NO_ERROR)
    {
        readChar = OSPPBfrPeekByteN(*ospvBfrAddr, 1);
        if (readChar != -1)
        {
            char2 = (unsigned char)readChar;
        }
        else
        {
            ospvErrCode = OSPC_ERR_XML_INCOMPLETE;
        }
    }

    /*
     * Now we check the characters, this stuff is pretty much
     * straight from the XML specification.
     */
    if (ospvErrCode == OSPC_ERR_NO_ERROR)
    {
        if ((char1 == OSPC_XMLDOC_UTF16MSB) && (char2 == OSPC_XMLDOC_UTF16LSB))
        {
            /* we found the UTF-16 byte order mark, it's big-endian */
            *ospvEncoding = ospeXMLEncUTF16b;
        }
        else if ((char1 == OSPC_XMLDOC_UTF16LSB) && (char2 == OSPC_XMLDOC_UTF16MSB))
        {
            /*
             * we found the UTF-16 byte order mark with its two bytes
             * in the opposite order, so it's little-endian
             */
            *ospvEncoding = ospeXMLEncUTF16l;
        }
        else if ((char1 == OSPC_XMLDOC_UTF16NULL) && (char2 == OSPC_XMLDOC_OPEN))
        {
            /* UTF-16 without the byte order mark, big-endian */
            *ospvEncoding = ospeXMLEncUTF16b;
        }
        else if ((char1 == OSPC_XMLDOC_OPEN) && (char2 == OSPC_XMLDOC_UTF16NULL))
        {
            /* UTF-16 without the byte order mark, little-endian */
            *ospvEncoding = ospeXMLEncUTF16l;
        }
        else if ((char1 == OSPC_XMLDOC_OPEN) && (char2 == OSPC_XMLDOC_QUEST))
        {
            /* this is UTF-8 */
            *ospvEncoding = ospeXMLEncUTF8;
        }
        else
        {
            /*
             * According to the XML standard, anything other
             * than UTF8 or UTF16 must have a character
             * encoding declaration. Since we've already
             * checked for UTF16 and we've already looked
             * for the start of a character encoding, that
             * only leaves UTF8.
             */
            *ospvEncoding = ospeXMLEncUTF8;
        }
    }

    return(ospvErrCode);
}

/**/
/*-----------------------------------------------------------------------*
 * OSPPXMLDocTranslateEntity() - translate an entity name into its value
 *-----------------------------------------------------------------------*/
unsigned                          /* returns error code */
OSPPXMLDocTranslateEntity(
    unsigned char *ospvName,
    unsigned char *ospvChar       /* place to store character */
)
{
    unsigned ospvErrCode = OSPC_ERR_NO_ERROR;
    unsigned cnt;

    if (ospvName == OSPC_OSNULL)
    {
        ospvErrCode = OSPC_ERR_XML_INVALID_ARGS;
    }
    if (ospvChar == OSPC_OSNULL)
    {
        ospvErrCode = OSPC_ERR_XML_INVALID_ARGS;
    }
    if (ospvErrCode == OSPC_ERR_NO_ERROR)
    {
        /* is this a character reference */
        if (*ospvName == OSPC_XMLDOC_CHARREF)
        {
            ospvName++;
            /* is it hex? */
            if (*ospvName == OSPC_XMLDOC_HEXREF)
            {
                /* this one's hex */
                ospvName++;
                for (*ospvChar = 0; *ospvName; ospvName++)
                {
                    *ospvChar *= 0x10;
                    if ((*ospvName >= '0') && (*ospvName <= '9'))
                    {
                        unsigned char x = *ospvChar;
                        *ospvChar = (unsigned char)(x + ((unsigned char)*ospvName - (unsigned char)'0'));
                    }
                    else if ((*ospvName >= 'a') && (*ospvName <= 'f'))
                    {
                        unsigned char x = *ospvChar;
                        *ospvChar = (unsigned char)(x + ((unsigned char)*ospvName - (unsigned char)'a') + 10);
                    }
                    else if ((*ospvName >= 'A') && (*ospvName <= 'F'))
                    {
                        unsigned char x = *ospvChar;
                        *ospvChar = (unsigned char)(x + ((unsigned char)*ospvName - (unsigned char)'A') + 10);
                    }
                    else
                    {
                        ospvErrCode = OSPC_ERR_XML_BAD_ENTITY;
                    }
                }
            }
            else
            {
                /* just a decimal character reference */
                for (*ospvChar = 0; *ospvName; ospvName++)
                {
                    *ospvChar *= 10;
                    if ((*ospvName >= '0') && (*ospvName <= '9'))
                    {
                        unsigned char x = *ospvChar;
                        *ospvChar = (unsigned char)(x + ((unsigned char)*ospvName - (unsigned char)'0'));
                    }
                    else
                    {
                        ospvErrCode = OSPC_ERR_XML_BAD_ENTITY;
                    }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -