📄 parser.cpp
字号:
if ((c=get_char(psrc, &cl))<=0) { *dest=0; return false;} if (cl==1){ *dest++=c; psrc++; } else { i=toutf8(c, dest); if (i<0) {*dest=0; return false;} dest+=i; psrc+=cl; } } *dest=0; if (psrc>pend) return false; return true; // success}//Skip String//Skips all characters in the fragment string that are contained within//strSkipChars until some other character is found. Useful for skipping//over whitespace.long Parser::skipString (char **pstrFragment, const char *strSkipChars){ if (!pstrFragment || !strSkipChars) return -1; while ((**pstrFragment != '\0') && (char_match (**pstrFragment, strSkipChars))) { (*pstrFragment)++; } return 0; //success}//Skip Until String//Skips all characters in the string until it finds the skip key.//Then it skips the skip key and returns.long Parser::skipUntilString (char **pstrSource, const char *strSkipKey){ if (!pstrSource || !strSkipKey) return -1; while (**pstrSource && strncmp (*pstrSource, strSkipKey, strlen(strSkipKey))) (*pstrSource)++; *pstrSource = *pstrSource + strlen (strSkipKey); return 0; //success}//This will return the string of the next token in the TokenBuffint Parser::getNextToken(){ int TokenLength=0; int temp, tlen; clearTokBuf(); // Check for white space if(*CurrPtr=='\0') { //TokenBuff[0]='\0'; return 0; } // Attribute value logic must come first, since all text untokenized until end-quote if (inAttrib && (!char_match (*CurrPtr, QUOTE))) { char *strEndQuote = findNextMatch (CurrPtr, QUOTE); if (strEndQuote == NULL) { TokenLength = 1; //*TokenBuff = '\0'; // return a single space for whitespace //if (!copy_token (TokenBuff, CurrPtr, TokenLength)) if (!copy_token (CurrPtr, TokenLength)) return 1; return 0; // serious problem - no matching end-quote found for attribute } TokenLength = strEndQuote - CurrPtr; // BUGBUG: conversion issue if using more than simple strings //if (!copy_token (TokenBuff, CurrPtr, TokenLength)) if (!copy_token (CurrPtr, TokenLength)) return 1; CurrPtr = CurrPtr+TokenLength; return 0; // must return now, so it doesn't go into name processing } if (char_match (*CurrPtr, WHITESPACE)) { TokenLength = 1; //if (!copy_token (TokenBuff, " ", TokenLength)) // return a single space for whitespace if (!copy_token (" ", TokenLength)) // return a single space for whitespace return 1; CurrPtr = CurrPtr+TokenLength; return 0; } // Skip <? .. ?> , <! .. >, <!-- .. --> while (!strncmp (CurrPtr, BEGIN_COMMENT, strlen(BEGIN_COMMENT)) // <!-- || !strncmp (CurrPtr, BEGIN_PROCESSING, strlen(BEGIN_PROCESSING)) // <? || !strncmp (CurrPtr, BEGIN_DOCTYPE, strlen(BEGIN_DOCTYPE))) // <! { if (!strncmp (CurrPtr, BEGIN_COMMENT, strlen(BEGIN_COMMENT))) skipUntilString (&CurrPtr, END_COMMENT); else if (!strncmp (CurrPtr, BEGIN_PROCESSING, strlen(BEGIN_PROCESSING))) skipUntilString (&CurrPtr, END_PROCESSING); else skipUntilString (&CurrPtr, GREATERTHAN); skipString (&CurrPtr, WHITESPACE); TagVal=false; } // Check for start tags if (char_match (*CurrPtr, LESSTHAN)) { temp = toint(CurrPtr+1, &tlen); if (temp == '/') TokenLength = 2; // token is '</' end tag else if(isnamech(temp,false)) TokenLength=1; // Begin tag found, so return '<' token else{ //strcpy(TokenBuff,"\0"); return 1; //error } TagVal=false; } // Check for opening/closing attribute value quotation mark if (char_match (*CurrPtr, QUOTE) && !TagVal) { // Quote found, so return it as token TokenLength = strlen(QUOTE); } // Check for '=' token if (char_match (*CurrPtr, EQUALS) && !TagVal) { // Equals found, so return it as a token TokenLength = strlen(EQUALS); } // Check for '/>' token if (char_match (*CurrPtr, SLASH)) { if (char_match (*(CurrPtr + 1), GREATERTHAN)) { // token '/>' found TokenLength = 2; TagVal=true; } //Content may begin with a / else if (TagVal) { TagVal=false; CurrPtr=SavePtr+1;//SavePtr whould not have have already moved. char *pEndContent = CurrPtr; // Read content until a < is found that is not a comment <!-- bool bReadContent = true; while (bReadContent) { while (!char_match (*pEndContent, LESSTHAN) && *pEndContent) pEndContent++; if (!strncmp (pEndContent, BEGIN_COMMENT, strlen (BEGIN_COMMENT))) skipUntilString (&pEndContent, END_COMMENT); else bReadContent = false; if (!(*pEndContent)) bReadContent = false; } TokenLength = pEndContent - CurrPtr; } } // Check for '>' token else if (char_match (*CurrPtr, GREATERTHAN)) { // Equals found, so return it as a token TokenLength = strlen(GREATERTHAN); SavePtr=CurrPtr; // Saving this ptr for not ignoring the leading and trailing spaces. TagVal=true; } // Check for Content e.g. <tag>content</tag> else if (TagVal) { TagVal=false; CurrPtr=SavePtr+1;//SavePtr whould not have have already moved. char *pEndContent = CurrPtr; // Read content until a < is found that is not a comment <!-- bool bReadContent = true; while (bReadContent) { while (!char_match (*pEndContent, LESSTHAN) && *pEndContent) pEndContent++; if (!strncmp (pEndContent, BEGIN_COMMENT, strlen (BEGIN_COMMENT))) skipUntilString (&pEndContent, END_COMMENT); else bReadContent = false; if (!(*pEndContent)) bReadContent = false; } TokenLength = pEndContent - CurrPtr; } // Check for name tokens else if (isnamech(toint(CurrPtr,&tlen),false)){ // Name found, so find out how long it is int iIndex=tlen; while (isnamech(toint(CurrPtr+iIndex,&tlen),true)) iIndex+=tlen; TokenLength=iIndex; }; // Copy the token to the return string if (TokenLength > 0){ //if (!copy_token (TokenBuff, CurrPtr, TokenLength)) if (!copy_token (CurrPtr, TokenLength)) return 1; } else if (*CurrPtr == '\0') { TokenLength = 0; clearTokBuf(); return 0; } else { // return the unrecognized token for error information TokenLength = 1; //copy_token (TokenBuff, CurrPtr, TokenLength); appendTokBuf(*CurrPtr); return 1; } CurrPtr = CurrPtr+TokenLength; return 0;}//Will return a handle to the tree structure//The tree structure indicates to which parent the node belongs.int Parser::getNextNode(NODE_TYPE &NodeType, char **NodeName, char **NodeValue, bool &IsEnd, bool IgnoreWhiteSpace){ while(*CurrPtr!='\0') { if(IgnoreWhiteSpace) IgnoreWhiteSpaces(); if(getNextToken()!=0) { *NodeValue=NULL; *NodeName=NULL; NodeType=INVALID_NODE; IsEnd=false; return 1; } //if(!strcmp(TokenBuff, LESSTHAN)) if(!strcmp(getTokBuf(), LESSTHAN)) { if(IgnoreWhiteSpace) IgnoreWhiteSpaces(); TagName=true; if(getNextToken()!=0) { *NodeValue=NULL; *NodeName=NULL; NodeType=INVALID_NODE; IsEnd=false; return 1; } TagName=false; //if(TokenBuff==NULL) // throw DOMException(DOMException::FATAL_ERROR_DURING_PARSING); //if(*TokenBuff=='\0') // throw DOMException(DOMException::FATAL_ERROR_DURING_PARSING); if (getTokBufLength() == 0) throw DOMException(DOMException::FATAL_ERROR_DURING_PARSING); //*NodeName=(char *)malloc(strlen(TokenBuff)+1); *NodeName=(char *)malloc(getTokBufLength()+1); //strcpy(*NodeName,TokenBuff); strcpy(*NodeName,getTokBuf()); //strcpy(LastElement,TokenBuff); setLastElem(getTokBuf()); *NodeValue=NULL; NodeType=ELEMENT_NODE; attrFound=true; atLeastOneAttrFound=false; IsEnd=false; } //else if(!strcmp(TokenBuff, GREATERTHAN)) else if(!strcmp(getTokBuf(), GREATERTHAN)) { attrFound=false; if(atLeastOneAttrFound)//forget the greater than { atLeastOneAttrFound=false; continue; } else return 0; } //else if(!strcmp(TokenBuff, ENDTAG)) else if(!strcmp(getTokBuf(), ENDTAG)) { if(IgnoreWhiteSpace) IgnoreWhiteSpaces(); if(getNextToken()!=0) { *NodeValue=NULL; *NodeName=NULL; NodeType=INVALID_NODE; IsEnd=false; return 1; } //if(TokenBuff==NULL) // throw DOMException(DOMException::FATAL_ERROR_DURING_PARSING); //if(*TokenBuff=='\0') // throw DOMException(DOMException::FATAL_ERROR_DURING_PARSING); if (getTokBufLength() == 0) throw DOMException(DOMException::FATAL_ERROR_DURING_PARSING); //*NodeName=(char *)malloc(strlen(TokenBuff)+1); *NodeName=(char *)malloc(getTokBufLength()+1); //strcpy(*NodeName,TokenBuff); strcpy(*NodeName,getTokBuf()); *NodeValue=NULL; NodeType=ELEMENT_NODE; IsEnd=true; } //else if(!strcmp(TokenBuff, COMPLETETAG)) else if(!strcmp(getTokBuf(), COMPLETETAG)) { if(NodeType==ELEMENT_NODE || (NodeType==ATTRIBUTE_NODE)) { IsEnd=false; //rewindCurrentPtr(strlen(TokenBuff)); rewindCurrentPtr(getTokBufLength()); return 0; } //store the last element tag and return back as end element node //*NodeName=(char *)malloc(strlen(LastElement)+1); *NodeName=(char *)malloc(getLastElemLength()+1); //strcpy(*NodeName,LastElement); strcpy(*NodeName,getLastElem()); *NodeValue=NULL; NodeType=ELEMENT_NODE; attrFound=true; atLeastOneAttrFound=false; IsEnd=true; return 0; } //else if(TokenBuff[0] == '\0') else if (getTokBufLength() == 0) { IsEnd=false; continue; } else if(!attrFound) { //if (TokenBuff != NULL) //{ // if(*TokenBuff!='\0') // { // *NodeValue=(char *)malloc(strlen(TokenBuff)+1); // strcpy(*NodeValue,TokenBuff); // } //} if (getTokBufLength() != 0){ *NodeValue=(char *)malloc(getTokBufLength()+1); strcpy(*NodeValue,getTokBuf()); } *NodeName=(char *)malloc(strlen("#text")+1); strcpy(*NodeName,"#text"); NodeType=TEXT_NODE; IsEnd=false; return 0; } else { //if(TokenBuff==NULL) // throw DOMException(DOMException::FATAL_ERROR_DURING_PARSING); //if(*TokenBuff=='\0') // throw DOMException(DOMException::FATAL_ERROR_DURING_PARSING); if (getTokBufLength() == 0) throw DOMException(DOMException::FATAL_ERROR_DURING_PARSING); if(NodeType==ELEMENT_NODE) { //rewindCurrentPtr(strlen(TokenBuff)+1); rewindCurrentPtr(getTokBufLength()+1); return 0; } //*NodeName=(char *)malloc(strlen(TokenBuff)+1); *NodeName=(char *)malloc(getTokBufLength()+1); //strcpy(*NodeName,TokenBuff); strcpy(*NodeName,getTokBuf()); if(IgnoreWhiteSpace) IgnoreWhiteSpaces(); //gets rid of equals if(getNextToken()!=0) { *NodeValue=NULL; *NodeName=NULL; NodeType=INVALID_NODE; IsEnd=false; return 1; } if(IgnoreWhiteSpace) IgnoreWhiteSpaces(); if(getNextToken()!=0) { *NodeValue=NULL; *NodeName=NULL; NodeType=INVALID_NODE; IsEnd=false; return 1; } //gets rid of beginning quotes inAttrib=true; if(getNextToken()!=0) { *NodeValue=NULL; *NodeName=NULL; NodeType=INVALID_NODE; IsEnd=false; return 1; } inAttrib=false; //if(TokenBuff==NULL) // throw DOMException(DOMException::FATAL_ERROR_DURING_PARSING); //if(*TokenBuff=='\0') // throw DOMException(DOMException::FATAL_ERROR_DURING_PARSING); if (getTokBufLength() == 0) throw DOMException(DOMException::FATAL_ERROR_DURING_PARSING); //*NodeValue=(char *)malloc(strlen(TokenBuff)+1); *NodeValue=(char *)malloc(getTokBufLength()+1); //strcpy(*NodeValue,TokenBuff); strcpy(*NodeValue,getTokBuf()); // gets rid of ending quotes if(getNextToken()!=0) { *NodeValue=NULL; *NodeName=NULL; NodeType=INVALID_NODE; IsEnd=false; return 1; } NodeType=ATTRIBUTE_NODE; IsEnd=false; atLeastOneAttrFound=true; return 0; } } return 0;}#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -