📄 encoder.cpp
字号:
if (c!=EOF && letterSet[c]==NUMBERCHAR)
{
x4=10*x4+(c-'0');
s[s_size++]=c;
ENCODE_GETC(c);
}
}
if (x4>255)
{
encodeMixed(s,s_size,XMLState,c);
s_size=0;
wordType=LOWERWORD;
continue;
}
ENCODE_PUTC(CHAR_IP);
if (IF_OPTION(OPTION_NUMBER_CONTAINER))
{
cont.memout_ip->OutTgtByte(x1);
cont.memout_ip->OutTgtByte(x2);
cont.memout_ip->OutTgtByte(x3);
cont.memout_ip->OutTgtByte(x4);
}
else
{
ENCODE_PUTC(x1);
ENCODE_PUTC(x2);
ENCODE_PUTC(x3);
ENCODE_PUTC(x4);
}
s_size=0;
wordType=LOWERWORD;
continue;
}
if (s_size-lsize==3)
{
encodeWord(s,lsize,wordType,XMLState,c);
s_size-=lsize;
memmove(s,s+lsize,s_size);
ENCODE_PUTC('7');
int remain=(s[1]-'0')*10+(s[2]-'0');
if (IF_OPTION(OPTION_NUMBER_CONTAINER))
cont.memout_remain->OutTgtByte(remain);
else
ENCODE_PUTC(remain);
}
else
if (lsize==1 && s_size==3)
{
ENCODE_PUTC(CHAR_REMAIN);
int remain=(s[0]-'0')*10+(s[2]-'0');
if (IF_OPTION(OPTION_NUMBER_CONTAINER))
cont.memout_remain2->OutTgtByte(remain);
else
ENCODE_PUTC(remain);
}
else
if (lsize==2 && s_size==4 && (s[0]=='1' || (s[0]=='2' && s[1]<='4')))
{
ENCODE_PUTC(CHAR_REMAIN);
int remain=(s[0]-'0')*100+(s[1]-'0')*10+(s[3]-'0');
if (IF_OPTION(OPTION_NUMBER_CONTAINER))
cont.memout_remain2->OutTgtByte(remain);
else
ENCODE_PUTC(remain);
}
else
{
encodeWord(s,lsize,wordType,XMLState,c);
s_size-=lsize;
memmove(s,s+lsize,s_size);
ENCODE_PUTC('.');
s_size--;
encodeWord(s+1,s_size,wordType,XMLState,c);
}
s_size=0;
wordType=LOWERWORD;
continue;
}
else
if (c!='-' || s_size==0)
{
encodeWord(s,s_size,wordType,XMLState,c);
s_size=0;
wordType=LOWERWORD;
}
else
if (s_size==4)
{
int year=(s[0]-'0')*1000+(s[1]-'0')*100+(s[2]-'0')*10+(s[3]-'0')-1929;
ENCODE_GETC(c);
letterType=letterSet[c];
if (letterType==NUMBERCHAR && year>=0)
{
s[s_size++]='-'; // s_size==5;
do
{
s[s_size++]=c; // s_size==6; // 1977-12-31
ENCODE_GETC(c);
if (c==EOF)
break;
letterType=letterSet[c];
}
while (s_size<STRING_MAX_SIZE-2 && (letterType==NUMBERCHAR || (c=='-' && s_size==7)));
if (s_size==10 && s[7]=='-')
{
int month=(s[5]-'0')*10+(s[6]-'0')-1;
int day=(s[8]-'0')*10+(s[9]-'0')-1;
if (month>=0 && month<=11 && day>=0 && day<=30)
{
int all=day+31*month+31*12*year;
if (all<65536/2)
{
int newAll=(256/2)+all-lastAll;
if (newAll>=0 && newAll<256)
{
ENCODE_PUTC('9');
if (IF_OPTION(OPTION_NUMBER_CONTAINER))
cont.memout_date3->OutTgtByte(newAll);
else
ENCODE_PUTC(newAll);
}
else
{
newAll=(65536/2)+all-lastAll;
ENCODE_PUTC('8');
if (IF_OPTION(OPTION_NUMBER_CONTAINER))
{
cont.memout_date->OutTgtByte(newAll%256);
cont.memout_date2->OutTgtByte(newAll/256);
}
else
{
ENCODE_PUTC(newAll%256);
ENCODE_PUTC(newAll/256);
}
}
lastAll=all;
s_size=0;
wordType=LOWERWORD;
continue;
}
}
}
if (s_size>7 && s[7]=='-')
{
year=4;
encodeWord(s,year,wordType,XMLState,c);
ENCODE_PUTC('-');
year=2;
encodeWord(s+5,year,wordType,XMLState,c);
ENCODE_PUTC('-');
if (s_size>8)
{
s_size-=8;
encodeWord(s+8,s_size,wordType,XMLState,c);
}
}
else
{
if (s_size==9)
{
int page2;
year+=1929;
page2=(s[5]-'0')*1000+(s[6]-'0')*100+(s[7]-'0')*10+(s[8]-'0');
page2-=year;
if (page2>=0 && page2<256)
{
ENCODE_PUTC(CHAR_PAGES);
if (IF_OPTION(OPTION_NUMBER_CONTAINER))
{
cont.memout_pages->OutTgtByte(year/256);
cont.memout_pages->OutTgtByte(year%256);
cont.memout_pages->OutTgtByte(page2);
}
else
{
ENCODE_PUTC(year/256);
ENCODE_PUTC(year%256);
ENCODE_PUTC(page2);
}
s_size=0;
wordType=LOWERWORD;
continue;
}
}
if (s_size<=14 && s_size>=6 && s[0]!='0' && s[5]!='0')
{
ENCODE_PUTC('6');
subWordType=NUMBER2;
year=4;
encodeWord(s,year,wordType,XMLState,c);
subWordType=NUMBER3;
s_size-=5;
encodeWord(s+5,s_size,wordType,XMLState,c);
subWordType=LOWERWORD;
}
else
{
year=4;
encodeWord(s,year,wordType,XMLState,c);
ENCODE_PUTC('-');
if (s_size>5)
{
s_size-=5;
encodeWord(s+5,s_size,wordType,XMLState,c);
}
}
}
}
else
{
encodeWord(s,s_size,wordType,XMLState,c);
ENCODE_PUTC('-');
}
s_size=0;
wordType=LOWERWORD;
continue;
}
else
if (s_size==3)
{
do
{
s[s_size++]=c;
ENCODE_GETC(c);
if (c==EOF)
break;
letterType=letterSet[c];
}
while (s_size<STRING_MAX_SIZE-2 && letterType==NUMBERCHAR);
int page,page2;
if (s_size==7) // || (s_size==8 && s[4]=='1'))
{
page=(s[0]-'0')*100+(s[1]-'0')*10+(s[2]-'0');
page2=(s[4]-'0')*100+(s[5]-'0')*10+(s[6]-'0');
page2-=page;
if (page2>=0 && page2<256)
{
ENCODE_PUTC(CHAR_PAGES);
if (IF_OPTION(OPTION_NUMBER_CONTAINER))
{
cont.memout_pages->OutTgtByte(page/256);
cont.memout_pages->OutTgtByte(page%256);
cont.memout_pages->OutTgtByte(page2);
}
else
{
ENCODE_PUTC(page/256);
ENCODE_PUTC(page%256);
ENCODE_PUTC(page2);
}
s_size=0;
wordType=LOWERWORD;
continue;
}
}
if (s_size<=13 && s_size>=5 && s[0]!='0' && s[4]!='0')
{
ENCODE_PUTC('6');
subWordType=NUMBER2;
page=3;
encodeWord(s,page,wordType,XMLState,c);
subWordType=NUMBER3;
s_size-=4;
encodeWord(s+4,s_size,wordType,XMLState,c);
subWordType=LOWERWORD;
}
else
{
page=3;
encodeWord(s,page,wordType,XMLState,c);
ENCODE_PUTC('-');
if (s_size>4)
{
s_size-=4;
encodeWord(s+4,s_size,wordType,XMLState,c);
}
}
s_size=0;
wordType=LOWERWORD;
continue;
}
else
{
ENCODE_GETC(c);
if (s_size==2 && c>='A' && c<='Z')
{
int day=(s[0]-'0')*10+(s[1]-'0')-1;
letterType=letterSet[c];
if (letterType==UPPERCHAR && day>=0 && day<=30)
{
s[s_size++]='-'; // s_size==3;
do
{
s[s_size++]=c;
ENCODE_GETC(c);
letterType=letterSet[c];
}
while (s_size<STRING_MAX_SIZE-2 && letterType==UPPERCHAR);// || letterType==LOWERCHAR);
int month=-1;
if (s_size==6 && c=='-')
{
std::string mon;
char* str=(char*)s+3;
mon.append((char*)str,3);
// month=3;
// toLower((unsigned char*)mon.c_str(),month);
month=-1;
if (mon=="JAN") month=0;
else if (mon=="FEB") month=1;
else if (mon=="MAR") month=2;
else if (mon=="APR") month=3;
else if (mon=="MAY") month=4;
else if (mon=="JUN") month=5;
else if (mon=="JUL") month=6;
else if (mon=="AUG") month=7;
else if (mon=="SEP") month=8;
else if (mon=="OCT") month=9;
else if (mon=="NOV") month=10;
else if (mon=="DEC") month=11;
}
if (month==-1)
{
month=2;
encodeWord(s,month,wordType,XMLState,c);
ENCODE_PUTC('-');
beforeWord='-';
s_size-=3;
memmove(s,s+3,s_size);
if (s_size==1)
wordType=FIRSTUPPER;
else if (s_size>1)
wordType=UPPERWORD;
else
wordType=LOWERWORD;
continue;
}
do
{
s[s_size++]=c; // s_size==7; 12-aug-
ENCODE_GETC(c);
letterType=letterSet[c];
}
while (s_size<STRING_MAX_SIZE-2 && letterType==NUMBERCHAR);
int year=-1;
int newAll=0;
if (s_size==11)
{
year=(s[7]-'0')*1000+(s[8]-'0')*100+(s[9]-'0')*10+(s[10]-'0')-1929;
newAll=day+31*month+31*12*year;
}
if (year<0 || newAll<0 || newAll>65535)
{
month=2;
encodeWord(s,month,wordType,XMLState,c);
ENCODE_PUTC('-');
beforeWord='-';
month=3;
wordType=UPPERWORD;
encodeWord(s+3,month,wordType,XMLState,c);
ENCODE_PUTC('-');
s_size-=7;
memmove(s,s+7,s_size);
wordType=NUMBER;
continue;
}
ENCODE_PUTC(CHAR_DATE_ENG);
if (IF_OPTION(OPTION_NUMBER_CONTAINER))
{
cont.memout_date->OutTgtByte(newAll%256);
cont.memout_date2->OutTgtByte(newAll/256);
}
else
{
ENCODE_PUTC(newAll%256);
ENCODE_PUTC(newAll/256);
}
}
else
{
encodeWord(s,s_size,wordType,XMLState,c);
ENCODE_PUTC('-');
}
s_size=0;
wordType=LOWERWORD;
continue;
}
else
{
int lsize=s_size;
s[s_size++]='-';
letterType=letterSet[c];
while (s_size<STRING_MAX_SIZE-2 && letterType==NUMBERCHAR)
{
s[s_size++]=c;
ENCODE_GETC(c);
if (c==EOF)
break;
letterType=letterSet[c];
}
if (lsize<=9 && s_size<=lsize+10 && s_size>=lsize+2 && s[0]!='0' && s[lsize+1]!='0')
{
ENCODE_PUTC('6');
subWordType=NUMBER2;
encodeWord(s,lsize,wordType,XMLState,c);
subWordType=NUMBER3;
s_size-=lsize+1;
encodeWord(s+lsize+1,s_size,wordType,XMLState,c);
subWordType=LOWERWORD;
}
else
{
encodeWord(s,lsize,wordType,XMLState,c);
ENCODE_PUTC('-');
if (s_size>lsize+1)
{
s_size-=lsize+1;
encodeWord(s+lsize+1,s_size,wordType,XMLState,c);
}
}
s_size=0;
wordType=LOWERWORD;
continue;
}
}
} // if (c=='.')
} // if (wordType==NUMBER)
if (c=='<')
{
if (s_size>0)
{
encodeWord(s,s_size,wordType,XMLState,c);
s_size=0;
}
s[s_size++]=c;
ENCODE_GETC(c);
if (c=='/')
{
XMLState=CLOSE;
ENCODE_GETC(c);
}
else
XMLState=OPEN;
while (true)
{
if (c==EOF)
break;
if (!startTagSet[c] || s_size>=STRING_MAX_SIZE-2) // || c==' ' || c=='>' || c=='/' || c==':')
break;
s[s_size++]=c;
ENCODE_GETC(c);
}
wordType=VARWORD;
continue;
}
if (c=='>')
{
int cd=0;
if (XMLState==CLOSED)
XMLState=UNKNOWN;
if (XMLState==CLOSE)
{
ENCODE_GETC(c);
if (IF_OPTION(OPTION_CRLF))
{
if (c==13)
{
ENCODE_GETC(c);
if (c==10)
{
ENCODE_GETC(c);
XMLState=CLOSE_EOL;
}
else
cd=13;
}
}
else
{
if (c==10)
{
ENCODE_GETC(c);
XMLState=CLOSE_EOL;
}
}
letterType=letterSet[c];
}
int cc='>';
encodeWord(s,s_size,wordType,XMLState,cc);
s_size=0;
if (cd)
{
if (addSymbols[13])
ENCODE_PUTC(CHAR_ESCAPE);
ENCODE_PUTC(13);
}
if (XMLState==CLOSED)
{
XMLState=UNKNOWN;
continue;
}
if (last_c=='/' && XMLState==ADDED) // <xml="xxx"/>
{
XMLState=UNKNOWN;
if (stack.size()>0)
stack.pop_back();
}
if (c!='>')
continue;
}
if (c=='&')
{
encodeWord(s,s_size,wordType,XMLState,c);
s_size=0;
wordType=LOWERWORD;
if (last_c!=' ')
beforeWord=last_c;
else
beforeWord=last_last_c;
s[s_size++]=c;
while (true)
{
ENCODE_GETC(c);
if (c==EOF)
break;
letterType=letterSet[c];
if (letterType==UPPERCHAR) // needed only for tryShorter
wordType=VARWORD;
if ((letterType!=LOWERCHAR && letterType!=UPPERCHAR) || s_size>=STRING_MAX_SIZE-2)
break;
s[s_size++]=c;
}
if (c==';' || c=='#')
{
s[s_size++]=c;
ENCODE_GETC(c);
}
encodeWord(s,s_size,wordType,XMLState,c);
s_size=0;
continue;
}
if (c==':' && s_size==4 && wordType==LOWERWORD)
{
if (s[0]=='h' && s[1]=='t' && s[2]=='t' && s[3]=='p')
{
s[s_size++]=c;
ENCODE_GETC(c);
if (c=='/')
{
s[s_size++]=c;
ENCODE_GETC(c);
if (c=='/')
{
while (true)
{
s[s_size++]=c;
ENCODE_GETC(c);
if (c==EOF)
break;
if (urlSet[c] || s_size>=STRING_MAX_SIZE-2)
break;
}
if (c=='/')
{
s[s_size++]=c;
ENCODE_GETC(c);
}
encodeWord(s,s_size,wordType,XMLState,c);
s_size=0;
wordType=LOWERWORD;
}
}
continue;
}
}
if (s[0]!='<')
{
if (c=='@' && s_size>0 && wordType==LOWERWORD)
{
s[s_size++]=c;
while (true)
{
ENCODE_GETC(c);
if (c==EOF)
break;
letterType=letterSet[c];
if (letterType==UPPERCHAR) // needed only for tryShorter
wordType=VARWORD;
if ((letterType!=LOWERCHAR && letterType!=UPPERCHAR && c!='.') || s_size>=STRING_MAX_SIZE-2)
break;
s[s_size++]=c;
}
encodeWord(s,s_size,wordType,XMLState,c);
s_size=0;
continue;
}
#ifdef DYNAMIC_DICTIONARY
if (c=='\'' && s_size>=1) // it's
{
s[s_size++]=c;
if (s_size>=STRING_MAX_SIZE-2)
{
encodeWord(s,s_size,wordType,XMLState,c);
s_size=0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -