📄 encoder.cpp
字号:
{
if (IF_OPTION(OPTION_CRLF))
{
s[s_size]=13;
s_size++;
}
s[s_size]=10;
s_size++;
}
XMLState=CLOSED;
}
if (wordType==NUMBER)
{
if (subWordType==NUMBER2 || subWordType==NUMBER3)
wordType=subWordType;
encodeSpaces();
int len;
if (s_size==4 && ((s[0]=='1' && s[1]>=7 && s[1]<='9') || (s[0]=='2' && s[1]=='0')))
{
len=1000*(s[0]-'0')+100*(s[1]-'0')+10*(s[2]-'0')+(s[3]-'0');
len-=1900;
if (len>=0 && len<256)
{
ENCODE_PUTC('5');
if (IF_OPTION(OPTION_NUMBER_CONTAINER))
cont.memout_year->OutTgtByte(len);
else
ENCODE_PUTC(len);
return;
}
}
if (s_size>=10)
{
int tmp_num=s_size-9;
encodeWord(s,tmp_num,wordType,XMLState,c);
s+=s_size-9;
s_size=9;
}
while (s_size>0 && s[0]=='0')
{
ENCODE_PUTC(s[0]);
s++;
s_size--;
}
if (s_size==0)
return;
len=s[s_size];
s[s_size]=0;
size=atol((char*)s);
s[s_size]=len;
len=0;
while (size>0)
{
num[len++]=size%NUM_BASE; //100;
size=size/NUM_BASE;
}
ENCODE_PUTC('0'+len);
if (!IF_OPTION(OPTION_NUMBER_CONTAINER))
{
for (i=0; i<len; i++)
ENCODE_PUTC(num[i]);
}
else
if (wordType==NUMBER)
for (i=0; i<len; i++)
{
switch (len)
{
case 2:
cont.memout_num2->OutTgtByte(num[i]);
break;
case 3:
cont.memout_num3->OutTgtByte(num[i]);
break;
case 4:
cont.memout_num4->OutTgtByte(num[i]);
break;
default:
cont.memout_num->OutTgtByte(num[i]);
break;
}
}
else
if (wordType==NUMBER2)
for (i=0; i<len; i++)
{
switch (len)
{
case 2:
cont.memout_num2b->OutTgtByte(num[i]);
break;
case 3:
cont.memout_num3b->OutTgtByte(num[i]);
break;
case 4:
cont.memout_num4b->OutTgtByte(num[i]);
break;
default:
cont.memout_numb->OutTgtByte(num[i]);
break;
}
}
else
for (i=0; i<len; i++)
{
switch (len)
{
case 2:
cont.memout_num2c->OutTgtByte(num[i]);
break;
case 3:
cont.memout_num3c->OutTgtByte(num[i]);
break;
case 4:
cont.memout_num4c->OutTgtByte(num[i]);
break;
default:
cont.memout_numc->OutTgtByte(num[i]);
break;
}
}
return;
}
if (s_size>=WORD_MIN_SIZE)
{
if (XMLState==ADDED || XMLState==ADDED2)
{
if (spaces+s_size<STRING_MAX_SIZE)
{
memmove(s+spaces,s,s_size);
memset(s,' ',spaces);
s_size+=spaces;
}
else
encodeSpaces();
}
checkHashExactly(s,s_size,i);
PRINT_CODEWORDS(("i=%d/%d %s(%d)\n",i,sizeDynDict,s,s_size));
if (i>=0)// && codeWordSize(i)<=s_size)
wordType=LOWERWORD;
if (XMLState==ADDED || XMLState==ADDED2)
{
if (i>=0 && i<sizeDynDict)
spaces=0;
else
{
s+=spaces;
s_size-=spaces;
checkHashExactly(s,s_size,i);
if (i>=0)
wordType=LOWERWORD;
}
}
if (i<0)
{
if (wordType==FIRSTUPPER || wordType==UPPERWORD)
{
if (wordType==FIRSTUPPER)
{
flagToEncode=CHAR_FIRSTUPPER;
s[0]=tolower(s[0]);
}
else // wordType==UPPERWORD
{
flagToEncode=CHAR_UPPERWORD;
toLower(s,s_size);
}
checkHashExactly(s,s_size,i);
PRINT_CODEWORDS(("checkHashExactly i=%d %d=%s\n",i,s_size,s));
}
if (i<0 && IF_OPTION(OPTION_TRY_SHORTER_WORD))
{
// try to find shorter version of word in dictionary
i=findShorterWord(s,s_size);
PRINT_CODEWORDS(("findShorterWord i=%d\n",i));
if (i>=0)
{
size=dictlen[i];
if (wordType==UPPERWORD)
{
int ss=s_size-size;
toUpper(s+size,ss);
}
}
}
}
}
if (i>=0)
{
if (IF_OPTION(OPTION_SPACELESS_WORDS))
{
if ((s[0]>='a' && s[0]<='z') || (s[0]>='A' && s[0]<='Z'))
{
if ((beforeWord=='/' || beforeWord=='-' || beforeWord=='\"' || beforeWord=='_' || beforeWord=='>'))
{
if (spaces>0)
ENCODE_PUTC(CHAR_NOSPACE);
}
else
{
if (spaces>0)
spaces--;
else
ENCODE_PUTC(CHAR_NOSPACE);
}
}
}
encodeSpaces();
if (wordType==FIRSTUPPER || wordType==UPPERWORD)
{
ENCODE_PUTC(flagToEncode);
if (IF_OPTION(OPTION_SPACE_AFTER_CC_FLAG))
ENCODE_PUTC(' ');
}
encodeCodeWord(i);
if (size>0)
{
if (wordType==FIRSTUPPER)
wordType=LOWERWORD;
unsigned char* s2=s+size;
int s_size2=s_size-size;
encodeAsText(s2,s_size2,wordType);
}
}
else
{
if (wordType==FIRSTUPPER)
s[0]=toupper(s[0]);
else if (wordType==UPPERWORD)
toUpper(s,s_size);
encodeSpaces();
encodeAsText(s,s_size,wordType);
}
if (justAdded)
{
if (IF_OPTION(OPTION_USE_CONTAINERS) && XMLState==ADDED)
ENCODE_PUTC(' ');
if (IF_OPTION(OPTION_USE_CONTAINERS))
cont.selectMemBuffer(s,s_size);
}
return;
}
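// Process the file: read the input one character at a time until EOF, either
// collecting detection statistics (when `detect` is set) or encoding the text.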
void XWRT_Encoder::WRT_encode(size_t bufferSize)
{
unsigned char s[STRING_MAX_SIZE];
EWordType wordType;
int c,unicode_le,unicode_be,fftell;
unicode_be=unicode_le=fftell=0;
spaces=0;
s_size=0;
last_c=0;
binCount=0;
wordType=LOWERWORD;
XMLState=UNKNOWN;
getcBuffer[0]=0;
getcBufferData=&getcBuffer[1];
getcBufferSize=bufferSize;
ENCODE_GETC(c);
while (true)
{
if (c==EOF)
break;
PRINT_CHARS(("c=%c (%d) last=%c XMLState=%d\n",c,c,last_c,XMLState));
if (last_c=='>')
XMLState=UNKNOWN;
if (detect)
{
fftell++;
if (c<32 && c!=9 && c!=10 && c!=13 && c!=0)
binCount++;
else
if (c==0)
{
if (fftell%2 != 0)
unicode_be++;
else
unicode_le++;
}
if (fftell==BYTES_TO_DETECT)
{
TURN_OFF(OPTION_UNICODE_LE);
TURN_OFF(OPTION_UNICODE_BE);
if (unicode_le*4/3>fftell/2)
TURN_ON(OPTION_UNICODE_LE)
else
if (unicode_be*4/3>fftell/2)
TURN_ON(OPTION_UNICODE_BE)
}
value[c]++;
if (last_c=='=' && c=='\"')
quotes++;
letterType=letterSet[c];
}
else
{
if (c==13)
{
encodeWord(s,s_size,wordType,XMLState,c);
s_size=0;
ENCODE_GETC(c);
if (c==10 && IF_OPTION(OPTION_CRLF))
{
ENCODE_PUTC(CHAR_CRLF);
ENCODE_GETC(c);
}
else
{
if (addSymbols[13])
ENCODE_PUTC(CHAR_ESCAPE);
ENCODE_PUTC(13);
}
continue;
}
letterType=letterSet[c];
if (letterType==RESERVEDCHAR)
{
PRINT_CHARS(("reservedSet[c] c=%d (%c)\n",c,c));
encodeWord(s,s_size,wordType,XMLState,c);
s_size=0;
PRINT_CHARS(("out CHAR_ESCAPE=%d\n",CHAR_ESCAPE));
ENCODE_PUTC(CHAR_ESCAPE);
ENCODE_PUTC(c);
ENCODE_GETC(c);
continue;
}
if (IF_OPTION(OPTION_QUOTES_MODELING))
{
if (c=='=')
{
encodeWord(s,s_size,wordType,XMLState,c);
s_size=0;
ENCODE_GETC(c);
if (c=='\"')
{
encodeCodeWord(quoteOpen);
ENCODE_GETC(c);
}
else
ENCODE_PUTC('=');
continue;
}
if (c=='\"')
{
encodeWord(s,s_size,wordType,XMLState,c);
s_size=0;
ENCODE_GETC(c);
if (c=='>' && XMLState==ADDED)
{
encodeCodeWord(quoteClose);
ENCODE_GETC(c);
}
else
ENCODE_PUTC('\"');
continue;
}
}
if (letterType==NUMBERCHAR) // && (XMLState!=OPEN && XMLState!=CLOSE))
{
if (wordType!=NUMBER)
{
encodeWord(s,s_size,wordType,XMLState,c);
s_size=0;
}
wordType=NUMBER;
s[s_size++]=c;
if (s_size>=STRING_MAX_SIZE-2)
{
encodeWord(s,s_size,wordType,XMLState,c);
s_size=0;
}
ENCODE_GETC(c);
continue;
}
if (wordType==NUMBER)
{
if (c==':' && s_size<=2)
{
int lsize=s_size;
do
{
s[s_size++]=c;
ENCODE_GETC(c);
if (c==EOF)
break;
letterType=letterSet[c];
}
while (letterType==NUMBERCHAR && s_size<STRING_MAX_SIZE-2);
int hour,minute;
if (lsize==2)
hour=(s[0]-'0')*10+(s[1]-'0')-1;
else
hour=(s[0]-'0')-1;
minute=(s[s_size-2]-'0')*10+(s[s_size-1]-'0');
if (minute<0 || minute>59 || hour<0 || hour>11 || s_size-lsize!=3 || (c!='a' && c!='p'))
{
if (lsize==2)
hour=(s[0]-'0')*10+(s[1]-'0');
else
hour=-1;
minute=(s[s_size-2]-'0')*10+(s[s_size-1]-'0');
if (s_size-lsize==3 && minute>=0 && minute<=99 && hour>=0 && hour<=99)
{
int sec;
if (c==':')
{
s[s_size++]=c;
ENCODE_GETC(c);
if (c!=EOF && letterSet[c]==NUMBERCHAR)
{
s[s_size++]=c;
sec=(c-'0')*10;
ENCODE_GETC(c);
if (c!=EOF && letterSet[c]==NUMBERCHAR)
{
s[s_size++]=c;
sec+=(c-'0');
ENCODE_PUTC(CHAR_HOURMINSEC);
if (IF_OPTION(OPTION_NUMBER_CONTAINER))
{
cont.memout_hms->OutTgtByte(hour);
cont.memout_hms->OutTgtByte(minute);
cont.memout_hms->OutTgtByte(sec);
}
else
{
ENCODE_PUTC(hour);
ENCODE_PUTC(minute);
ENCODE_PUTC(sec);
}
ENCODE_GETC(c);
wordType=LOWERWORD;
s_size=0;
continue;
}
}
encodeMixed(s,s_size,XMLState,c);
wordType=LOWERWORD;
s_size=0;
continue;
}
ENCODE_PUTC(CHAR_HOURMIN);
if (IF_OPTION(OPTION_NUMBER_CONTAINER))
{
cont.memout_hm->OutTgtByte(hour);
cont.memout_hm->OutTgtByte(minute);
}
else
{
ENCODE_PUTC(hour);
ENCODE_PUTC(minute);
}
wordType=LOWERWORD;
s_size=0;
continue;
}
minute=-1;
}
else
{
if (c=='p')
hour+=12; // 0-23
if (last_c!=' ')
beforeWord=last_c;
else
beforeWord=last_last_c;
ENCODE_GETC(c);
}
if (minute<0 || c!='m')
{
encodeWord(s,lsize,wordType,XMLState,c);
ENCODE_PUTC(':');
if (minute>=0)
{
s_size-=lsize+1;
encodeWord(s+lsize+1,s_size,wordType,XMLState,c);
if (hour>=12)
s[0]='p';
else
s[0]='a';
s_size=1;
wordType=LOWERWORD;
}
else
{
s_size-=lsize+1;
memmove(s,s+lsize+1,s_size);
}
}
else
{
if (lsize==2 && s[0]=='0')
ENCODE_PUTC('0');
ENCODE_PUTC(CHAR_TIME);
if (IF_OPTION(OPTION_NUMBER_CONTAINER))
{
cont.memout_time->OutTgtByte(hour);
cont.memout_time->OutTgtByte(minute);
}
else
{
ENCODE_PUTC(hour);
ENCODE_PUTC(minute);
}
ENCODE_GETC(c);
wordType=LOWERWORD;
s_size=0;
}
continue;
}
else
if (c=='.')
{
int lsize=s_size;
do
{
s[s_size++]=c;
ENCODE_GETC(c);
if (c==EOF)
break;
letterType=letterSet[c];
}
while (letterType==NUMBERCHAR && s_size<STRING_MAX_SIZE-2);
if (c=='.' && lsize<4 && s_size-lsize>=2 && s_size-lsize<=4)
{
int x1,x2,x3,x4,c2;
if (lsize==2)
x1=10*(s[0]-'0')+(s[1]-'0');
else
if (lsize==3)
x1=100*(s[0]-'0')+10*(s[1]-'0')+(s[2]-'0');
else
x1=(s[0]-'0');
if (s_size-lsize==3)
{
c2=s[s_size-2];
x2=10*(s[s_size-2]-'0')+(s[s_size-1]-'0');
}
else
if (s_size-lsize==4)
{
c2=s[s_size-3];
x2=100*(s[s_size-3]-'0')+10*(s[s_size-2]-'0')+(s[s_size-1]-'0');
}
else
{
c2=s[s_size-1];
x2=(s[s_size-1]-'0');
}
s[s_size++]=c;
ENCODE_GETC(c);
if (c==EOF || letterSet[c]!=NUMBERCHAR || c=='0' || s[0]=='0' || c2=='0' || x1>255 || x2>255)
{
encodeMixed(s,s_size,XMLState,c);
s_size=0;
wordType=LOWERWORD;
continue;
}
x3=0;
x3=10*x3+(c-'0');
s[s_size++]=c;
ENCODE_GETC(c);
if (c!=EOF && letterSet[c]==NUMBERCHAR)
{
x3=10*x3+(c-'0');
s[s_size++]=c;
ENCODE_GETC(c);
if (c!=EOF && letterSet[c]==NUMBERCHAR)
{
x3=10*x3+(c-'0');
s[s_size++]=c;
ENCODE_GETC(c);
}
}
if (c!='.' || x3>255)
{
encodeMixed(s,s_size,XMLState,c);
s_size=0;
wordType=LOWERWORD;
continue;
}
s[s_size++]=c;
ENCODE_GETC(c);
if (c==EOF || letterSet[c]!=NUMBERCHAR || c=='0')
{
encodeMixed(s,s_size,XMLState,c);
s_size=0;
wordType=LOWERWORD;
continue;
}
x4=0;
x4=10*x4+(c-'0');
s[s_size++]=c;
ENCODE_GETC(c);
if (c!=EOF && letterSet[c]==NUMBERCHAR)
{
x4=10*x4+(c-'0');
s[s_size++]=c;
ENCODE_GETC(c);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -