uunconc.c
来自「UUDeview是一个编码解码器」· C语言 代码 · 共 1,686 行 · 第 1/3 页
C
1,686 行
/* * This file is part of uudeview, the simple and friendly multi-part multi- * file uudecoder program (c) 1994-2001 by Frank Pilhofer. The author may * be contacted at fp@fpx.de * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. *//* * These are the functions that are responsible for decoding. The * original idea is from a freeware utility called "uunconc", and * few lines of this code may still bear a remote resemblance to * its code. If you are the author or know him, contact me. * This program could only decode one multi-part, uuencoded file * where the parts were in order. Base64, XX and BinHex decoding, * support for multi-files and part-ordering covered by myself. **/#ifdef HAVE_CONFIG_H#include "config.h"#endif#ifdef SYSTEM_WINDLL#include <windows.h>#endif#ifdef SYSTEM_OS2#include <os2.h>#endif#include <stdio.h>#include <ctype.h>#ifdef STDC_HEADERS#include <stdlib.h>#include <string.h>#endif#ifdef HAVE_UNISTD_H#include <unistd.h>#endif#ifdef HAVE_ERRNO_H#include <errno.h>#endif#include <crc32.h>#include <uudeview.h>#include <uuint.h>#include <fptools.h>#include <uustring.h>char * uunconc_id = "$Id: uunconc.c,v 1.38 2004/03/01 22:52:27 fp Exp $";/* for braindead systems */#ifndef SEEK_SET#ifdef L_BEGIN#define SEEK_SET L_BEGIN#else#define SEEK_SET 0#endif#endif/* * decoder states */#define BEGIN (1)#define DATA (2)#define END (3)#define DONE (4)/* * mallocable areas */char *uunconc_UUxlat;char *uunconc_UUxlen;char *uunconc_B64xlat;char *uunconc_XXxlat;char *uunconc_BHxlat;char *uunconc_save;/* * decoding translation tables and line length table */static int * UUxlen; /* initialized in UUInitConc() */static int * UUxlat; /* from the malloc'ed areas above */static int * B64xlat;static int * XXxlat;static int * BHxlat;/* * buffer for decoding */static char *save[3];/* * mallocable areas */char *uuncdl_fulline;char *uuncdp_oline;/* * Return information for QuickDecode */static int uulboundary;/* * To prevent warnings when using a char as index into an array */#define ACAST(s) ((int)(unsigned char)(s))/* * Initialize decoding tables */voidUUInitConc (void){ int i, j; /* * Update pointers */ UUxlen = (int *) uunconc_UUxlen; UUxlat = (int *) uunconc_UUxlat; B64xlat = (int *) uunconc_B64xlat; XXxlat = (int *) uunconc_XXxlat; BHxlat = (int *) uunconc_BHxlat; save[0] = uunconc_save; save[1] = uunconc_save + 256; save[2] = uunconc_save + 512; /* prepare decoding translation table */ for(i = 0; i < 256; i++) UUxlat[i] = B64xlat[i] = XXxlat[i] = BHxlat[i] = -1; /* * At some time I received a file which used lowercase characters for * uuencoding. This shouldn't be, but let's accept it. Must take special * care that this doesn't break xxdecoding. This is giving me quite a * headache. If this one file hadn't been a Pocahontas picture, I might * have ignored it for good. */ for (i = ' ', j = 0; i < ' ' + 64; i++, j++) UUxlat[i] /* = UUxlat[i+64] */ = j; for (i = '`', j = 0; i < '`' + 32; i++, j++) UUxlat[i] = j; /* add special cases */ UUxlat['`'] = UUxlat[' ']; UUxlat['~'] = UUxlat['^']; /* prepare line length table */ UUxlen[0] = 1; for(i = 1, j = 5; i <= 61; i += 3, j += 4) UUxlen[i] = UUxlen[i+1] = UUxlen[i+2] = j; /* prepare other tables */ for (i=0; i<64; i++) { B64xlat[ACAST(B64EncodeTable[i])] = i; XXxlat [ACAST(XXEncodeTable [i])] = i; BHxlat [ACAST(BHEncodeTable [i])] = i; }}/* * Workaround for Netscape *//* * Determines whether Netscape may have broken up a data line (by * inserting a newline). This only seems to happen after <a in a * href statement */intUUBrokenByNetscape (char *string){ char *ptr; int len; if (string==NULL || (len=strlen(string))<3) return 0; if ((ptr = _FP_stristr (string, "<a href=")) != NULL) { if (_FP_stristr (string, "</a>") > ptr) return 2; } ptr = string + len; while (len && (*(ptr-1)=='\015' || *(ptr-1)=='\012')) { ptr--; len--; } if (len<3) return 0; if (*--ptr == ' ') ptr--; ptr--; if (_FP_strnicmp (ptr, "<a", 2) == 0) return 1; return 0;}/* * Try to repair a Netscape-corrupted line of data. * This must only be called on corrupted lines, since non-Netscape * data may even _get_ corrupted by this procedure. * * Some checks are included multiply to speed up the procedure. For * example: (*p1!='<' || strnicmp(p1,"</a>",4)). If the first expression * becomes true, the costly function isn't called :-) * * Since '<', '>', '&' might even be replaced by their html equivalents * in href strings, I'm now using two passes, the first one for & + co, * the second one for hrefs. */intUUNetscapeCollapse (char *string){ char *p1=string, *p2=string; int res = 0; if (string==NULL) return 0; /* * First pass */ while (*p1) { if (*p1 == '&') { if (_FP_strnicmp (p1, "&", 5) == 0) { p1+=5; *p2++='&'; res=1; } else if (_FP_strnicmp (p1, "<", 4) == 0) { p1+=4; *p2++='<'; res=1; } else if (_FP_strnicmp (p1, ">", 4) == 0) { p1+=4; *p2++='>'; res=1; } else *p2++ = *p1++; } else *p2++ = *p1++; } *p2 = '\0'; /* * Second pass */ p1 = p2 = string; while (*p1) { if (*p1 == '<') { if ((_FP_strnicmp (p1, "<ahref=", 7) == 0 || _FP_strnicmp (p1, "<a href=",8) == 0) && (_FP_strstr (p1, "</a>") != 0 || _FP_strstr (p1, "</A>") != 0)) { while (*p1 && *p1!='>') p1++; if (*p1=='\0' || *(p1+1)!='<') return 0; p1++; while (*p1 && (*p1!='<' || _FP_strnicmp(p1,"</a>",4)!=0)) { *p2++ = *p1++; } if (_FP_strnicmp(p1,"</a>",4) != 0) return 0; p1+=4; res=1; } else *p2++ = *p1++; } else *p2++ = *p1++; } *p2 = '\0'; return res;}/* * The second parameter is 0 if we are still searching for encoded data, * otherwise it indicates the encoding we're using right now. If we're * still in the searching stage, we must be a little more strict in * deciding for or against encoding; there's too much plain text looking * like encoded data :-( */intUUValidData (char *ptr, int encoding, int *bhflag){ int i=0, j, len=0, suspicious=0, flag=0; char *s = ptr; if ((s == NULL) || (*s == '\0')) { return 0; /* bad string */ } while (*s && *s!='\012' && *s!='\015') { s++; len++; i++; } if (i == 0) return 0; switch (encoding) { case UU_ENCODED: goto _t_UU; case XX_ENCODED: goto _t_XX; case B64ENCODED: goto _t_B64; case BH_ENCODED: goto _t_Binhex; case YENC_ENCODED: return YENC_ENCODED; } _t_Binhex: /* Binhex Test */ len = i; s = ptr; /* * bhflag notes the state we're in. Within the data, it's 1. If we're * still looking for the initial :, it's 0 */ if (*bhflag == 0 && *s != ':') { if (encoding==BH_ENCODED) return 0; goto _t_B64; } else if (*bhflag == 0 /* *s == ':' */) { s++; len--; } while (len && BHxlat[ACAST(*s)] != -1) { len--; s++; } /* allow space characters at the end of the line if we are sure */ /* that this is Binhex encoded data or the line was long enough */ flag = (*s == ':') ? 0 : 1; if (*s == ':' && len>0) { s++; len--; } if (((i>=60 && len<=10) || encoding) && *s==' ') { while (len && *s==' ') { s++; len--; } } /* * BinHex data shall have exactly 64 characters (except the last * line). We ignore everything with less than 40 characters to * be flexible */ if (len != 0 || (flag && i < 40)) { if (encoding==BH_ENCODED) return 0; goto _t_B64; } *bhflag = flag; return BH_ENCODED; _t_B64: /* Base64 Test */ len = i; s = ptr; /* * Face it: there _are_ Base64 lines that are not a multiple of four * in length :-( * * if (len%4) * goto _t_UU; */ while (len--) { if (*s < 0 || (B64xlat[ACAST(*s)] == -1 && *s != '=')) { /* allow space characters at the end of the line if we are sure */ /* that this is Base64 encoded data or the line was long enough */ if (((i>=60 && len<=10) || encoding) && *s++==' ') { while (*s==' ' && len) s++; if (len==0) return B64ENCODED; } if (encoding==B64ENCODED) return 0; goto _t_UU; } else if (*s == '=') { /* special case at end */ /* if we know this is B64encoded, allow spaces at end of line */ s++; if (*s=='=' && len>=1) { len--; s++; } if (encoding && len && *s==' ') { while (len && *s==' ') { s++; len--; } } if (len != 0) { if (encoding==B64ENCODED) return 0; goto _t_UU; } return B64ENCODED; } s++; } return B64ENCODED; _t_UU: len = i; s = ptr; if (UUxlat[ACAST(*s)] == -1) { /* uutest */ if (encoding==UU_ENCODED) return 0; goto _t_XX; } j = UUxlen[UUxlat[ACAST(*s)]]; if (len-1 == j) /* remove trailing character */ len--; if (len != j) { switch (UUxlat[ACAST(*s)]%3) { case 1: if (j-2 == len) j-=2; break; case 2: if (j-1 == len) j-=1; break; } } /* * some encoders are broken with respect to encoding the last line of * a file and produce extraoneous characters beyond the expected EOL * So were not too picky here about the last line, as long as it's longer * than necessary and shorter than the maximum * this tolerance broke the xxdecoding, because xxencoded data was * detected as being uuencoded :( so don't accept 'h' as first character * also, if the first character is lowercase, don't accept the line to * have space characters. the only encoder I've heard of which uses * lowercase characters at least accepts the special case of encoding * 0 as `. The strchr() shouldn't be too expensive here as it's only * evaluated if the first character is lowercase, which really shouldn't * be in uuencoded text. */ if (len != j && ((ptr[0] == '-' && ptr[1] == '-' && strstr(ptr,"part")!=NULL) || !(*ptr != 'M' && *ptr != 'h' && len > j && len <= UUxlen[UUxlat['M']]))) { if (encoding==UU_ENCODED) return 0; goto _t_XX; /* bad length */ } if (len != j || islower (*ptr)) { /* * if we are not in a 'uuencoded' state, don't allow the line to have * space characters at all. if we know we _are_ decoding uuencoded * data, the rest of the line, beyond the length of encoded data, may * have spaces. */ if (encoding != UU_ENCODED) if (strchr (ptr, ' ') != NULL) goto _t_XX;/* suspicious = 1; we're careful here REMOVED 0.4.15 __FP__ */ len = j; } while (len--) { if (*s < 0 || UUxlat[ACAST(*s++)] < 0) { if (encoding==UU_ENCODED) return 0; goto _t_XX; /* bad code character */ } if (*s == ' ' && suspicious) { if (encoding==UU_ENCODED) return 0; goto _t_XX; /* this line looks _too_ suspicious */ } } return UU_ENCODED; /* data is valid */ _t_XX: /* XX Test */ len = i; s = ptr; if (XXxlat[ACAST(*s)] == -1) return 0; j = UUxlen[XXxlat[ACAST(*s)]]; /* Same line length table as UUencoding */ if (len-1 == j) /* remove trailing character */ len--; if (len != j) switch (UUxlat[ACAST(*s)]%3) { case 1: if (j-2 == len) j-=2; break; case 2: if (j-1 == len) j-=1; break; } /* * some encoders are broken with respect to encoding the last line of * a file and produce extraoneous characters beyond the expected EOL * So were not too picky here about the last line, as long as it's longer * than necessary and shorter than the maximum */ if (len != j && !(*ptr != 'h' && len > j && len <= UUxlen[UUxlat['h']])) return 0; /* bad length */ while(len--) { if(*s < 0 || XXxlat[ACAST(*s++)] < 0) { return 0; /* bad code character */ } } return XX_ENCODED; /* data is valid */}/* * This function may be called upon a line that does not look like * valid encoding on first sight, but might be erroneously encoded * data from Netscape, Lynx or MS Exchange. We might need to read * a new line from the stream, which is why we need the FILE. * Returns the type of encoded data if successful or 0 otherwise. */intUURepairData (FILE *datei, char *line, int encoding, int *bhflag){ int nflag, vflag=0, safety=42; char *ptr; nflag = UUBrokenByNetscape (line); while (vflag == 0 && nflag && safety--) { if (nflag == 1) { /* need next line to repair */ if (strlen (line) > 250) break; ptr = line + strlen (line); while (ptr>line && (*(ptr-1)=='\015' || *(ptr-1)=='\012')) ptr--; if (_FP_fgets (ptr, 255-(ptr-line), datei) == NULL) break; } else { /* don't need next line to repair */ } if (UUNetscapeCollapse (line)) { if ((vflag = UUValidData (line, encoding, bhflag)) == 0) nflag = UUBrokenByNetscape (line); } else nflag = 0; } /* * Sometimes, a line is garbled even without it being split into
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?