uunconc.c

来自「UUDeview是一个编码解码器」· C语言 代码 · 共 1,686 行 · 第 1/3 页

C
1,686
字号
/* * This file is part of uudeview, the simple and friendly multi-part multi- * file uudecoder  program  (c) 1994-2001 by Frank Pilhofer. The author may * be contacted at fp@fpx.de * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. *//* * These are the functions that are responsible for decoding. The * original idea is from a freeware utility called "uunconc", and * few lines of this code may still bear a remote resemblance to * its code. If you are the author or know him, contact me. * This program could only decode one multi-part, uuencoded file * where the parts were in order. Base64, XX and BinHex decoding, * support for multi-files and part-ordering covered by myself. **/#ifdef HAVE_CONFIG_H#include "config.h"#endif#ifdef SYSTEM_WINDLL#include <windows.h>#endif#ifdef SYSTEM_OS2#include <os2.h>#endif#include <stdio.h>#include <ctype.h>#ifdef STDC_HEADERS#include <stdlib.h>#include <string.h>#endif#ifdef HAVE_UNISTD_H#include <unistd.h>#endif#ifdef HAVE_ERRNO_H#include <errno.h>#endif#include <crc32.h>#include <uudeview.h>#include <uuint.h>#include <fptools.h>#include <uustring.h>char * uunconc_id = "$Id: uunconc.c,v 1.38 2004/03/01 22:52:27 fp Exp $";/* for braindead systems */#ifndef SEEK_SET#ifdef L_BEGIN#define SEEK_SET L_BEGIN#else#define SEEK_SET 0#endif#endif/* * decoder states */#define BEGIN     (1)#define DATA      (2)#define END       (3)#define DONE      (4)/* * mallocable areas */char *uunconc_UUxlat;char *uunconc_UUxlen;char *uunconc_B64xlat;char *uunconc_XXxlat;char *uunconc_BHxlat;char *uunconc_save;/* * decoding translation tables and line length table */static int      * UUxlen;	/* initialized in UUInitConc()    */static int      * UUxlat;	/* from the malloc'ed areas above */static int      * B64xlat;static int      * XXxlat;static int     	* BHxlat;/* * buffer for decoding */static char *save[3];/* * mallocable areas */char *uuncdl_fulline;char *uuncdp_oline;/* * Return information for QuickDecode */static int uulboundary;/* * To prevent warnings when using a char as index into an array */#define ACAST(s)	((int)(unsigned char)(s))/* * Initialize decoding tables */voidUUInitConc (void){  int i, j;  /*   * Update pointers   */  UUxlen  = (int *) uunconc_UUxlen;  UUxlat  = (int *) uunconc_UUxlat;  B64xlat = (int *) uunconc_B64xlat;  XXxlat  = (int *) uunconc_XXxlat;  BHxlat  = (int *) uunconc_BHxlat;  save[0] = uunconc_save;  save[1] = uunconc_save + 256;  save[2] = uunconc_save + 512;  /* prepare decoding translation table */  for(i = 0; i < 256; i++)    UUxlat[i] = B64xlat[i] = XXxlat[i] = BHxlat[i] = -1;  /*   * At some time I received a file which used lowercase characters for   * uuencoding. This shouldn't be, but let's accept it. Must take special   * care that this doesn't break xxdecoding. This is giving me quite a   * headache. If this one file hadn't been a Pocahontas picture, I might   * have ignored it for good.   */  for (i = ' ', j = 0; i < ' ' + 64; i++, j++)    UUxlat[i] /* = UUxlat[i+64] */ = j;  for (i = '`', j = 0; i < '`' + 32; i++, j++)    UUxlat[i] = j;  /* add special cases */  UUxlat['`'] = UUxlat[' '];  UUxlat['~'] = UUxlat['^'];  /* prepare line length table */  UUxlen[0] = 1;  for(i = 1, j = 5; i <= 61; i += 3, j += 4)    UUxlen[i] = UUxlen[i+1] = UUxlen[i+2] = j;  /* prepare other tables */  for (i=0; i<64; i++) {    B64xlat[ACAST(B64EncodeTable[i])] = i;    XXxlat [ACAST(XXEncodeTable [i])] = i;    BHxlat [ACAST(BHEncodeTable [i])] = i;  }}/* * Workaround for Netscape *//* * Determines whether Netscape may have broken up a data line (by * inserting a newline). This only seems to happen after <a in a * href statement */intUUBrokenByNetscape (char *string){  char *ptr;  int len;  if (string==NULL || (len=strlen(string))<3)    return 0;  if ((ptr = _FP_stristr (string, "<a href=")) != NULL) {    if (_FP_stristr (string, "</a>") > ptr)      return 2;  }  ptr = string + len;  while (len && (*(ptr-1)=='\015' || *(ptr-1)=='\012')) {    ptr--; len--;  }  if (len<3)         return 0;  if (*--ptr == ' ') ptr--;  ptr--;  if (_FP_strnicmp (ptr, "<a", 2) == 0)    return 1;  return 0;}/* * Try to repair a Netscape-corrupted line of data. * This must only be called on corrupted lines, since non-Netscape * data may even _get_ corrupted by this procedure. *  * Some checks are included multiply to speed up the procedure. For * example: (*p1!='<' || strnicmp(p1,"</a>",4)). If the first expression * becomes true, the costly function isn't called :-) * * Since '<', '>', '&' might even be replaced by their html equivalents * in href strings, I'm now using two passes, the first one for &amp; + co, * the second one for hrefs. */intUUNetscapeCollapse (char *string){  char *p1=string, *p2=string;  int res = 0;  if (string==NULL)    return 0;  /*   * First pass   */  while (*p1) {    if (*p1 == '&') {      if      (_FP_strnicmp (p1, "&amp;", 5) == 0) { p1+=5; *p2++='&'; res=1; }      else if (_FP_strnicmp (p1, "&lt;",  4) == 0) { p1+=4; *p2++='<'; res=1; }      else if (_FP_strnicmp (p1, "&gt;",  4) == 0) { p1+=4; *p2++='>'; res=1; }      else *p2++ = *p1++;    }    else *p2++ = *p1++;  }  *p2 = '\0';  /*   * Second pass   */  p1 = p2 = string;  while (*p1) {    if (*p1 == '<') {      if ((_FP_strnicmp (p1, "<ahref=", 7) == 0 ||	   _FP_strnicmp (p1, "<a href=",8) == 0) && 	  (_FP_strstr (p1, "</a>") != 0 || _FP_strstr (p1, "</A>") != 0)) {	while (*p1 && *p1!='>')        p1++;	if (*p1=='\0' || *(p1+1)!='<') return 0;	p1++;	while (*p1 && (*p1!='<' || _FP_strnicmp(p1,"</a>",4)!=0)) {	  *p2++ = *p1++;	}	if (_FP_strnicmp(p1,"</a>",4) != 0)	  return 0;	p1+=4;	res=1;      }      else	*p2++ = *p1++;    }    else      *p2++ = *p1++;  }  *p2 = '\0';  return res;}/* * The second parameter is 0 if we are still searching for encoded data, * otherwise it indicates the encoding we're using right now. If we're * still in the searching stage, we must be a little more strict in * deciding for or against encoding; there's too much plain text looking * like encoded data :-( */intUUValidData (char *ptr, int encoding, int *bhflag){  int i=0, j, len=0, suspicious=0, flag=0;  char *s = ptr;  if ((s == NULL) || (*s == '\0')) {    return 0;              /* bad string */  }  while (*s && *s!='\012' && *s!='\015') {    s++;    len++;    i++;  }  if (i == 0)    return 0;  switch (encoding) {  case UU_ENCODED:    goto _t_UU;  case XX_ENCODED:    goto _t_XX;  case B64ENCODED:    goto _t_B64;  case BH_ENCODED:    goto _t_Binhex;  case YENC_ENCODED:    return YENC_ENCODED;  } _t_Binhex:                 /* Binhex Test */  len = i; s = ptr;  /*   * bhflag notes the state we're in. Within the data, it's 1. If we're   * still looking for the initial :, it's 0   */  if (*bhflag == 0 && *s != ':') {    if (encoding==BH_ENCODED) return 0;    goto _t_B64;  }  else if (*bhflag == 0 /* *s == ':' */) {    s++; len--;  }  while (len && BHxlat[ACAST(*s)] != -1) {    len--; s++;  }  /* allow space characters at the end of the line if we are sure */  /* that this is Binhex encoded data or the line was long enough */  flag = (*s == ':') ? 0 : 1;  if (*s == ':' && len>0) {    s++; len--;  }  if (((i>=60 && len<=10) || encoding) && *s==' ') {    while (len && *s==' ') {      s++; len--;    }  }  /*   * BinHex data shall have exactly 64 characters (except the last   * line). We ignore everything with less than 40 characters to   * be flexible   */  if (len != 0 || (flag && i < 40)) {    if (encoding==BH_ENCODED) return 0;    goto _t_B64;  }  *bhflag = flag;  return BH_ENCODED; _t_B64:                    /* Base64 Test */  len = i; s = ptr;  /*   * Face it: there _are_ Base64 lines that are not a multiple of four   * in length :-(   *   * if (len%4)   *   goto _t_UU;   */  while (len--) {    if (*s < 0 || (B64xlat[ACAST(*s)] == -1 && *s != '=')) {      /* allow space characters at the end of the line if we are sure */      /* that this is Base64 encoded data or the line was long enough */      if (((i>=60 && len<=10) || encoding) && *s++==' ') {	while (*s==' ' && len) s++;	if (len==0) return B64ENCODED;      }      if (encoding==B64ENCODED) return 0;      goto _t_UU;    }    else if (*s == '=') {   /* special case at end */      /* if we know this is B64encoded, allow spaces at end of line */      s++;      if (*s=='=' && len>=1) {	len--; s++;      }      if (encoding && len && *s==' ') {	while (len && *s==' ') {	  s++; len--;	}      }      if (len != 0) {	if (encoding==B64ENCODED) return 0;	goto _t_UU;      }      return B64ENCODED;    }    s++;  }  return B64ENCODED; _t_UU:  len = i; s = ptr;  if (UUxlat[ACAST(*s)] == -1) {    /* uutest */    if (encoding==UU_ENCODED) return 0;    goto _t_XX;  }  j = UUxlen[UUxlat[ACAST(*s)]];  if (len-1 == j)	    /* remove trailing character */    len--;  if (len != j) {    switch (UUxlat[ACAST(*s)]%3) {    case 1:      if (j-2 == len) j-=2;      break;    case 2:      if (j-1 == len) j-=1;      break;    }  }  /*   * some encoders are broken with respect to encoding the last line of   * a file and produce extraoneous characters beyond the expected EOL   * So were not too picky here about the last line, as long as it's longer   * than necessary and shorter than the maximum   * this tolerance broke the xxdecoding, because xxencoded data was   * detected as being uuencoded :( so don't accept 'h' as first character   * also, if the first character is lowercase, don't accept the line to   * have space characters. the only encoder I've heard of which uses   * lowercase characters at least accepts the special case of encoding   * 0 as `. The strchr() shouldn't be too expensive here as it's only   * evaluated if the first character is lowercase, which really shouldn't   * be in uuencoded text.   */  if (len != j &&      ((ptr[0] == '-' && ptr[1] == '-' && strstr(ptr,"part")!=NULL) ||       !(*ptr != 'M' && *ptr != 'h' &&	 len > j && len <= UUxlen[UUxlat['M']]))) {    if (encoding==UU_ENCODED) return 0;    goto _t_XX;             /* bad length */  }  if (len != j || islower (*ptr)) {    /*     * if we are not in a 'uuencoded' state, don't allow the line to have     * space characters at all. if we know we _are_ decoding uuencoded     * data, the rest of the line, beyond the length of encoded data, may     * have spaces.     */    if (encoding != UU_ENCODED)      if (strchr (ptr, ' ') != NULL)	goto _t_XX;/*  suspicious = 1;    we're careful here REMOVED 0.4.15 __FP__ */    len        = j;  }  while (len--) {    if (*s < 0 || UUxlat[ACAST(*s++)] < 0) {      if (encoding==UU_ENCODED) return 0;      goto _t_XX;           /* bad code character */    }    if (*s == ' ' && suspicious) {      if (encoding==UU_ENCODED) return 0;      goto _t_XX;           /* this line looks _too_ suspicious */    }  }  return UU_ENCODED;        /* data is valid */ _t_XX:                     /* XX Test */  len = i; s = ptr;  if (XXxlat[ACAST(*s)] == -1)    return 0;  j = UUxlen[XXxlat[ACAST(*s)]];   /* Same line length table as UUencoding */  if (len-1 == j)	    /* remove trailing character */    len--;  if (len != j)    switch (UUxlat[ACAST(*s)]%3) {    case 1:      if (j-2 == len) j-=2;      break;    case 2:      if (j-1 == len) j-=1;      break;    }  /*   * some encoders are broken with respect to encoding the last line of   * a file and produce extraoneous characters beyond the expected EOL   * So were not too picky here about the last line, as long as it's longer   * than necessary and shorter than the maximum   */  if (len != j && !(*ptr != 'h' && len > j && len <= UUxlen[UUxlat['h']]))    return 0;               /* bad length */  while(len--) {    if(*s < 0 || XXxlat[ACAST(*s++)] < 0) {      return 0;             /* bad code character */    }  }  return XX_ENCODED;        /* data is valid */}/* * This function may be called upon a line that does not look like * valid encoding on first sight, but might be erroneously encoded * data from Netscape, Lynx or MS Exchange. We might need to read * a new line from the stream, which is why we need the FILE. * Returns the type of encoded data if successful or 0 otherwise. */intUURepairData (FILE *datei, char *line, int encoding, int *bhflag){  int nflag, vflag=0, safety=42;  char *ptr;  nflag = UUBrokenByNetscape (line);  while (vflag == 0 && nflag && safety--) {    if (nflag == 1) {		/* need next line to repair */      if (strlen (line) > 250)	break;      ptr = line + strlen (line);      while (ptr>line && (*(ptr-1)=='\015' || *(ptr-1)=='\012'))	ptr--;      if (_FP_fgets (ptr, 255-(ptr-line), datei) == NULL)	break;    }    else {			/* don't need next line to repair */    }    if (UUNetscapeCollapse (line)) {      if ((vflag = UUValidData (line, encoding, bhflag)) == 0)	nflag = UUBrokenByNetscape (line);    }    else      nflag = 0;  }  /*   * Sometimes, a line is garbled even without it being split into

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?