📄 afcopy.cpp
字号:
/*
 * afcopy.cpp:
 *
 * Copy one AFF file to another.
 * Resulting file is re-ordered and possibly re-compressed.
 */

/*
 * Copyright (c) 2006
 *	Simson L. Garfinkel
 *  All rights reserved.
 *
 * This code is derrived from software contributed by
 * Simson L. Garfinkel
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. [Omitted]
 * 4. Neither the name of Simson Garfinkel, Basis Technology, or other
 *    contributors to this program may be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY SIMSON GARFINKEL AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL SIMSON
 * GARFINKEL, BAIS TECHNOLOGy, OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "afflib.h"
#include "afflib_i.h"
#include "quads.h"
#include "utils.h"

#include <sys/signal.h>

#ifdef HAVE_TIME_H
#include <time.h>
#endif

#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif

#include <ctype.h>
#include <zlib.h>
#include <openssl/md5.h>
#include <openssl/sha.h>
#include <assert.h>

#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#ifdef HAVE_TERM_H
#include <term.h>
#endif

#ifdef HAVE_NCURSES_TERM_H
#include <ncurses/term.h>
#endif

#ifdef WIN32
#include "unix4win32.h"
#include <malloc.h>
#endif

char *progname = "afcopy";

/* Command-line option counters; each is incremented once per occurrence
 * of the corresponding flag in main().
 *
 * NOTE(review): opt_x (-x) and opt_y (-y) are parsed in main() but are not
 * consulted anywhere in this file; confirm whether another translation unit
 * reads them or whether the options are simply unimplemented.
 */
int opt_verbose = 0;
int opt_debug   = 0;
int opt_x       = 0;
int opt_y       = 0;
int opt_preen   = 0;
int opt_zap     = 0;
int opt_missing = 0;

/* Print the version banner and the option summary, then exit(1).
 * This function never returns.
 *
 * NOTE(review): the text documents a -c option, but 'c' is not in the
 * getopt() option string used by main(), so -c ends up here instead.
 */
void usage()
{
    printf("%s version %s\n",progname,PACKAGE_VERSION);
    printf("usage: %s [options] file1 file\n",progname);
    printf(" Copies file1 to file2\n");
    printf(" %s [options] file1 file2 file3 ... dir\n",progname);
    printf(" Copies file1.. into dir\n");
    printf(" %s [options] file1 file2 file3 ... dir1 dir2...\n",progname);
    printf(" Copies file1.. into dirs1, dir2, ...\n");
    printf("\n");
    printf("By default, all page MACs are verified on read and all segments\n");
    printf("are verified after write.\n");
    printf("Options:\n");
    printf(" -v = verbose: print each file as it is copied\n");
    printf(" -vv = very verbose: print each segment as it is copied\n");
    printf(" -d = print debugging information as well\n");
    printf(" -c = also verify decompression of each page\n");
    printf(" -x = don't verify hashes on reads\n");
    printf(" -y = don't verify writes\n");
    printf(" -p = preen; recompress all pages with LZMA or NULL\n");
    printf("\n");
    printf(" -h = help; print this message.\n");
    printf(" -V = print the program version and exit.\n");
    printf(" -z = zap; copy even if the destination exists.\n");
    printf(" -m = just copy the missing segments\n");
    printf("\n");
    printf("Examples:\n");
#ifdef USE_S3
    printf(" %s -vpy *.aff s3:/// Copy all files in current\n",progname);
    printf(" directory to S3 default bucket\n");
#endif
    exit(1);
}

/* Progress state reported by sig_info(); maintained by afcopy(). */
const char *current_source = 0;
const char *current_dest   = 0;
const char *current_seg    = 0;

/* Signal handler (installed for SIGINFO where available): print which
 * source, destination and segment are currently being processed.
 *
 * NOTE(review): printf() is called from signal context here.
 */
void sig_info(int arg)
{
    if(current_source){
	printf("Copying %s ",current_source);
	if(current_dest){
	    printf("--> %s",current_dest);
	    if(current_seg) printf(" (%s) ",current_seg);
	}
    }
    printf("\n");
}

/* Copy page 'pagenum' from ain to aout, rewriting it with LZMA at maximum
 * compression.
 *
 * arg is the segment's flag word; pages whose compression algorithm is
 * already ZERO or LZMA are not preened.
 *
 * Returns 0 on success, -1 if the page was not (or could not be) preened.
 */
int preen(AFFILE *ain,AFFILE *aout,int64 pagenum,unsigned long arg)
{
    int alg = (arg & AF_PAGE_COMP_ALG_MASK);
    //int max = (alg & AF_PAGE_COMP_MAX);

    if(alg==AF_PAGE_COMP_ALG_ZERO) return -1; // can't preen ZERO
    if(alg==AF_PAGE_COMP_ALG_LZMA) return -1; // don't re-compress LZMA

    /* Test the page size while it is still signed: once stored in a size_t
     * a negative value would become a huge positive one and pass a "<=0"
     * check.  (af_page_size() is printed with %d elsewhere in this file.)
     */
    int ps = af_page_size(ain);
    if(ps<=0) return -1;		// couldn't get pagesize
    size_t pagesize = (size_t)ps;

    unsigned char *pagebuf = (unsigned char *)malloc(pagesize);
    if(!pagebuf) return -1;		// couldn't allocate memory for page?

    int ret = -1;
    if(af_get_page(ain,pagenum,pagebuf,&pagesize)==0){
	/* Got the page; now write it out with max compression */
	af_enable_compression(aout,AF_COMPRESSION_ALG_LZMA,AF_COMPRESSION_MAX);
	if(af_update_page(aout,pagenum,pagebuf,pagesize)==0){
	    ret = 0;
	}
    }
    free(pagebuf);
    return ret;
}

/* Announce and unlink() every file named in outfiles. */
void unlink_outfiles(vector<string> outfiles)
{
    for(vector<string>::const_iterator o = outfiles.begin();
	o != outfiles.end();
	o++){
	printf("Unlinking %s\n",o->c_str());
	unlink(o->c_str());
    }
}

#if !defined( __BSD_VISIBLE) && !defined(isnumber)
#define isnumber(x) isdigit(x)
#endif

/* Copy every segment of 'infile' into each file named in 'outfiles'.
 *
 * Phases:
 *   1. Open the input and read its segment list.
 *   2. Open (or create) each output, honouring -z (zap) and -m (missing).
 *   3. Copy each segment to each output, remembering its MD5; with -p,
 *      page segments are preened instead where possible.
 *   4. Read every segment back from each non-"reliable" output and compare
 *      its MD5 with the one remembered in phase 3.
 *
 * Returns 0 on success and -1 if no output file could be opened.  Read,
 * write and verification failures terminate the process through
 * err()/errx(), in most cases after unlinking the output files.
 */
int afcopy(char *infile,vector<string> &outfiles)
{
#ifdef SIGINFO
    signal(SIGINFO,sig_info);
#endif

    hashMapT hashMap;

    /* Open the input file */
    AFFILE *ain = af_open(infile,O_RDONLY,0);
    if(!ain) err(1,"%s",infile);

    seglist segments;
    get_seglist(ain,&segments);

    outlist afouts;			// vector of output AFFs
    vector<int64> preened_pages;

    /* Now, try to open the output files, to see if they exist */
    current_source = infile;
    if(opt_verbose) printf("%s: ",infile);
    for(vector<string>::const_iterator o = outfiles.begin();
	o != outfiles.end();
	o++){

	const char *fname = o->c_str();
	outelement out;
	out.af = af_open(fname,O_RDWR|O_EXCL,0666);
	if(out.af){
	    fprintf(stderr,"%s: file exists... ",fname);
	    if(opt_zap==0 && opt_missing==0){
		fprintf(stderr,"Will not overwrite; use -m or -z\n");
		af_close(out.af);
		continue;
	    }
	    if(opt_zap){
		fprintf(stderr,"zapping...\n");
		af_close(out.af);
		unlink(fname);
		out.af = 0;
	    }
	    /* Only inspect the existing file if it was not just zapped;
	     * otherwise out.af is null and must not be dereferenced.
	     */
	    if(opt_missing && out.af){
		fprintf(stderr,"Filling in missing segments...\n");
		if(af_page_size(ain) != af_page_size(out.af)){
		    fprintf(stderr,"%s and %s have different page sizes (%d != %d)\n",
			    af_filename(ain),
			    af_filename(out.af),
			    af_page_size(ain),
			    af_page_size(out.af));
		    af_close(out.af);
		    out.af=0;
		    continue;
		}
	    }
	}
	if(out.af==0){
	    out.af = af_open(fname,O_RDWR|O_EXCL|O_CREAT,0666);
	    if(!out.af){
		warn("%s",fname);
		continue;
	    }
	    if(af_set_pagesize(out.af,af_page_size(ain))){
		errx(1,"%s: cannot set page size to %d\n",
		     af_filename(out.af),af_page_size(ain));
	    }
	}
	if(o != outfiles.begin()) printf("\t ");
	if(opt_verbose){
	    printf(" => %s ",fname);
	    if(opt_preen) printf(" (preening) ");
	    printf("\n");
	}
	if(opt_missing) get_seglist(out.af,&out.segs);
	afouts.push_back(out);
    }

    /* IF we couldn't open any output files, return */
    if(afouts.size()==0){
	af_close(ain);			// close the input file
	return -1;
    }

    /* Start the copying */
    struct timeval t0,t1;
    gettimeofday(&t0,0);
    for(seglist::const_iterator seg = segments.begin();
	seg != segments.end();
	seg++){

	/* For each segment, get the size of the segment */
	const char *segname = seg->name.c_str();
	current_seg = segname;
	size_t seglen=0;

	if(af_get_seg(ain,segname,0,0,&seglen)){
	    err(1,"Cannot read length of segment '%s' on input file %s",
		segname,af_filename(ain));
	}

	/* A segment may carry no data at all.  malloc(0) is allowed to
	 * return NULL, which must not be mistaken for an allocation
	 * failure, so always request at least one byte.
	 */
	unsigned char *segbuf = (unsigned char *)malloc(seglen ? seglen : 1);
	if(!segbuf){
	    err(1,"Cannot allocated %d bytes for segment '%s' in %s",
		(int)seglen,segname,af_filename(ain));
	}

	/* Now get the source segment */
	unsigned long arg;
	if(af_get_seg(ain,segname,&arg,segbuf,&seglen)){
	    unlink_outfiles(outfiles);
	    err(1,"Cannot read segment '%s' in %s. Deleteing output file",
		segname,af_filename(ain));
	}

	int64 pagenumber = af_segname_page_number(segname);

	/* Calculate the MD5 of this segment and remember it in the map */
	md5blob md5;
	MD5(segbuf,seglen,md5.buf);
	hashMap[segname] = md5;

	/* Write the segment to each file */
	for(outlist::iterator aout = afouts.begin();
	    aout != afouts.end();
	    aout++){

	    current_dest = af_filename(aout->af);
	    if(opt_verbose>1 || opt_debug)
		printf("\n %s -> %s ...", segname,af_filename(aout->af));

	    if(pagenumber>=0 && opt_preen){
		if(opt_debug) printf(" (PREENED) ");
		if(preen(ain,aout->af,pagenumber,arg)==0){
		    preened_pages.push_back(pagenumber);
		    continue;
		}
	    }

	    /* With -m, aout->segs lists what the output already holds;
	     * do not rewrite a segment that is already present.
	     */
	    bool already_present = false;
	    for(seglist::const_iterator j = aout->segs.begin();
		j != aout->segs.end();
		j++){
		if(j->name == segname){
		    printf("%s is already in %s\n",segname,af_filename(aout->af));
		    already_present = true;
		    break;
		}
	    }
	    if(already_present) continue;

	    if(af_update_seg(aout->af,segname,arg,segbuf,seglen)){
		unlink_outfiles(outfiles);
		err(1,"Cannot write segment '%s' to %s.",
		    segname,af_filename(aout->af));
	    }
	}
	free(segbuf);
	current_dest = 0;
	if(opt_verbose>1 || opt_debug) putchar('\n');
    }
    current_seg = 0;
    af_close(ain);			// ain must not be used past this point
    gettimeofday(&t1,0);

    /* Report throughput when there is exactly one output */
    if(afouts.size()==1){
	AFFILE *af = afouts.begin()->af;
	uint64 w = af->bytes_written;
	double sec = ((t1.tv_sec-t0.tv_sec)+(t1.tv_usec-t0.tv_usec)/1000000.0);
	printf("%s: %qd bytes transfered in %.2f seconds. xfer rate: %.2f MBytes/sec\n",
	       af_filename(af),w,sec,(w/1000000.0) / sec);
    }

    current_seg = "VERIFYING";

    /* Now verify all of the hashes */
    if(opt_verbose || opt_debug) printf("\n\nFiles copied. Verifying...\n");
    for(seglist::const_iterator seg = segments.begin();
	seg != segments.end();
	seg++){

	/* Name under which the hash was stored during the copy phase */
	const char *origname = seg->name.c_str();

	for(outlist::iterator aout = afouts.begin();
	    aout != afouts.end();
	    aout++){

	    /* Name used to read from this output.  It is reset for every
	     * output so that a legacy-name fallback on one file does not
	     * carry over to the next one.
	     */
	    const char *segname = origname;
	    size_t seglen=0;
	    char b2[1024];

	    if((aout->af)->v->flag & AF_VNODE_TYPE_RELIABLE){
		continue;		// no need to verify a reliable write
	    }
	    if(opt_verbose>1 || opt_debug) printf(" verifying %s...\n",segname);

	    if(af_get_seg(aout->af,segname,0,0,&seglen)){
		bool found = false;
		if(segname[0]=='s' && segname[1]=='e' && segname[2]=='g'
		   && isnumber(segname[3])){
		    /* Looks like a legacy segname name was renamed.
		     * Try the new name
		     */
		    snprintf(b2,sizeof(b2),"page%s",segname+3);
		    if(opt_verbose)
			printf(" Couldn't read %s; looking for %s\n", segname,b2);
		    segname = b2;
		    found = (af_get_seg(aout->af,segname,0,0,&seglen)==0);
		}
		if(!found){
		    unlink_outfiles(outfiles);
		    errx(1,"Cannot read length of segment '%s' in output file %s",
			 segname,af_filename(aout->af));
		}
	    }

	    int64 pagenumber = af_segname_page_number(segname);
	    if(find(preened_pages.begin(),preened_pages.end(),pagenumber)
	       != preened_pages.end()){
		/* TK: page pagenumber was preened.
		 * It should be check against the original hash
		 */
		continue;
	    }

	    /* See the copy loop: never pass 0 to malloc() */
	    unsigned char *segbuf = (unsigned char *)malloc(seglen ? seglen : 1);
	    if(!segbuf){
		/* The input file is already closed here, so report the
		 * output file that is being read back.
		 */
		err(1,"Cannot allocated %d bytes for segment '%s' in %s",
		    (int)seglen,segname,af_filename(aout->af));
	    }
	    unsigned long arg;
	    if(af_get_seg(aout->af,segname,&arg,segbuf,&seglen)){
		err(1,"Cannot read segment '%s' in %s",
		    segname,af_filename(aout->af));
	    }

	    /* Calculate the MD5 of this segment and see if it matches the map.
	     * (But don't do this for preened segments.)
	     * The map is keyed by the name used while copying, so look it up
	     * by origname even when the segment was read back under its
	     * renamed "page" name.
	     */
	    unsigned char md5_read[16];
	    MD5(segbuf,seglen,md5_read);
	    if(memcmp(hashMap[origname].buf,md5_read,16)!=0){
		unlink_outfiles(outfiles);
		errx(1,"Hash read from %s for segment %s doesn't validate.",
		     af_filename(aout->af),segname);
	    }
	    free(segbuf);		// free the buffer
	}
    }

    /* Finally, close the output files */
    for(outlist::iterator aout = afouts.begin();
	aout != afouts.end();
	aout++){
	af_close(aout->af);
    }
    if(opt_verbose>1 || opt_debug) printf("\n");
    current_source = 0;
    return 0;
}

/* Entry point.
 *
 * After option parsing the remaining arguments are either
 *   file1 file2              - copy file1 to file2, or
 *   file1 [file2 ...] dir... - copy every file into every trailing
 *                              directory (an "s3://" name that does not
 *                              end in ".aff" counts as a directory).
 */
int main(int argc,char **argv)
{
    int ch;

    setvbuf(stdout,0,_IONBF,0);		// turn off buffering on stdout
    while ((ch = getopt(argc, argv, "vdpVxyh?zm")) != -1) {
	switch (ch) {
	case 'v': opt_verbose++; break;
	case 'd': opt_debug++; break;
	case 'p': opt_preen++; break;
	case 'x': opt_x++; break;
	case 'y': opt_y++; break;
	case 'z': opt_zap++; break;
	case 'm': opt_missing++; break;
	case 'h':
	case '?':
	default:
	    usage();
	    break;
	case 'V':
	    printf("%s version %s\n",progname,PACKAGE_VERSION);
	    exit(0);
	}
    }
    argc -= optind;
    argv += optind;

    if(argc<2){			// at this point, we need at least two args
	usage();
    }

    /* Find any directories: walk backwards from the last argument and stop
     * at the first one that is not a directory.  argc is reduced so that
     * afterwards it counts only the source files.
     */
    vector<string> dirlist;
    for(int i=argc-1;i>0;i--){
	struct stat st;

	// s3 names that do not end with ".aff" are directories
	const char *last4 = strlen(argv[i])>4 ? argv[i]+strlen(argv[i])-4 : "";
	if(strncmp(argv[i],"s3://",5)==0 && strcmp(last4,".aff")!=0){
	    dirlist.push_back(argv[i]);
	    argc--;
	    continue;
	}

	if(stat(argv[i],&st)!=0) break;			// out of directories
	if((st.st_mode & S_IFMT)!=S_IFDIR) break;	// found a non-dir
	dirlist.push_back(argv[i]);
	argc--;						// ignore the last
    }

    /* If I found no directories, then there better just be two values */
    if(dirlist.size()==0){
	if(argc!=2){
	    fprintf(stderr,"Please specify a directory or just two AFF files.\n\n");
	    usage();
	}
	/* Must be copying from file1 to file2.
	 * An existing file2 is only acceptable with -z (zap) or -m (fill in
	 * missing segments); afcopy() applies those options itself.
	 */
	if(access(argv[1],R_OK)==0){
	    fprintf(stderr,"File exists: %s\n",argv[1]);
	    if(!opt_zap && !opt_missing) exit(1);
	}
	vector<string> outfiles;
	outfiles.push_back(argv[1]);
	return afcopy(argv[0],outfiles);
    }

    /* Loop for each file and each directory */
    while(argc--){
	/* Build the output name <dir>/<basename of source> for each dir */
	vector<string> outfiles;
	for(unsigned int i=0;i<dirlist.size();i++){
	    string outfilename;
	    const char *name = rindex(*argv,'/');
	    if(name) name++;
	    else name = *argv;

	    outfilename.append(dirlist[i]);
	    if(outfilename[outfilename.size()-1]!='/') {
		outfilename.append("/");
	    }
	    outfilename.append(name);
	    outfiles.push_back(outfilename);
	}
	afcopy(argv[0],outfiles);	// old outfiles will get GCed
	argv++;
    }
    return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -