sortd.src

来自「没有说明」· SRC 代码 · 共 748 行 · 第 1/2 页

SRC
748
字号
**              If the inputs are null ("" or 0) the procedure
**              will ask for them.
**
**  Globals:    None
*/

proc 0 = mergeby(infile1,infile2,outfile,keytyp);
    local err,f1,f2,fout,name1,name2,b1,b2,invar1,invar2,varnames,ws;
    clear f1,f2,fout,err;
get1:

    if infile1 $== "";
        Print "Name of input file 1: ";;
        infile1 = cons;
        print;
    endif;
get2:

    if infile2 $== "";
        Print "Name of input file 2: ";;
        infile2 = cons;
        print;
    endif;

    open f1 = ^infile1;
    if f1==-1;
        print "Input file 1: " infile1 " not found";
        infile1 = "";
        goto get1;
    endif;
    if iscplxf(f1);
        errorlog "ERROR: Not implemented for complex data sets.";
        end;
    endif;

    open f2 = ^infile2;
    if f2==-1;
        print "Input file 2: " infile2 " not found";
        infile1 = "";
        goto get2;
    endif;
    if iscplxf(f2);
        errorlog "ERROR: Not implemented for complex data sets.";
        end;
    endif;

    invar1 = seqa(1,1,colsf(f1));           /* File 1 variable indices  */
    invar2 = seqa(1,1,colsf(f2));           /* File 2 variable indices  */

    if outfile $== "";
        Print "Name of output file: ";;
        outfile = cons;
        print;
    endif;

    name1 = getname(infile1);
    name2 = getname(infile2);
    name1 = name1[invar1,1];
    name2 = name2[invar2,1];
    if indcv(name1[1,1],name2[1,1]) /= 1;
        errorlog "Key variables must have the same name";
        goto errout;
    endif;
    varnames = name1|name2[2:rows(name2),1];

        /* The assumption being made below is that all the variables from
        :: both input files will be included in the output file and the type
        :: of the data will follow the file with the largest type. The key
        :: variable will be output from only one file.
        */

    if not keytyp;
        print "Key variable type:";
        print "     1   numeric key, ascending order";
        print "     2   character key, ascending order";
        print;
        print "   ? ";;
        keytyp = stof(cons);
    print;
    endif;

    if keytyp==2 and (typef(f1) /= 8 or typef(f2) /= 8);
        errorlog "WARNING - Sort is character type, dataset not double precis"\
            "ion.";
    endif;
    create fout = ^outfile with ^varnames,0,maxc(typef(f1)|typef(f2));
    if fout==-1;
        "Can't open output file.";
        goto errout;
    endif;

    b1 = submat(readr(f1,1),1,invar1);
    b2 = submat(readr(f2,1),1,invar2);

    ws = 1;         /* is set to 1 inside loop if write is successful  */

    /* ------------------ merge loop ------------------ */

    if keytyp==2;
        gosub charkey;
    else;
        gosub numkey;
    endif;

finish:

    if err;
        goto writerr;
    endif;
    call close(f1);
    call close(f2);
    call close(fout);
    retp;

writerr:

    errorlog "Error writing output file";
errout:

    call close(f1);
    call close(f2);
    call close(fout);
    end;

/* ======================================================================== */
/* -------------------------- Subroutines Follow -------------------------- */
/* ======================================================================== */

/* =============================== */
/* ===== Numeric Key Merge ======= */
/* =============================== */

numkey:

    do while ws;    /* do while write successful */
        if b1[1,1] == b2[1,1];
                /* key column from file 2 is dropped here */
            ws = writer(fout,b1~b2[1,2:cols(b2)]);
            if eof(f1) or eof(f2);
                return;
            endif;
            b1 = submat(readr(f1,1),1,invar1);
            b2 = submat(readr(f2,1),1,invar2);
        elseif b1[1,1] > b2[1,1];
            if eof(f2);
                return;
            endif;
            b2 = submat(readr(f2,1),1,invar2);
        else;
            if eof(f1);
                return;
            endif;
            b1 = submat(readr(f1,1),1,invar1);
        endif;
    endo;
    err = 1;
    return;

/* =============================== */
/* ==== Character Key Merge ====== */
/* =============================== */

charkey:

    do while ws;    /* do while write successful */
        if b1[1,1] $== b2[1,1];
                /* NOTE - key column from file 2 is dropped here */
            ws = writer(fout,b1~b2[1,2:cols(b2)]);
            if eof(f1) or eof(f2);
                return;
            endif;
            b1 = submat(readr(f1,1),1,invar1);
            b2 = submat(readr(f2,1),1,invar2);
        elseif b1[1,1] $> b2[1,1];
            if eof(f2);
                return;
            endif;
            b2 = submat(readr(f2,1),1,invar2);
        else;
            if eof(f1);
                return;
            endif;
            b1 = submat(readr(f1,1),1,invar1);
        endif;
    endo;
    err = 1;
    return;
/* ======================= End of Subroutines =========================== */

endp;

/*
**> sortd
**
**  Purpose:    To sort data file on disk with respect to a
**              specified variable.
**
**  Format:     sortd(infile,outfile,keyvar,keytyp);
**
**  Inputs:     infile     string, name of input file.
**
**              outfile    string, name of output file, must be different.
**
**              keyvar     string, name of key variable.
**
**              keytyp     scalar, type of key variable.
**
**                          1   numeric key, ascending order
**                          2   character key, ascending order
**                         -1   numeric key, descending order
**                         -2   character key, descending order
**
**  Remarks:    The data set INFILE will be sorted on the variable
**              KEYVAR, and will be placed in OUTFILE.
**
**              INFILE can have up to 4095 rows, with up to about
**              8100 variables. Putting this file on a ram disk
**              can speed up the program considerably.
**
**              If the inputs are null ("" or 0) the procedure
**              will ask for them.
**
**  Globals:    None
*/

proc 0 = sortd(infile,outfile,keyvar,keytyp);
    local ord,fin,fout,inx,x,srtmat,inrow,off,nr,rs,r,mix,varnames;
    clear fin,fout;

get1:

    if infile $== "";
        Print "Name of input file: ";;
        infile = cons;
        print;
    endif;

    if outfile $== "";
        Print "Name of output file: ";;
        outfile = cons;
        print;
    endif;

    open fin = ^infile;

    if fin==-1;
        print "Can't open " infile;
        infile = "";
        goto get1;
    endif;
#ifUNIX
    if infile $== outfile;
#else
    if upper(infile) $== upper(outfile);
#endif
        errorlog "Names must be different";
        goto errout;
    endif;
    varnames = getname(infile);

    if keyvar $/= "";
        goto havit;
    endif;

    format 8,8;
retry1:

    print "Variables are:";
    print $varnames';
    print "Name of key variable: ";;
    keyvar = cons;
    print;
havit:

    inx = indcv(keyvar,varnames);
    if scalmiss(inx);
        print "Variable " keyvar " not found";
        goto retry1;
    endif;
    if not keytyp;
        print "Sort order:";
        print "     1   numeric key, ascending order";
        print "     2   character key, ascending order";
        print "    -1   numeric key, descending order";
        print "    -2   character key, descending order";
        print;
        print "   ? ";;
        keytyp = stof(cons);
    print;
    endif;
    if keytyp < 0;
        ord = 0;
        keytyp = -keytyp;
    else;
        ord = 1;
    endif;

    if iscplxf( fin );
        create complex fout = ^outfile with ^varnames,0,typef(fin);
    else;
        create fout = ^outfile with ^varnames,0,typef(fin);
    endif;

    if fout==-1;
        errorlog "Can't open output file";
        goto errout;
    endif;

   /* no. of rows to read per iter. change if there's memory constraints */
    nr = 3000/colsf(fin);

    inrow = rowsf(fin);

#ifLIGHT

    if inrow > floor(maxvec/2);
        errorlog "Maximum rows " $+ ftos(floor(maxvec/2),"%lf",1,0) $+
                 ", this file has " $+ ftos(rowsf(fin),"%lf",1,0);
        goto errout;
    endif;

#endif

    x = readr(fin,1);

    srtmat = 1~x[1,inx];
    off = 1;

     /* reading input file and keeping only key variable */
    do until eof(fin);
        x = readr(fin,nr);
        srtmat = srtmat|(seqa(off+1,1,rows(x))~x[.,inx]);
        off = off+rows(x);
    endo;
    mix = seqa(1,1,rows(srtmat))~rndu(rows(srtmat),1);
    mix = sortc(mix,2);

    /* make sure its not already sorted, a quicksort would be real slow */
    srtmat = srtmat[mix[.,1],.];

    clear mix;
    if keytyp==2;
        srtmat = sortcc(srtmat,2);
    else;
        srtmat = sortc(srtmat,2);
    endif;
    if ord == 0;
        srtmat = rev(srtmat);
    endif;
    r = 1;
    rs = 1;
    do while rs and r <= inrow;     /* now we write output using index  */
        call seekr( fin, real(srtmat[r,1]) );
        rs = writer(fout,readr(fin,1));
        r = r+1;
    endo;

    if not rs;
        errorlog "\nDisk Full - Sort Incomplete";
        goto errout;
    endif;

out:

    call close(fin);
    call close(fout);
    retp;
errout:

    call close(fin);
    call close(fout);
    end;
endp;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?