📄 sqljmac.sas

📁 SAS是功能強大的統計軟體
💻 SAS
字号:
 /****************************************************************/ /*          S A S   S A M P L E   L I B R A R Y                 */ /*                                                              */ /*    NAME: SQLJMAC                                             */ /*   TITLE: improves SQL join performance with macros           */ /* PRODUCT: BASE                                                */ /*  SYSTEM: ALL                                                 */ /*    KEYS: SQL DATMAN MACRO %UPCASE %SCAN %EVAL SELECT WHERE   */ /*          ALIAS                                               */ /*   PROCS: SQL                                                 */ /*    DATA:                                                     */ /*                                                              */ /* SUPPORT: pmk                         UPDATE:                 */ /*     REF:                                                     */ /*    MISC:                                                     */ /*                                                              */ /****************************************************************/ title1 'SAS SAMPLE LIBRARY, MEMBER(SQLJMAC)'; /*  *  * Users have noticed that joining a small SAS dataset with a Large  * DBMS table using PROC SQL can take a very long time, even if the  * "joining variables" are indexed in the DBMS.  *  * The reson for this is that DBMS Software does not make the  * indexes visible to applications software thru the SQL interface,  * so the PROC SQL query optimiser has no choice but to make a  * complete pass over all the DBMS rows to see if they match with  * the rows in the smaller SAS dataset. (This problem does not  * arise when the larger dataset is a SAS dataset with indexes  * as SAS is able to perform direct access operations on only  * those rows that it needs from the larger table)  *  * This performace gap can be remedied by constructing a where  * clause that requests only the needed rows from the larger file.  * PROC SQL will send any parts of a where clause that relate  * to a single table only off to the engine that is responsible  * for that table -- this gives the DBMS a chance to put the index  * to work.  *  * If the number of unique keys is small relative to the size  * of the DBMS table, this approach may give you very good  * performance gains.  *  * This technique of calculating the keys required for one  * side of the join, and then restricting the rows retrieved  * from the other side is a relatively new optimisation made  * by DBMS Software. (DB2 2.3 is the first db2 release that  * implements this scheme). A future Version of PROC SQL may  * perform this kind of optimisation transparently.  *  *  * If the number of unique keys in the small dataset gets large  * it may prove inefficient to create macro variables for all the  * values. The datastep in the prejoin macro could be modified  * to write the needed where clause to a temporary file..  * The drawback to this approach is that the other parts of the  * query must be written out to temporary files too, and then  * all the parts included together like this:  *  *      %inc(pre where post);  *  *  * If you know something about the range of keys in the smaller  * file, it may be more efficient to construct a BETWEEN x AND y  * to restrict the rows accessed from the larger table. This  * generates a smaller where clause and requires fewer macro variables,  * but is not as general in its application... it would be useful if  * (for example) you knew that the smaller dataset contained dates  * from just one month in a history file.  *  */ /*  *  * The prejoin macro constructs the list of unique key values  * that are needed to build a where clause to apply to the  * larger dataset. The pfx argument allows you to choose the  * "root" of the macro names created by the macro...  *  * Assuming the default prefix "TJ"  *  * TJKN            is the number of key variables.  * TJNN            is the number of unique ocurrences of the key.  * TJK1  .. TJKn   are the key variable names in the smaller table.  * TJT1  .. TJTn   are the datatypes of those key variables.  * TJ1X1 .. TJnXm  are the values of the keys.  *  */%macro prejoin( ds=         /*-- the smaller dataset name        --*/              , key=        /*-- the names of the join variables --*/              , keytype=    /*-- the types of the keys (C or N)  --*/              , pfx=TJ      /*-- work datasets and macros prefix --*/              );  /*---------------------------------------------------------------*/  /*-- load up the individual keynames into their own macro      --*/  /*-- variables.                                                --*/  /*---------------------------------------------------------------*/  %local i j var;  %let i    = 1;  %let var  = %scan(&key, &i);  %do %while ( &var ne );     %global &pfx.K&i;     %let    &pfx.K&i = &var;     %global &pfx.T&i;     %let    &pfx.T&i = %upcase(%scan(&keytype, &i));     %let    i        = %eval(&i+1);     %let    var      = %scan(&key, &i);     %end;  %global &pfx.KN &pfx.NN;  %let    &pfx.KN = %eval(&i - 1);  /*---------------------------------------------------------------*/  /*-- create a table with the unique values of the key from     --*/  /*-- the smaller dataset. these values will be used to build   --*/  /*-- a where clause to apply to the larger dataset.            --*/  /*---------------------------------------------------------------*/  proc sql;    create table work.&pfx as    select distinct           %do i = 1 %to &&&pfx.KN;              %if (&i > 1) %then %str(,);              &&&pfx.K&i              %end;      from &ds;  %global &pfx.NN;  %let    &pfx.NN = &sqlobs;  %put NOTE: There are &sqlobs unique keys in the smaller table.;    quit;  %do i = 1 %to &&&pfx.NN;      %do j = 1 %to &&&pfx.KN;          %global &pfx.&j.X&i;          %end;      %end;  /*---------------------------------------------------------------*/  /*-- place these unique values into macro variables, so that   --*/  /*-- we can use them later to build a where clause to apply to --*/  /*-- the larger table.                                         --*/  /*--                                                           --*/  /*-- the quote() function is new with sas6.07, and takes care  --*/  /*-- of the messy details of character strings with embedded   --*/  /*-- quotes.. If you know your keys dont have quotes, then     --*/  /*-- you could omit the quote() call.                          --*/  /*---------------------------------------------------------------*/  data _null_;    set work.&pfx;    length cn $6 mvar $8;    cn = 'X' || left(put(_n_, 5.));    %do i = 1 %to &&&pfx.KN;       mvar = "&pfx.&i" || cn;       * put mvar= &&&pfx.K&i=;       %if ( N = &&&pfx.T&i ) %then %do;          call symput( mvar, put(&&&pfx.K&i, best12.) );          %end;       %else %do;          call symput( mvar, quote(&&&pfx.K&i) );        * call symput( mvar,       &&&pfx.K&i  );  /* if no quotes */          %end;       %end;    run;%mend; /*  *  * The keyjoin macro constructs the where clause from the  * values saved away by the prejoin macro.  *  */%macro keyjoin( alias=      /*-- the alias of the big dataset    --*/              , key=        /*-- the names of the join variables --*/              , pfx=TJ      /*-- work datasets and macros prefix --*/              );  %local i j;  /*---------------------------------------------------------------*/  /*-- if they supply a key value, then the "names" of the keys  --*/  /*-- are not the same in the BIG dataset. so remember these    --*/  /*-- new key names.  (They better be in the same order with    --*/  /*-- repect to the order of the names given to prejoin.)       --*/  /*---------------------------------------------------------------*/  %if ( %str(X&key) ne X ) %then %do;     %let i    = 1;     %let var  = %scan(&key, &i);     %do %while ( &var ne );        %local  &pfx.K&i;        %let    &pfx.K&i = &var;        %let    i        = %eval(&i+1);        %let    var      = %scan(&key, &i);        %end;     %end;  /*---------------------------------------------------------------*/  /*-- go emit the key variables and their values that were      --*/  /*-- found in the "smaller" dataset.                           --*/  /*---------------------------------------------------------------*/(  %do i = 1 %to &&&pfx.NN;      %if (&i > 1) %then %str(OR); (      %do j = 1 %to &&&pfx.KN;          %if (&j > 1) %then %str(AND); &alias..&&&pfx.K&j = &&&pfx.&j.X&i          %end; )      %end;)  %mend;  /*---------------------------------------------------------------*/  /*-- lets make some data to play with.                         --*/  /*--                                                           --*/  /*-- doing this exercise when joining SAS datasets helps,      --*/  /*--   as it sometimes makes the size of the data we must      --*/  /*--   sort smaller.  but in 607 we often do joins without     --*/  /*--   any sorting, so you should test it out on YOUR data.    --*/  /*--                                                           --*/  /*--                                                           --*/  /*-- doing this exercise when joining SAS datasets with        --*/  /*--   datasets that come form an external source via          --*/  /*--   SAS/ACCESS software helps, as we dont have to           --*/  /*--   transfer as many records for the DBMS into SAS to       --*/  /*--   begin with, let alone sort them all..                   --*/  /*--                                                           --*/  /*---------------------------------------------------------------*/data big;  length k $20;  do i = 1 to 100;    do j = 1 to 5;      k = trim(put(i, words13.)) || '*' || put(j, words6.);      output;      end;    end;  run;data small;  length kk $20;  do i = 7 to 270 by 4;     j  = 3;     kk = trim(put(i, words13.)) || '*' || put(j, words6.);     output;     end;  run;  /*---------------------------------------------------------------*/  /*-- this example joins the variables on one numeric key -- I  --*/  /*--                                                           --*/  /*-- the %prejoin step computes the unique key values of I     --*/  /*--     in the "small" dataset.                               --*/  /*--                                                           --*/  /*-- then the %keyjoin macro emits a where expression that     --*/  /*--     selects only those values computed by %prejoin        --*/  /*--     from the big table.                                   --*/  /*--                                                           --*/  /*---------------------------------------------------------------*/%prejoin(ds=small, key=i, keytype=n);proc sql feedback;  select *    from big,small   where big.i=small.i     and big.j=2     and %keyjoin(alias=big);  /*---------------------------------------------------------------*/  /*-- this example joins the variables on two keys -- I and J   --*/  /*---------------------------------------------------------------*/%prejoin(ds=small, key=i j, keytype=n n);proc sql feedback;  select *    from big,small   where big.i=small.i and big.j=small.j     and %keyjoin(alias=big)       ;  /*---------------------------------------------------------------*/  /*-- join on one character key                                 --*/  /*-- just to make it interesting, its called K in one dataset  --*/  /*-- (the bigger one) and "KK" in the smaller one.             --*/  /*---------------------------------------------------------------*/%prejoin(ds=small, key=kk, keytype=c);proc sql feedback;  select *    from big,small   where big.k=small.kk     and %keyjoin(alias=big,key=k)       ;quit;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -