标题: 多数据集之间的比较 [打印本页] 作者: shiyiming 时间: 2003-9-15 18:57 标题: 多数据集之间的比较 /**********************************************************************************
*--------------------------------------------------------------------------------*
| SOURCE: compare.sas |
| |
| VERSION: RDS v3.0 |
| Part of the SPIKEware Rapid Decision Support SAS Macro Package |
| for SAS v8.x and above |
| |
| PURPOSE: This macro is used to compare multiple datasets and check for any |
| similar variable names. It also does a comparison of the number of |
| observations and prints a list of discrepencies of variable names |
| and variable types. |
| |
| SYNTAX: %compare (datalist = _SAS_Dataset_names_) |
| |
| Notes: |
| |
+--------------------------------------------------------------------------------+
************************************************************************************/
%macro compare (datalist=) /des='compares multiple datasets' ;
%let notes=%sysfunc(getoption(notes, keyword)) ;
options nonotes ;
%local datalist i merge numdata data obslst ndata mean min max notes ;
%let starttime = %sysfunc(datetime()) ;
%*----------------------------------*
| calculate variables and defaults |
*----------------------------------* ;
%*-------------------------------------------------*
| print error message if fewer than two datasets, |
| greater than 9,999,999 datasets, or complete |
| individual checks if criteria is met. |
*-------------------------------------------------* ;
%if &numdata < 2 %then %do ;
data _null_ ;
put 'ERROR: Utility macro %COMPARE requires two or more input datasets.' ;
put 'ERROR: The macro will stop executing.' ;
run ;
%goto the_end ;
%end ;
%else %if &numdata > 9999999 %then %do ;
data _null_ ;
put 'ERROR: Utility macro %COMPARE allows a maximum of 9,999,999 datasets.' ;
put 'ERROR: The macro will stop executing.' ;
run ;
%goto the_end ;
%end ;
%else %do i = 1 %to &numdata ;
%*--------------------------------------------------------*
| the %do loop uses the %scan function to parse out each |
| individual dataset name from the variable &datalist. |
*--------------------------------------------------------* ;
%let data = %data(%scan(&datalist, &i, %str( ))) ;
%*-------------------------------------------------------------*
| proc contents outputs variables to c&i for check with other |
| datasets from input string. note: cannot check more than |
| 9,999,999 datasets (just in case someone actually tries!). |
*-------------------------------------------------------------* ;
proc contents data=&data noprint
out=c&i(keep = name format label length nobs
type
rename = (nobs=nobs&i format=f&i type=t&i
length=l&i)) ;
run ;
%*------------------------------*
| create list of contents data |
*------------------------------* ;
%let merge = &merge c&i (in=c&i) ;
%*---------------------------------------------------*
| obslist is the list of variables of the number of |
| observations. later in the program, the mean, |
| min, and max observations will be calculated. |
| the making of this list is broken into two parts: |
| the first ends in a comma, and the last one does |
| not. this allows the use of the min, mean, and |
| max functions later. |
*---------------------------------------------------* ;
%*-----------------------------------------------------------*
| this datastep completes the comparison process by merging |
| the data together. |
*-----------------------------------------------------------* ;
data all (drop=nobs1-nobs&numdata min_obs max_obs mean_obs
mtch err_lnt err_typ err_fmt ehold) ;
merge &merge ;
by name ;
length count $&numdata.. ;
label length = 'length'
format = 'format'
name = 'name'
match = 'variable match'
label = 'description'
count = '1 = yes*0 = no*<-- scroll check -->' ;
format min_obs max_obs mean_obs comma16. ;
array there {&numdata} c1-c&numdata ;
array l {&numdata} l1-l&numdata ;
array t {&numdata} t1-t&numdata ;
array f {&numdata} f1-f&numdata ;
match = 'good ' ;
do i = 1 to &numdata ;
ehold = 'error = ' ;
substr(count,i,1) = put(there{i},1.) ;
if substr(count,i,1) = 0 then match = ehold||'m' ;
else do ;
mtch = ' ' ;
if l1 ne l{&numdata} then do ;
mtch = 'e' ;
err_lnt = 'l' ;
end ;
if t1 ne t{&numdata} then do ;
mtch = 'e' ;
err_typ = 't' ;
end ;
if f1 ne f{&numdata} then do ;
mtch = 'e' ;
err_fmt = 'f' ;
end ;
if mtch = 'e' then match = ehold||err_lnt||err_typ
||err_fmt ;
end ;
end ;
length = l1 ;
format = l1 ;
if t1 = 1 then type = 'num ' ;
else if t1 = 2 then type = 'char ' ;
else type = 'other' ;
if _n_ = 1 then do ;
min_obs = min(&obslst);
max_obs = max(&obslst);
mean_obs = round(mean(&obslst), .1) ;
call symput ("min", min_obs) ;
call symput ("max", max_obs) ;
call symput ("mean", mean_obs) ;
end ;
run ;
%let min = &min ;
%let max = &max ;
%let mean = &mean ;
title1 "comparison of sas datasets" ;
title2 "&datalist" ;
title4 "min obs = &min mean obs = &mean max obs = &max" ;
proc print data=all label split='*' ;
var name count label length format type match ;
run ;
title4 "correct match of data?" ;
proc freq data=all ;
tables match /missing ;
run ;
%*-----------------------------------*
| prepare sas session for open code |
*-----------------------------------* ;
proc datasets nolist memtype=data library=work ;
delete all %string(prefix=c, from=1, to=&numdata) ;
run ;
quit ;