|
|
沙发

楼主 |
发表于 2008-11-26 01:01:53
|
只看该作者
Re: 如何在数据集中生成一个新变量,该变量的值为文件名?
my 2 cents,
(1) You can use a dataset label to identify the table generated by the OUTPUT statement. If you want a new variable instead, you have to hard-code it in a separate step;
(2) The OUTPUT statement may not generate a dataset at all if some categories are missing from your data. If you add zero-weighted observations for the missing categories and use the WEIGHT statement with the ZEROS option, a dataset will always be produced.
As a computer slave, I do this a lot. The following is an example. It does slightly more than you need, because I transpose the frequency table before merging it horizontally with the p-value table. You can certainly make the whole process a lot simpler!
[code:3baztzel]
/*---------------------------------------------------------------------------
 Macro draw_two_

 For every variable listed in XVARS, runs PROC FREQ on the two-way table
 of that variable by YVAR, transposes the cell counts and percentages so
 that each x category becomes one row with one set of columns per y
 category, attaches the requested test statistics to every row, and
 stacks the results of all x variables into OUTSET.

 Parameters
   dsn=     input dataset
   outset=  output dataset
   xvars=   blank-separated list of row variables
   yvar=    column variable
   xcats=   blank-separated list of x categories (default: 0 1)
   ycats=   blank-separated list of y categories (default: 0 1)
   stat=    three fields separated by a vertical bar:
              1. options appended to the TABLES statement
              2. options for the PROC FREQ OUTPUT statement
              3. variables kept from the OUTPUT dataset
            Fields 2 and 3 may be omitted; then no statistics are merged.

 Output columns
   varname, xcat, count_Y, percent_Y, pct_row_Y, pct_col_Y for each y
   category Y (percentages rescaled to fractions), plus the statistics
   variables.

 Notes
   - Zero-weight observations for every xcat by ycat combination are
     appended to the input and PROC FREQ is run with WEIGHT ... / ZEROS,
     so that categories absent from the data still appear in the table.
   - The numeric/character type of ALL x variables is taken from the
     first variable in XVARS; mixed-type lists are not supported.
---------------------------------------------------------------------------*/
%macro draw_two_( dsn=,
                  outset=,
                  xvars=,
                  yvar=,
                  xcats=%str(0 1),
                  ycats=%str(0 1),
                  stat=%str(CHISQ|CHISQ|_PCHI_ P_PCHI)
                );
    %local i j k var;
    %local N_xvar;   /* number of X variables  */
    %local N_xcat;   /* number of X categories */
    %local N_ycat;   /* number of Y categories */
    %local stat_request
           stat_out
           stat_vars;
    /* Declared local so the macro cannot overwrite same-named macro
       variables that exist in an enclosing scope. */
    %local xtype ytype;
    %local tmp0 tmp1 tmp2 tmp3 tmp4 tmp5;

    /* Split XVARS into xvar1..xvarN (upper-cased). */
    %let i=1;
    %let var = %scan(&xvars, &i, %str( ));
    %do %while(&var ne %str());
        %local xvar&i;
        %let xvar&i = %upcase(&var);
        %let i=%eval(&i+1);
        %let var = %scan(&xvars, &i, %str( ));
    %end;
    %let N_xvar=%eval(&i-1);

    /* Split XCATS into xcat1..xcatN. */
    %let i=1;
    %let var = %scan(&xcats, &i, %str( ));
    %do %while(&var ne %str());
        %local xcat&i;
        %let xcat&i = &var;
        %let i=%eval(&i+1);
        %let var = %scan(&xcats, &i, %str( ));
    %end;
    %let N_xcat=%eval(&i-1);

    /* Split YCATS into ycat1..ycatN. */
    %let i=1;
    %let var = %scan(&ycats, &i, %str( ));
    %do %while(&var ne %str());
        %local ycat&i;
        %let ycat&i = &var;
        %let i=%eval(&i+1);
        %let var = %scan(&ycats, &i, %str( ));
    %end;
    %let N_ycat=%eval(&i-1);

    /* Split STAT into its three bar-separated fields. */
    %let stat_request = %scan(&stat, 1, %str(|));
    %let stat_out     = %scan(&stat, 2, %str(|));
    %let stat_vars    = %scan(&stat, 3, %str(|));

    /*
     Reserve 6 names for temporary datasets: an empty DATA statement
     makes SAS pick the next free DATAn name, which is then read back
     from SYSLAST (libref.member) and reduced to the member name.
    */
    %do i=0 %to 5;
        data; stop; run;
        %let tmp&i = %scan(&syslast, 2, .);
    %end;

    /* Look up the type (1 = numeric, 2 = character) of the y variable
       and of the first x variable. */
    PROC CONTENTS DATA=&dsn OUT=&tmp1(KEEP=name type) NOPRINT;
    RUN;
    PROC SQL NOPRINT;
        SELECT type INTO :ytype
            FROM &tmp1
            WHERE upcase(name) = upcase("&yvar");
        SELECT type INTO :xtype
            FROM &tmp1
            WHERE upcase(name) = upcase("&xvar1");
    QUIT;

    /* Pad the input with one zero-weight observation for every
       xcat by ycat combination (all x variables share the same padded
       category value on a given observation). */
    DATA &tmp5;
        SET &dsn(KEEP=&xvars &yvar) END=eof;
        LENGTH wt 3;
        RETAIN wt 1;
        OUTPUT;
        IF eof THEN DO;
            wt = 0;
            %do i=1 %to &N_xcat;
                %do k=1 %to &N_xvar;
                    %if &xtype=1 %then %str(&&xvar&k = &&xcat&i;);
                    %else %str(&&xvar&k = "&&xcat&i";);
                %end; /* k = 1 to number of x variables */
                %do j=1 %to &N_ycat;
                    /* The semicolon must sit INSIDE %str() in both
                       branches; otherwise the character branch emits an
                       unterminated assignment that runs into OUTPUT. */
                    %if &ytype=1 %then %str(&yvar = &&ycat&j;);
                    %else %str(&yvar = "&&ycat&j";);
                    OUTPUT;
                %end; /* j = 1 to number of y categories */
            %end; /* i = 1 to number of x categories */
        END;
    RUN;

    /* Drop the placeholder datasets; only their names are needed. */
    PROC DATASETS NOLIST;
        DELETE &tmp0 &tmp1 &tmp2 &tmp3 &tmp4;
    RUN;
    QUIT;

    %do i = 1 %to &N_xvar;
        PROC FREQ DATA=&tmp5 NOPRINT;
            TABLES &&xvar&i*&yvar / OUT=&tmp1 OUTPCT &stat_request;
            %if %length(&stat_out) %then %do;
                OUTPUT OUT=&tmp2
                    %if %length(&stat_vars) %then %do; (KEEP=&stat_vars) %end;
                    &stat_out;
            %end;
            WEIGHT wt/ZEROS;
        RUN;

        /* Transpose the frequency table: one row per x category, one
           group of count/percent columns per y category. */
        DATA &tmp3(DROP=&yvar count percent pct_row pct_col RENAME=(&&xvar&i=xcat));
            LENGTH varname $ 32;
            RETAIN varname "&&xvar&i";
            DO UNTIL(LAST.&&xvar&i);
                SET &tmp1;
                BY &&xvar&i;
                SELECT (&yvar);
                    %do j=1 %to &N_ycat;
                        %if &ytype=1 %then %str(WHEN (&&ycat&j));
                        %else %str(WHEN ("&&ycat&j"));
                        DO;
                            "count_&&ycat&j"n   = count;
                            "percent_&&ycat&j"n = percent/100;
                            "pct_row_&&ycat&j"n = pct_row/100;
                            "pct_col_&&ycat&j"n = pct_col/100;
                        END;
                    %end;
                    OTHERWISE;
                END;
            END;
        RUN;

        /* Attach the statistics to each of the first N_xcat rows of the
           transposed table. The statistics dataset only exists when
           OUTPUT options were requested, so it is merged conditionally. */
        DATA &tmp4;
            %do j=1 %to &N_xcat;
                MERGE &tmp3(FIRSTOBS=&j OBS=&j)
                      %if %length(&stat_out) %then %do; &tmp2 %end;
                ;
                OUTPUT;
            %end;
        RUN;

        PROC APPEND BASE=&tmp0 DATA=&tmp4;
        RUN;

        PROC DATASETS NOLIST;
            DELETE &tmp1 &tmp2 &tmp3 &tmp4;
        RUN;
        QUIT;
    %end;

    DATA &outset;
        SET &tmp0;
    RUN;

    PROC DATASETS NOLIST;
        DELETE &tmp0 &tmp5;
    RUN;
    QUIT;
%mend draw_two_;
/* Demo data: 100 observations of nine 0/1 variables var1-var9, drawn with
   success probabilities .1 through .9, and a three-level outcome y drawn
   as binomial(2, .5). RANBIN is called with seed 0, so the stream is
   seeded from the clock and differs from run to run. */
data test;
    array xbin[*] var1-var9;
    array prob[9] _temporary_ (.1, .2, .3, .4, .5, .6, .7, .8, .9);
    do rep = 1 to 100;
        do k = 1 to dim(xbin);
            xbin[k] = ranbin(0, 1, prob[k]);
        end;
        y = ranbin(0, 2, .5);
        output;
    end;
    /* loop counters are not part of the demo data */
    drop rep k;
run;
/* Tabulate each of var1-var9 against y (levels 0, 1, 2) into WORK.DUMMY;
   xcats= and stat= are left at their macro defaults. */
%draw_two_(
    dsn    = test,
    outset = dummy,
    xvars  = var1 var2 var3 var4 var5 var6 var7 var8 var9,
    yvar   = y,
    ycats  = 0 1 2
)
/* Maps a p-value to a cell background colour name:
   below 0.01 red, 0.01 up to (not including) 0.05 brown,
   0.05 and above white, anything else (e.g. missing) gray. */
PROC FORMAT;
    VALUE pvaluecolor
        0-<0.01    = 'red'
        0.01-<0.05 = 'brown'
        0.05-high  = 'white'
        other      = 'gray'
    ;
RUN;
/* Report one line per x variable (rows where xcat = 1 only): count and
   column percentage for each level of y, followed by the chi-square
   p-value, whose cell background is driven by the pvaluecolor. format. */
PROC REPORT DATA=dummy NOWD Split=' '
        style(column)={background=white font_face='helvetica'}
        style(header)={background=khaki font_face=times};

    COLUMN varname
           ('y=0' count_0 pct_col_0)
           ('y=1' count_1 pct_col_1)
           ('y=2' count_2 pct_col_2)
           p_pchi;

    DEFINE varname   / display width=8;

    /* y = 0 */
    DEFINE count_0   / 'n' display format=comma5.;
    DEFINE pct_col_0 / '%' display format=percent8.2;

    /* y = 1 */
    DEFINE count_1   / 'n' display format=comma5.;
    DEFINE pct_col_1 / '%' display format=percent8.2;

    /* y = 2 */
    DEFINE count_2   / 'n' display format=comma5.;
    DEFINE pct_col_2 / '%' display format=percent8.2;

    DEFINE p_pchi    / display 'p-value' width=7 format=pvalue.
                       style=[background=pvaluecolor.];

    WHERE xcat = 1;
RUN;
[/code:3baztzel] |
|