[code:a8b0a]
/*以下将数据分组*/
proc sort data=data_1;
by age sex earning;
data data_1;
retain no 0;
set data_1;
by age sex earning;
flag=no;
if last.earning then no+1;
run;
%macro app;
%let j=0;
proc sql noprint;
select max(flag) into :num_flag from data_1;/*对定义flag的最大值*/
%do i=0 %to &num_flag;/*组的循环开始*/
proc sql noprint;
create table temp as select * from data_1 where flag=&i;/*导出同组数据*/
create table temp_1 as select * from temp where has_illness=0;/*导出同组数据中无病的数据*/
select count(*) into :num_ill from temp where has_illness=1;/*定义有病的人数*/
select count(*) into :num_noill from temp where has_illness=0;/*定义无病的人数*/
data illness_control;
set &datastr;
run;
%mend app;
%app;
[/code:a8b0a]作者: shiyiming 时间: 2004-9-29 18:25 标题: 优化 以上代码中存在两个可以优化的问题
1、如果区组过多,而且在很多组中没有一个患病,导致了大量的执行时间浪费。
办法:先剔除没有患者的组,再重新区组、进行后继程序。
2、如果区组过多,获得的sample必将很多,合并这些sample也要花费大量的执行时间。
办法:使用insert into temp_data select * from sample,将每次循环抽样的数据导入temp_data作者: shiyiming 时间: 2004-12-13 14:51 标题: sql(抽样的时候只是按产生的随机数大小而定) proc sql;
create table temp1 as
select name as name1,sex as sex1,age as age1,height as height1,weight as weight1 from sasuser.class where sex='F';
create table temp2 as
select name as name2,sex as sex2,age as age2,height as height2,weight as weight2 from sasuser.class where sex='M';
run;
proc sql;
create table temp3 as
select a.*,b.*,abs(b.age2 - a.age1) as diff,Normal(100) as rand from temp1 a ,temp2 b;
run;
proc rank data=temp3 out=totaldata;
var rand;
ranks ranknum;
run;
proc sql;
create table temp5 as
select * from totaldata where age1<0;
run;
%macro main;
proc sql noprint;
select count(*) into :num_male from temp1;/*记录男生人数*/
%do i=1 %to &num_male;/*按人数循环*/
proc sql ;
select min(diff) into:mindiff from totaldata;
select max(ranknum) into:maxrand from totaldata where diff=&mindiff;
insert into temp5 select * from totaldata where diff=&mindiff and ranknum =&maxrand;
delete from totaldata where name1 in (select name1 from temp5) or name2 in (select name2 from temp5);
%end;