|
|
6#

楼主 |
发表于 2010-7-10 07:04:37
|
只看该作者
Re: 如何分组?
我写了一段code,但是有个问题:一旦数据中出现重复值(如第2、3行的记录都是 26 4),我的程序就必须把剩下的record重新再运行一遍,否则结果就不完整。
所以在此抛砖引玉,希望有高手能修改我的code,或者给出全新的code。谢谢!
[code:2p8uzi98]/* Sample input: mod is a character code ('tc' or '26'), bdos is numeric. */
data a0;
  input mod $ bdos;
  datalines;
tc 1
26 4
26 4
tc 6
tc 7
tc 12
26 13
26 14
tc 17
26 19
tc 22
tc 25
26 28
tc 29
26 30
;
run;
/* Add a row key (pk) and a composite identifier (unid) to every record. */
data a;
  set a0;
  pk   = _n_;                          /* sequential row number within a0 */
  unid = catx('_', mod, bdos, pk);     /* e.g. tc_1_1 for the first row */
run;
/* Split the keyed data by mod: 'tc' rows go to a1, '26' rows go to a2. */
/* There is no OTHERWISE branch, so any other mod value makes the step  */
/* fail with an "unsatisfied WHEN clause" error.                        */
data a1 a2;
  set a;
  select;
    when (mod = 'tc') output a1;
    when (mod = '26') output a2;
  end;
run;
/* For every 'tc' row, keep the '26' row(s) at the smallest absolute bdos   */
/* distance.                                                                */
/*   - a1 x a2 is a deliberate Cartesian product (every tc against every 26).*/
/*   - unid here is the tc row's identifier (a2's unid is dropped), so       */
/*     GROUP BY unid groups all candidate partners of one tc row.            */
/*   - min(calculated diff) is remerged onto each row of the group; min_d is */
/*     1 where the row's diff equals the group minimum, and HAVING keeps     */
/*     only those rows.                                                      */
/*   - unid2 = <pk_tc>_<pk_26> identifies the candidate pair.                */
/* Several '26' rows can tie for the minimum. pk_26 is added as the final    */
/* ORDER BY key so tied rows come out in a fixed order; the later NODUPKEY   */
/* sort keeps the first row it encounters per unid, so without this key the  */
/* surviving partner would be arbitrary.                                     */
proc sql;
  create table diff1 as
  select d1.*,
         abs(bdos_26 - bdos_tc) as diff,
         catx('_', pk_tc, pk_26) as unid2,
         d2.*,
         (calculated diff = min(calculated diff)) as min_d
  from a1(rename=(mod=mod_tc bdos=bdos_tc pk=pk_tc)) as d1
       cross join
       a2(drop=unid rename=(mod=mod_26 bdos=bdos_26 pk=pk_26)) as d2
  group by unid
  having min_d ^= 0
  order by d1.pk_tc, unid, d2.pk_26;
quit;
/* Mirror of the diff1 query: for every '26' row, keep the 'tc' row(s) at    */
/* the smallest absolute bdos distance.                                      */
/*   - unid here is the '26' row's identifier (a1's unid is dropped), so     */
/*     GROUP BY unid groups all candidate partners of one '26' row.          */
/*   - min_d is 1 where the row's diff equals the remerged group minimum;    */
/*     HAVING keeps only those rows.                                         */
/*   - unid2 = <pk_tc>_<pk_26>, built the same way as in diff1 so the two    */
/*     tables can later be matched on it.                                    */
/* pk_tc is added as the final ORDER BY key so that tied 'tc' partners come  */
/* out in a fixed order; the later NODUPKEY sort keeps the first row per     */
/* unid, which would otherwise be arbitrary.                                 */
proc sql;
  create table diff2 as
  select d2.*,
         abs(bdos_26 - bdos_tc) as diff,
         catx('_', pk_tc, pk_26) as unid2,
         d1.*,
         (calculated diff = min(calculated diff)) as min_d
  from a1(drop=unid rename=(mod=mod_tc bdos=bdos_tc pk=pk_tc)) as d1
       cross join
       a2(rename=(mod=mod_26 bdos=bdos_26 pk=pk_26)) as d2
  group by unid
  having min_d ^= 0
  order by d2.pk_26, unid, d1.pk_tc;
quit;
/* Collapse one-to-many matches to one row per unid in diff1.               */
/* NODUPKEY keeps a single observation for each unid value; with the        */
/* default EQUALS behaviour that is the first one in diff1's row order.     */
proc sort nodupkey
          data=diff1
          out=diff10;
  by unid;
run;
/* Same de-duplication for diff2: one row per unid, written to diff20. */
proc sort nodupkey
          data=diff2
          out=diff20;
  by unid;
run;
/* Keep only the pairs present in both de-duplicated tables: the same        */
/* unid2 (<pk_tc>_<pk_26>) must appear in diff10 and in diff20.              */
/* The join is written as an explicit INNER JOIN ... ON instead of a comma   */
/* join filtered in WHERE. ORDER BY pk_tc fixes the output row order, so the */
/* group id assigned from the row number in the next step is reproducible.   */
proc sql;
  create table diff_1 as
  select d1.*
  from diff10 as d1
       inner join diff20 as d2
         on d1.unid2 = d2.unid2
  order by d1.pk_tc;
quit;
/* Number the matched pairs: g runs 1, 2, 3, ... in diff_1 row order. */
data b0;
  set diff_1;
  g + 1;   /* sum statement: g starts at 0 and is retained across rows */
run;
/* Split each matched pair back into its two members.                   */
/* b1 holds the 'tc' side and b2 the '26' side; both expose the columns */
/* under the common names mod and bdos. KEEP= is applied before RENAME=, */
/* so the keep lists use the original *_tc / *_26 names.                */
data b1(keep=mod_tc bdos_tc diff unid2 g rename=(mod_tc=mod bdos_tc=bdos))
     b2(keep=mod_26 bdos_26 diff unid2 g rename=(mod_26=mod bdos_26=bdos));
  set b0;
  output b1 b2;
run;
/* Stack the two halves: all rows of b1 followed by all rows of b2. */
data final;
  set b1
      b2;
run;
/* Order final by group id so the two members of each pair sit together. */
proc sort data=final out=final;
  by g;
run;
[/code:2p8uzi98] |
|