Table Look Up in SAS, practical problems

shiyiming · 发表于 2010-10-22 13:39:51

From oloolo's blog on SasProgramming

<a href="http://feedads.g.doubleclick.net/~a/xj4fdEEQBbK002ZZAjjCnbKED4g/0/da"><img src="http://feedads.g.doubleclick.net/~a/xj4fdEEQBbK002ZZAjjCnbKED4g/0/di" border="0" ismap="true"></img></a> 
<a href="http://feedads.g.doubleclick.net/~a/xj4fdEEQBbK002ZZAjjCnbKED4g/1/da"><img src="http://feedads.g.doubleclick.net/~a/xj4fdEEQBbK002ZZAjjCnbKED4g/1/di" border="0" ismap="true"></img></a>One guy asked in a SAS forum about a typical table look up problem: 
He has a data with two IDs: 
id1 id2 
a b 
a e 
b c 
b e 
c e 
d e 
 
and he wants to generate a new data set with the following structure according to above information : 
id a b c d e 
a 0 1 0 0 1 
b 1 0 1 0 1 
c 0 1 0 0 1 
d 0 0 0 0 1 
e 1 1 1 1 0 
 
The real data is potentially big. 
***************************; 
At first look, this is a typical table look up problem SAS programmers facing almost everyday, that is duplicate keyed lookup table. It is a simple one because there is no inherent relationship among records. 
 
<pre style="background-color: #ebebeb; border-bottom: #999999 1px dashed; border-left: #999999 1px dashed; border-right: #999999 1px dashed; border-top: #999999 1px dashed; color: #000001; font-family: Andale Mono, Lucida Console, Monaco, fixed, monospace; font-size: 12px; line-height: 14px; overflow: auto; padding-bottom: 5px; padding-left: 5px; padding-right: 5px; padding-top: 5px; width: 100%;"><code>
data original;
input id1 $ id2 $;
datalines;
a b
a e
b c
b e
c e
d e
;
run;

proc datasets library=work nolist;
 modify original;
 index create id1 id2;
quit;

proc sql;
 create table all_cases as
 select a.*, monotonic() as seq
 from (
 select distinct id1 as id
 from original
 union
 select distinct id2 as id
 from original
 ) as a
 order by a.id
 ;
quit;

proc sql noprint;
 select id into :idnames separated by ' '
 from all_cases
 ;
quit;

data new;
 if _n_=1 then do;
 declare hash _h(dataset:'all_cases');
 _h.defineKey('id');
 _h.defineData('seq');
 _h.defineDone();
 end;
 set all_cases;

 array _a{*} &idnames;

 id1=id;
 set original key=id1;
 _mx_=%sysrc(_sok);

 do while (_iorc_=%sysrc(_sok));
 rc=_h.find(key:id2); if rc=0 then _a[seq]=1;
 id1=id;
 set original key=id1;

 end;
 _ERROR_=0;

 id2=id;
 set original key=id2;
 do while (_iorc_=%sysrc(_sok));
 rc=_h.find(key:id1); if rc=0 then _a[seq]=1;
 id2=id;
 set original key=id2;
 end;
 _ERROR_=0;
 do j=1 to dim(_a); _a[j]=max(0, _a[j]); end;
 keep id &idnames;
run;
</code></pre> 
On the other hand, this problem can be solved in a more SASsy way like this: 
 
<pre style="background-color: #ebebeb; border-bottom: #999999 1px dashed; border-left: #999999 1px dashed; border-right: #999999 1px dashed; border-top: #999999 1px dashed; color: #000001; font-family: Andale Mono, Lucida Console, Monaco, fixed, monospace; font-size: 12px; line-height: 14px; overflow: auto; padding-bottom: 5px; padding-left: 5px; padding-right: 5px; padding-top: 5px; width: 100%;"><code>
data original;
input id1 $ id2 $;
datalines;
a b
a e
b c
b e
c e
d e
;
run;

proc sql;
 create table newx as
 select a.id1, a.id2, (sum(a.id1=c.id1 & a.id2=c.id2)>0) as count
 from
 (select a.id as id1, b.id as id2
 from all_cases as a, all_cases as b) as a
left join original as c
 on a.id1=c.id1 or a.id2=c.id1
group by a.id1, a.id2
;
quit;

proc transpose data=newx out=_freq_t name=id2;
 by id1;
 var count;
 id id2;
run;

data _freq_t;
 set _freq_t;
 array _n{*} _numeric_;
 do i=1 to dim(_n);
 _n[i]=(_n[i]>0);
 end;
 drop i;
run;

proc transpose data=_freq_t(drop=id2) out=_freq_t2 name=id1;
 id id1;
run;

proc sql noprint;
 select id1, count(distinct id1) into :covars separated by ' ', :count
 from _freq_t;
quit;

data new2;
 set _freq_t;
 array _x{*} &covars;
 array _x2{&count} _temporary_;

 do j=1 to &count; _x2[j]=_x[j]; end;
 set _freq_t2;
 do j=1 to &count; _x[j]=(_x[j]+_x2[j]>0); end;
 drop j id2;
run;
</code></pre><div class="blogger-post-footer"><img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/29815492-5475932701046935991?l=www.sas-programming.com' alt='' /></div><img src="http://feeds.feedburner.com/~r/SasProgramming/~4/ZtARrBGYt4s" height="1" width="1"/>

		自动登录	找回密码
密码			立即注册