标题: Table Look Up in SAS, practical problems [打印本页] 作者: shiyiming 时间: 2010-10-22 13:39 标题: Table Look Up in SAS, practical problems From oloolo's blog on SasProgramming
<p><a href="http://feedads.g.doubleclick.net/~a/xj4fdEEQBbK002ZZAjjCnbKED4g/0/da"><img src="http://feedads.g.doubleclick.net/~a/xj4fdEEQBbK002ZZAjjCnbKED4g/0/di" border="0" ismap="true"></img></a><br/>
<a href="http://feedads.g.doubleclick.net/~a/xj4fdEEQBbK002ZZAjjCnbKED4g/1/da"><img src="http://feedads.g.doubleclick.net/~a/xj4fdEEQBbK002ZZAjjCnbKED4g/1/di" border="0" ismap="true"></img></a></p>One guy asked in a SAS forum about a typical table look up problem:<br />
He has a data with two IDs:<br />
id1 id2<br />
a b<br />
a e<br />
b c<br />
b e<br />
c e<br />
d e<br />
<br />
and he wants to generate a new data set with the following structure according to above information :<br />
id a b c d e<br />
a 0 1 0 0 1<br />
b 1 0 1 0 1<br />
c 0 1 0 0 1<br />
d 0 0 0 0 1<br />
e 1 1 1 1 0<br />
<br />
The real data is potentially big.<br />
***************************;<br />
At first look, this is a typical table look up problem SAS programmers facing almost everyday, that is duplicate keyed lookup table. It is a simple one because there is no inherent relationship among records.<br />
<br />
<pre style="background-color: #ebebeb; border-bottom: #999999 1px dashed; border-left: #999999 1px dashed; border-right: #999999 1px dashed; border-top: #999999 1px dashed; color: #000001; font-family: Andale Mono, Lucida Console, Monaco, fixed, monospace; font-size: 12px; line-height: 14px; overflow: auto; padding-bottom: 5px; padding-left: 5px; padding-right: 5px; padding-top: 5px; width: 100%;"><code>
data original;
input id1 $ id2 $;
datalines;
a b
a e
b c
b e
c e
d e
;
run;
proc sql;
create table all_cases as
select a.*, monotonic() as seq
from (
select distinct id1 as id
from original
union
select distinct id2 as id
from original
) as a
order by a.id
;
quit;
proc sql noprint;
select id into :idnames separated by ' '
from all_cases
;
quit;
data new;
if _n_=1 then do;
declare hash _h(dataset:'all_cases');
_h.defineKey('id');
_h.defineData('seq');
_h.defineDone();
end;
set all_cases;
array _a{*} &idnames;
id1=id;
set original key=id1;
_mx_=%sysrc(_sok);
do while (_iorc_=%sysrc(_sok));
rc=_h.find(key:id2); if rc=0 then _a[seq]=1;
id1=id;
set original key=id1;
end;
_ERROR_=0;
id2=id;
set original key=id2;
do while (_iorc_=%sysrc(_sok));
rc=_h.find(key:id1); if rc=0 then _a[seq]=1;
id2=id;
set original key=id2;
end;
_ERROR_=0;
do j=1 to dim(_a); _a[j]=max(0, _a[j]); end;
keep id &idnames;
run;
</code></pre><br />
On the other hand, this problem can be solved in a more SASsy way like this:<br />
<br />
<pre style="background-color: #ebebeb; border-bottom: #999999 1px dashed; border-left: #999999 1px dashed; border-right: #999999 1px dashed; border-top: #999999 1px dashed; color: #000001; font-family: Andale Mono, Lucida Console, Monaco, fixed, monospace; font-size: 12px; line-height: 14px; overflow: auto; padding-bottom: 5px; padding-left: 5px; padding-right: 5px; padding-top: 5px; width: 100%;"><code>
data original;
input id1 $ id2 $;
datalines;
a b
a e
b c
b e
c e
d e
;
run;
proc sql;
create table newx as
select a.id1, a.id2, (sum(a.id1=c.id1 & a.id2=c.id2)>0) as count
from
(select a.id as id1, b.id as id2
from all_cases as a, all_cases as b) as a
left join original as c
on a.id1=c.id1 or a.id2=c.id1
group by a.id1, a.id2
;
quit;
proc transpose data=newx out=_freq_t name=id2;
by id1;
var count;
id id2;
run;
data _freq_t;
set _freq_t;
array _n{*} _numeric_;
do i=1 to dim(_n);
_n[i]=(_n[i]>0);
end;
drop i;
run;
proc transpose data=_freq_t(drop=id2) out=_freq_t2 name=id1;
id id1;
run;
proc sql noprint;
select id1, count(distinct id1) into :covars separated by ' ', :count
from _freq_t;
quit;
data new2;
set _freq_t;
array _x{*} &covars;
array _x2{&count} _temporary_;
do j=1 to &count; _x2[j]=_x[j]; end;
set _freq_t2;
do j=1 to &count; _x[j]=(_x[j]+_x2[j]>0); end;
drop j id2;
run;
</code></pre><div class="blogger-post-footer"><img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/29815492-5475932701046935991?l=www.sas-programming.com' alt='' /></div><img src="http://feeds.feedburner.com/~r/SasProgramming/~4/ZtARrBGYt4s" height="1" width="1"/>