SAS中文论坛

 找回密码
 立即注册

扫一扫,访问微社区

查看: 701|回复: 0
打印 上一主题 下一主题

Table Look Up in SAS, practical problems

[复制链接]

49

主题

76

帖子

1462

积分

管理员

Rank: 9Rank: 9Rank: 9

积分
1462
楼主
 楼主| 发表于 2010-10-22 13:39:51 | 只看该作者

Table Look Up in SAS, practical problems

From oloolo's blog on SasProgramming


<p><a href="http://feedads.g.doubleclick.net/~a/xj4fdEEQBbK002ZZAjjCnbKED4g/0/da"><img src="http://feedads.g.doubleclick.net/~a/xj4fdEEQBbK002ZZAjjCnbKED4g/0/di" border="0" ismap="true"></img></a><br/>
<a href="http://feedads.g.doubleclick.net/~a/xj4fdEEQBbK002ZZAjjCnbKED4g/1/da"><img src="http://feedads.g.doubleclick.net/~a/xj4fdEEQBbK002ZZAjjCnbKED4g/1/di" border="0" ismap="true"></img></a></p>One guy asked in a SAS forum about a typical table look up problem:<br />
He has a data with two IDs:<br />
id1 id2<br />
a   b<br />
a   e<br />
b   c<br />
b   e<br />
c   e<br />
d   e<br />
<br />
and he wants to generate a new data set with the following structure according to above information :<br />
id a b c d e<br />
a  0 1 0 0 1<br />
b  1 0 1 0 1<br />
c  0 1 0 0 1<br />
d  0 0 0 0 1<br />
e  1 1 1 1 0<br />
<br />
The real data is potentially big.<br />
***************************;<br />
At first look, this is a typical table look up problem SAS programmers facing almost everyday, that is duplicate keyed lookup table. It is a simple one because there is no inherent relationship among records.<br />
<br />
<pre style="background-color: #ebebeb; border-bottom: #999999 1px dashed; border-left: #999999 1px dashed; border-right: #999999 1px dashed; border-top: #999999 1px dashed; color: #000001; font-family: Andale Mono, Lucida Console, Monaco, fixed, monospace; font-size: 12px; line-height: 14px; overflow: auto; padding-bottom: 5px; padding-left: 5px; padding-right: 5px; padding-top: 5px; width: 100%;"><code>
data original;
   input id1 $ id2 $;
datalines;
a b
a e
b c
b e
c e
d e
;
run;

proc datasets library=work nolist;
     modify original;
  index create id1 id2;
quit;

proc sql;
     create table all_cases as
  select a.*, monotonic() as seq
  from (
  select distinct id1 as id
  from original
  union
  select distinct id2 as id
  from original
  ) as a
  order by a.id
  ;
quit;

proc sql noprint;
     select id into :idnames separated by ' '
  from   all_cases
  ;
quit;

data new;
  if _n_=1 then do;
     declare hash _h(dataset:'all_cases');
     _h.defineKey('id');
     _h.defineData('seq');
     _h.defineDone();
     end;
     set all_cases;

  array _a{*} &idnames;

  id1=id;  
  set original key=id1;      
  _mx_=%sysrc(_sok);
  
  do while (_iorc_=%sysrc(_sok));   
     rc=_h.find(key:id2); if rc=0 then _a[seq]=1;
  id1=id;
     set original key=id1;  
  
  end;
  _ERROR_=0;
  
  id2=id;  
  set original key=id2;      
  do while (_iorc_=%sysrc(_sok));
     rc=_h.find(key:id1); if rc=0 then _a[seq]=1;
  id2=id;
     set original key=id2;  
  end;
  _ERROR_=0;
  do j=1 to dim(_a); _a[j]=max(0, _a[j]); end;
  keep id &idnames;
run;
</code></pre><br />
On the other hand, this problem can be solved in a more SASsy way like this:<br />
<br />
<pre style="background-color: #ebebeb; border-bottom: #999999 1px dashed; border-left: #999999 1px dashed; border-right: #999999 1px dashed; border-top: #999999 1px dashed; color: #000001; font-family: Andale Mono, Lucida Console, Monaco, fixed, monospace; font-size: 12px; line-height: 14px; overflow: auto; padding-bottom: 5px; padding-left: 5px; padding-right: 5px; padding-top: 5px; width: 100%;"><code>
data original;
   input id1 $ id2 $;
datalines;
a b
a e
b c
b e
c e
d e
;
run;

proc sql;
     create table newx as
     select a.id1, a.id2, (sum(a.id1=c.id1 & a.id2=c.id2)>0) as count
     from   
       (select a.id as id1, b.id as id2
        from all_cases as a, all_cases as b) as a
left join   original as c
       on   a.id1=c.id1 or a.id2=c.id1
    group by a.id1, a.id2
    ;
quit;

proc transpose data=newx  out=_freq_t name=id2;
     by id1;
     var count;
     id id2;
run;

data _freq_t;
     set _freq_t;
     array _n{*} _numeric_;
     do i=1 to dim(_n);
        _n[i]=(_n[i]>0);
     end;
     drop i;
run;

proc transpose data=_freq_t(drop=id2) out=_freq_t2  name=id1;
     id id1;
run;

proc sql noprint;
     select id1, count(distinct id1) into :covars separated by ' ', :count
     from   _freq_t;  
quit;

data new2;
     set _freq_t;
     array _x{*} &covars;
     array _x2{&count} _temporary_;

     do j=1 to &count; _x2[j]=_x[j]; end;
     set _freq_t2;
     do j=1 to &count; _x[j]=(_x[j]+_x2[j]>0); end;
     drop j id2;
run;
</code></pre><div class="blogger-post-footer"><img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/29815492-5475932701046935991?l=www.sas-programming.com' alt='' /></div><img src="http://feeds.feedburner.com/~r/SasProgramming/~4/ZtARrBGYt4s" height="1" width="1"/>
回复 支持 反对

使用道具 举报

您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

QQ|小黑屋|手机版|Archiver|SAS中文论坛  

GMT+8, 2026-2-3 20:15 , Processed in 0.344780 second(s), 22 queries .

Powered by Discuz! X3.2

© 2001-2013 Comsenz Inc.

快速回复 返回顶部 返回列表