标题: 请教矩阵生成问题? [打印本页] 作者: shiyiming 时间: 2010-8-4 12:54 标题: 请教矩阵生成问题? 有没可行办法?作者: shiyiming 时间: 2010-8-4 15:35 标题: Re: 请教关系矩阵生成问题? 莫非又需要DFS?作者: shiyiming 时间: 2010-8-4 16:45 标题: Re: 请教关系矩阵生成问题? 抄老猪的代码真爽 <!-- s:lol: --><img src="{SMILIES_PATH}/icon_lol.gif" alt=":lol:" title="Laughing" /><!-- s:lol: -->
[code:1ln4x94h]data raw;
/* Sample input: each record holds a pair of character ids (id1, id2). */
input id1 $ id2 $;
datalines;
a b
a e
b c
b e
c e
d e
;
/* Stack both id columns of RAW into one column ID: two output rows per input row. */
data fmt(keep=id);
    set raw;
    array endpoint{2} id1 id2;
    do _k = 1 to 2;
        id = endpoint{_k};
        output;
    end;
run;
/* Reduce FMT to one row per distinct ID; _FREQ_ carries the occurrence count. */
proc summary data=fmt nway;
    class id;
    output out=fmt(drop=_type_);
run;
/*proc sort data=fmt out=fmt(drop=_freq_);*/
/* by descending _freq_;*/
/*run;*/
/* Turn the distinct-id list into a CNTLIN table for the numeric informat
   ID_FMT, which maps each id to its row number. */
data fmt;
    retain fmtname 'id_fmt' type 'i';
    set fmt(keep=id rename=(id=start)) end=at_end;
    label = _n_;
    if at_end then do;
        /* Publish the number of distinct ids as macro variable NOBS. */
        call symputx('nobs', label);
    end;
run;

/* Create the informat from the control table. */
proc format cntlin=fmt;
run;
/* Seed OUT with an all-zero matrix: &nobs rows (id = 1..&nobs), columns var1-var&nobs. */
data out;
    array var{&nobs};
    retain var1-var&nobs 0;
    do id = 1 to &nobs;
        output;
    end;
run;
/*
 * VARLIST: emit the comma-separated list var1, var2, ..., var&nobs.
 *   type=Q (case-insensitive) -> every name is double-quoted: "var1","var2",...
 *   any other value (default NQ) -> bare names: var1,var2,...
 * Reads macro variable NOBS from the calling environment.
 */
%macro varlist(type=NQ);
/* Keep the loop index local so a caller's own &i is never overwritten. */
%local i;
%do i=1 %to &nobs;
%if %upcase(&type)=Q %then "var&i";
%else var&i;
%if &i ne &nobs %then %str(,);
%end;
%mend;
/* Fill the matrix: for every pair in RAW, set the cell to 1 in both directions,
   then write the finished matrix back to OUT. */
data _null_;
if _n_=1 then
do;
/* Load OUT into a hash keyed by numeric id; the data portion is id plus var1..var&nobs. */
declare hash h(dataset:'out',hashexp:16,ordered:'yes');
h.defineKey('id');
h.defineData('id',%varlist(type=q));
h.defineDone();
/* Give the hash host variables an initial (missing) value. */
call missing(id,%varlist());
end;
set raw end=last;
array arr{&nobs} var1-var&nobs;
/* Row of id1: fetch it, flag the column of id2, store it back. */
id=input(id1,id_fmt.);
rc=h.find();
arr(input(id2,id_fmt.))=1;
rc=h.replace();
/* Row of id2: fetch it, flag the column of id1, store it back. */
id=input(id2,id_fmt.);
rc=h.find();
arr(input(id1,id_fmt.))=1;
rc=h.replace();
/* After the last pair, dump the hash contents over data set OUT. */
if last then rc=h.output(dataset:'out');
run;
/* Attach the original id text (id_label) to each matrix row by merging the
   informat control table with OUT on the numeric id. */
data out;
merge fmt(keep=start label rename=(start=id_label label=id))
out;
by id;
run;[/code:1ln4x94h]作者: shiyiming 时间: 2010-8-4 23:48 标题: Re: 请教关系矩阵生成问题? how about my code <!-- s:) --><img src="{SMILIES_PATH}/icon_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
100K records? No problem at all, even for millions of records.
The recursive search problem posted earlier can actually be solved using a similar idea.
[code:2j892gil]
/*
 * Sample input: each record holds a pair of character ids (id1, id2).
 * Simple indexes on id1 and id2 are created here because the DATA step that
 * builds NEW reads this table with SET ... KEY=id1 and SET ... KEY=id2, and
 * KEY= requires an index of that name to exist.
 */
data original(index=(id1 id2));
    input id1 $ id2 $;
    datalines;
a b
a e
b c
b e
c e
d e
;
run;
/*
 * ALL_CASES: one row per distinct id found in either column of ORIGINAL,
 * sorted by id, with SEQ = position in that sorted order.
 * SEQ is assigned in a DATA step after the sort (instead of with the
 * unsupported MONOTONIC() function inside the query) so that it is guaranteed
 * to match the row order; it is later used as an array subscript.
 */
proc sql;
    create table all_cases as
    select a.id
    from (
        select id1 as id from original
        union
        select id2 as id from original
    ) as a
    order by a.id
    ;
quit;

data all_cases;
    set all_cases;
    seq = _n_;
run;

/* IDNAMES: the ids as a blank-separated list, explicitly in SEQ order, so the
   n-th name in the list is the id whose SEQ is n. */
proc sql noprint;
    select id into :idnames separated by ' '
    from all_cases
    order by seq
    ;
quit;
/*
 * NEW: symmetric 0/1 relation matrix, one row per id in ALL_CASES and one
 * column per id (column names come from &idnames).
 * For each id, every ORIGINAL row where it appears as id1 or as id2 is fetched
 * through the index (SET ... KEY=), and the column of the partner id is set to 1.
 * Requires simple indexes named id1 and id2 on ORIGINAL.
 * The loop index is named _j_ rather than j: the matrix columns are named
 * after the ids, so a single-letter id "J" would otherwise be the same
 * variable as the loop counter.
 */
data new;
    if _n_=1 then do;
        /* id -> seq lookup; seq is the column position of an id in _a. */
        declare hash _h(dataset:'all_cases');
        _h.defineKey('id');
        _h.defineData('seq');
        _h.defineDone();
    end;
    set all_cases;
    array _a{*} &idnames;

    /* Pairs in which the current id is id1: flag the column of id2. */
    id1=id;
    set original key=id1;
    do while (_iorc_=%sysrc(_sok));
        rc=_h.find(key:id2); if rc=0 then _a[seq]=1;
        id1=id;
        set original key=id1;
    end;
    /* A failed key lookup sets _ERROR_; that is the normal loop exit here. */
    _ERROR_=0;

    /* Pairs in which the current id is id2: flag the column of id1. */
    id2=id;
    set original key=id2;
    do while (_iorc_=%sysrc(_sok));
        rc=_h.find(key:id1); if rc=0 then _a[seq]=1;
        id2=id;
        set original key=id2;
    end;
    _ERROR_=0;

    /* Cells never flagged are still missing; turn them into 0. */
    do _j_=1 to dim(_a); _a[_j_]=max(0, _a[_j_]); end;
    keep id &idnames;
run;
[/code:2j892gil]
On the other hand, this problem can be solved in a more SASsy way like this <!-- s:lol: --><img src="{SMILIES_PATH}/icon_lol.gif" alt=":lol:" title="Laughing" /><!-- s:lol: --> :
[code:2j892gil]
/* Sample input: six pairs of character ids, read with list input. */
data original;
    length id1 id2 $ 8;
    input id1 id2;
    datalines;
a b
a e
b c
b e
c e
d e
;
run;
/*
 * NEWX: one row per ordered pair (id1, id2) of ids from ALL_CASES, with
 * COUNT = 1 when ORIGINAL contains that exact (id1, id2) row, else 0.
 * The join matches on the exact pair only. Rows that merely share id1 with one
 * side of the pair can never contribute to COUNT, so joining them in just
 * inflates the intermediate result.
 * The result is explicitly ordered by id1, id2 because the following
 * PROC TRANSPOSE uses BY id1.
 */
proc sql;
    create table newx as
    select p.id1, p.id2,
           (sum(p.id1=c.id1 and p.id2=c.id2) > 0) as count
    from
        (select a.id as id1, b.id as id2
         from all_cases as a, all_cases as b) as p
    left join original as c
        on p.id1=c.id1 and p.id2=c.id2
    group by p.id1, p.id2
    order by p.id1, p.id2
    ;
quit;
/* Pivot NEWX to wide form: one row per id1, one numeric column per id2 value. */
proc transpose data=newx out=_freq_t name=id2;
by id1;
var count;
id id2;
run;
/*
 * Force every numeric cell to 0/1.
 * The loop index is named _i_ rather than i: the columns are named after the
 * ids, so an id "I" would be the same variable as the loop counter. Assigning
 * to that cell would reset the counter and DROP would delete the column.
 * The array is declared before _i_ first appears, so _i_ is not part of _NUMERIC_.
 */
data _freq_t;
set _freq_t;
array _n{*} _numeric_;
do _i_=1 to dim(_n);
_n[_i_]=(_n[_i_]>0);
end;
drop _i_;
run;
/* Transpose the 0/1 matrix: the numeric columns become rows (named in id1),
   and the former id1 values become the column names. */
proc transpose data=_freq_t(drop=id2) name=id1 out=_freq_t2;
    var _numeric_;
    id id1;
run;
/* COVARS: the id1 values of _FREQ_T as a blank-separated list.
   COUNT:  the number of distinct id1 values in _FREQ_T.
   Selecting a plain column next to an aggregate makes PROC SQL remerge the
   aggregate onto every row. */
proc sql noprint;
select id1, count(distinct id1) into :covars separated by ' ', :count
from _freq_t;
quit;
/*
 * NEW2: symmetric 0/1 matrix. Row n of _FREQ_T and row n of its transpose
 * _FREQ_T2 are read in the same iteration, and each cell becomes 1 when either
 * of them is 1.
 * The loop index is named _k_ rather than j: the columns in &covars are named
 * after the ids, so an id "J" would be the same variable as the loop counter.
 * The loop would then overwrite that cell and DROP would delete the column.
 */
data new2;
    set _freq_t;
    array _x{*} &covars;
    array _x2{&count} _temporary_;
    /* Save this row of _FREQ_T before the second SET overwrites the columns. */
    do _k_=1 to &count; _x2[_k_]=_x[_k_]; end;
    set _freq_t2;
    do _k_=1 to &count; _x[_k_]=(_x[_k_]+_x2[_k_]>0); end;
    drop _k_ id2;
run;
[/code:2j892gil]
You can test these two approaches using the following dummy data. On my server the first approach took less than 1 second overall.
[code:2j892gil]
/*
 * Dummy data for timing: 5e5 random pairs of single upper-case letters.
 * id1 is drawn from 26 letters (A-Z), id2 from the first 18 (A-R).
 * Simple indexes on id1 and id2 are created because the indexed-lookup
 * approach reads this table with SET ... KEY=id1 / KEY=id2.
 */
data original(index=(id1 id2));
    do i=1 to 5e5;
        x=65 + floor(ranuni(0)*26);
        id1=byte(x);
        x=65 + floor(ranuni(0)*18);
        id2=byte(x);
        output;
        drop i x;
    end;
run;
[/code:2j892gil]作者: shiyiming 时间: 2010-8-5 09:11 标题: Re: 请教关系矩阵生成问题? 多谢hopewell ,oloolo的解答;先抄来学习撒!呵呵作者: shiyiming 时间: 2010-8-5 14:57 标题: Re: 请教关系矩阵生成问题? to hopewell
<!-- s:lol: --><img src="{SMILIES_PATH}/icon_lol.gif" alt=":lol:" title="Laughing" /><!-- s:lol: --> 猪兄删代码后,google cache里全是没删的吧,呵呵作者: shiyiming 时间: 2010-8-8 00:01 标题: Re: 请教关系矩阵生成问题? 认真看了一下,不是DFS。作者: shiyiming 时间: 2010-8-10 14:29 标题: Re: 请教关系矩阵生成问题? to hopewell
怎么听上去像老猪被你爽到了的感觉,怪怪的。 <!-- s:shock: --><img src="{SMILIES_PATH}/icon_eek.gif" alt=":shock:" title="Shocked" /><!-- s:shock: -->