SAS中文论坛

标题: 请教矩阵生成问题? [打印本页]

作者: shiyiming    时间: 2010-8-4 12:54
标题: 请教矩阵生成问题?
有没可行办法?
作者: shiyiming    时间: 2010-8-4 15:35
标题: Re: 请教关系矩阵生成问题?
莫非又需要DFS?
作者: shiyiming    时间: 2010-8-4 16:45
标题: Re: 请教关系矩阵生成问题?
抄老猪的代码真爽 <!-- s:lol: --><img src="{SMILIES_PATH}/icon_lol.gif" alt=":lol:" title="Laughing" /><!-- s:lol: -->
[code:1ln4x94h]/*
 * Build a symmetric 0/1 relation matrix from an edge list (id1, id2).
 *
 * Outline:
 *   1. RAW   - the edge list.
 *   2. FMT   - one row per distinct id, numbered 1..N; also loaded as the
 *              numeric informat ID_FMT. that maps an id to its number.
 *   3. OUT   - an N x N matrix (id, var1-varN) initialised to 0.
 *   4. A hash object keyed by the numeric id holds the matrix; every edge
 *      sets two cells (row id1 / column id2 and row id2 / column id1).
 *   5. The original id text is merged back as ID_LABEL.
 *
 * Result: WORK.OUT with id_label, id, var1-varN where var<k> = 1 when the
 * row id is related to the id numbered k. The diagonal stays 0.
 *
 * Note: the HTML entities of the forum dump were decoded so that this is
 * runnable SAS code.
 */
data raw;
        input id1 $ id2 $;
datalines;
a b
a e
b c
b e
c e
d e
;
/* Stack both endpoints of every edge into a single column. */
data fmt(keep=id);
        set raw;
        id=id1; output;
        id=id2; output;
run;
/* Collapse to one row per distinct id (NWAY + CLASS); _FREQ_ is the number
   of edges touching the id. */
proc means data=fmt nway noprint;
        class id;
        output out=fmt(drop=_type_);
run;
/* Optional: number the ids by descending frequency instead of by value. */
/*proc sort data=fmt out=fmt(drop=_freq_);*/
/*        by descending _freq_;*/
/*run;*/
/* Turn the id list into a CNTLIN data set: START = id text, LABEL = row
   number, TYPE 'i' = numeric informat. NOBS receives the number of ids. */
data fmt;
        retain fmtname 'id_fmt' type 'i';
        set fmt(keep=id rename=(id=start)) end=last;
        label=_n_;
        if last then call symputx('nobs',_n_);
run;
proc format cntlin=fmt;
run;
/* Empty matrix: one row per numeric id, all cells 0. */
data out;
        array var{&nobs} (&nobs*0);
        do id=1 to &nobs;
                output;
        end;
run;
/*
 * %varlist(type=) - emit the comma separated list var1,...,var&nobs.
 *   type=Q  : each name is wrapped in double quotes (for hash defineData).
 *   other   : bare names (default NQ).
 * Reads the global macro variable NOBS.
 */
%macro varlist(type=NQ);
        /* keep the loop counter local so a caller's I is not overwritten */
        %local i;
        %do i=1 %to &nobs;
                %if %upcase(&type)=Q %then "var&i";
                %else var&i;
                %if &i ne &nobs %then %str(,);
        %end;
%mend;
data _null_;
        if _n_=1 then
                do;
                        /* load the zero matrix into a hash keyed by id */
                        declare hash h(dataset:'out',hashexp:16,ordered:'yes');
                        h.defineKey('id');
                        h.defineData('id',%varlist(type=q));
                        h.defineDone();
                        call missing(id,%varlist());
                end;
        set raw end=last;
        array arr{&nobs} var1-var&nobs;
        /* row of id1: flag the column of id2 */
        id=input(id1,id_fmt.);
        rc=h.find();
        arr(input(id2,id_fmt.))=1;
        rc=h.replace();
        /* row of id2: flag the column of id1 (keeps the matrix symmetric) */
        id=input(id2,id_fmt.);
        rc=h.find();
        arr(input(id1,id_fmt.))=1;
        rc=h.replace();
        /* after the last edge, write the matrix back over WORK.OUT */
        if last then rc=h.output(dataset:'out');
run;
/* Attach the original id text to every matrix row. */
data out;
        merge fmt(keep=start label rename=(start=id_label label=id))
                out;
        by id;
run;[/code:1ln4x94h]
作者: shiyiming    时间: 2010-8-4 23:48
标题: Re: 请教关系矩阵生成问题?
how about my code <!-- s:) --><img src="{SMILIES_PATH}/icon_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
100K records? no problem at all, even for millions of records....

Actually, the recursive search problem posted earlier can be solved using a similar idea.
[code:2j892gil]
/*
 * Build a symmetric 0/1 relation matrix from an edge list using an indexed
 * keyed SET plus a hash lookup.
 *
 * Result: WORK.NEW with one row per id and one numeric column per id (the
 * column is NAMED after the id value), 1 = related, 0 = not related.
 *
 * Assumption: every id value is a valid, case-insensitively unique SAS
 * variable name that does not clash with id, id1, id2, seq, rc or j.
 *
 * Note: the HTML entities of the forum dump were decoded so that this is
 * runnable SAS code.
 */
data original;
   input id1 $ id2 $;
datalines;
a b
a e
b c
b e
c e
d e
;
run;

/* Simple indexes on both endpoints, required by SET ... KEY= below. */
proc datasets library=work nolist;
     modify original;
         index create id1 id2;
quit;

/* Distinct ids in sorted order (UNION removes duplicates). */
proc sql;
     create table all_cases as
         select id1 as id from original
         union
         select id2 as id from original
         order by id
         ;
quit;

/* Number the ids AFTER sorting so that seq is guaranteed to equal the
   position of the id in the column list built below. */
data all_cases;
     set all_cases;
     seq = _n_;
run;

/* Column list, in seq order, used for the ARRAY and KEEP statements. */
proc sql noprint;
     select id into :idnames separated by ' '
         from   all_cases
         order by seq
         ;
quit;


data new;
     if _n_=1 then do;
        /* id -> seq lookup: gives the column position of a neighbour */
        declare hash _h(dataset:'all_cases');
        _h.defineKey('id');
        _h.defineData('seq');
        _h.defineDone();
     end;
     set all_cases;

     /* one column per id; not retained, so missing at the start of a row */
     array _a{*} &idnames;

     /* Pass 1: edges where the current id is the first endpoint.
        Re-reading with an unchanged key value walks through the rows that
        share that key; the loop ends when _IORC_ is no longer _SOK. */
     id1=id;
     set original key=id1;
     do while (_iorc_=%sysrc(_sok));
        rc=_h.find(key:id2); if rc=0 then _a[seq]=1;
        id1=id;
        set original key=id1;
     end;
     /* a failed keyed read sets _ERROR_; clear it to keep the log clean */
     _ERROR_=0;

     /* Pass 2: edges where the current id is the second endpoint. */
     id2=id;
     set original key=id2;
     do while (_iorc_=%sysrc(_sok));
        rc=_h.find(key:id1); if rc=0 then _a[seq]=1;
        id2=id;
        set original key=id2;
     end;
     _ERROR_=0;

     /* cells never flagged are still missing: turn them into 0 */
     do j=1 to dim(_a); _a[j]=max(0, _a[j]); end;
     keep id &idnames;
run;
[/code:2j892gil]

On the other hand, this problem can be solved in a more SASsy way like this <!-- s:lol: --><img src="{SMILIES_PATH}/icon_lol.gif" alt=":lol:" title="Laughing" /><!-- s:lol: --> :
[code:2j892gil]
/*
 * Build a symmetric 0/1 relation matrix from an edge list with PROC SQL
 * and PROC TRANSPOSE only.
 *
 * Result: WORK.NEW2 with id1 plus one numeric column per id (the column is
 * NAMED after the id value), 1 = related, 0 = not related.
 *
 * Assumption: every id value is a valid, case-insensitively unique SAS
 * variable name.
 *
 * Note: the HTML entities of the forum dump were decoded so that this is
 * runnable SAS code.
 */
data original;
   input id1 $ id2 $;
datalines;
a b
a e
b c
b e
c e
d e
;
run;

proc sql;
     /* Distinct ids, sorted. Built here so that the script does not depend
        on a table left behind by another program. */
     create table all_cases as
         select id1 as id from original
         union
         select id2 as id from original
         order by id
         ;

     /* Every ordered pair of ids with a 0/1 flag: 1 when the pair occurs in
        ORIGINAL as (id1, id2). Sorted because the transpose below uses BY. */
     create table newx as
     select p.id1, p.id2, (sum(p.id1=c.id1 and p.id2=c.id2)>0) as count
     from
       (select a.id as id1, b.id as id2
        from all_cases as a, all_cases as b) as p
     left join original as c
       on   p.id1=c.id1 and p.id2=c.id2
     group by p.id1, p.id2
     order by 1, 2
     ;
quit;

/* Directed matrix: one row per id1, one column per id2. */
proc transpose data=newx  out=_freq_t name=id2;
     by id1;
     var count;
     id id2;
run;

/* Force every cell to 0/1. */
data _freq_t;
     set _freq_t;
     array _n{*} _numeric_;
     do i=1 to dim(_n);
        _n[i]=(_n[i]>0);
     end;
     drop i;
run;

/* Transposed copy of the directed matrix. */
proc transpose data=_freq_t(drop=id2) out=_freq_t2  name=id1;
     id id1;
run;

/* Column list in row order; SQLOBS is the number of ids selected. */
proc sql noprint;
     select id1 into :covars separated by ' '
     from   _freq_t;
quit;
%let count=&sqlobs;

/* Symmetric matrix = directed matrix OR its transpose, cell by cell. */
data new2;
     set _freq_t;
     array _x{*} &covars;
     array _x2{&count} _temporary_;

     /* remember the directed row before the next SET overwrites it */
     do j=1 to &count; _x2[j]=_x[j]; end;
     set _freq_t2;
     do j=1 to &count; _x[j]=(_x[j]+_x2[j]>0); end;
     drop j  id2;
run;
[/code:2j892gil]

You can test these two approaches using the following dummy data. On my server, the first approach took less than 1 second overall.
[code:2j892gil]
/*
 * Generate a dummy edge list for timing tests: 5e5 rows, each with two
 * single-character ids. id1 is drawn from 26 consecutive character codes
 * starting at 65, id2 from the first 18 of them. RANUNI(0) is seeded from
 * the system clock, so every run produces different data.
 */
data original(keep=id1 id2);
     do i=1 to 5e5;
          /* first draw -> id1, second draw -> id2 (same order as before) */
          id1=byte(65 + floor(ranuni(0)*26));
          id2=byte(65 + floor(ranuni(0)*18));
          output;
     end;
run;
[/code:2j892gil]
作者: shiyiming    时间: 2010-8-5 09:11
标题: Re: 请教关系矩阵生成问题?
多谢hopewell ,oloolo的解答;先抄来学习撒!呵呵
作者: shiyiming    时间: 2010-8-5 14:57
标题: Re: 请教关系矩阵生成问题?
to hopewell
<!-- s:lol: --><img src="{SMILIES_PATH}/icon_lol.gif" alt=":lol:" title="Laughing" /><!-- s:lol: --> 猪兄删代码后,google cache里全是没删的吧,呵呵
作者: shiyiming    时间: 2010-8-8 00:01
标题: Re: 请教关系矩阵生成问题?
认真看了一下,不是DFS。
作者: shiyiming    时间: 2010-8-10 14:29
标题: Re: 请教关系矩阵生成问题?
to hopewell
怎么听上去像老猪被你爽到了的感觉,怪怪的。 <!-- s:shock: --><img src="{SMILIES_PATH}/icon_eek.gif" alt=":shock:" title="Shocked" /><!-- s:shock: -->




欢迎光临 SAS中文论坛 (https://mysas.net/forum/) Powered by Discuz! X3.2