十分感谢.作者: shiyiming 时间: 2009-8-12 09:29 标题: Re: 请问data step怎么实现count distinct及引申问题 [quote:5gvm15u9]然后从第n个(n=1到N)user_id开始统计第1个到第n个user_id的friend_id中所有不重复的个数count_num[/quote:5gvm15u9]
不太明白
[code:5gvm15u9]/* 创建模拟数据 */
data raw_ds(keep=user_id friend_id);
seed=123;
do i=1 to 10;
user_id=i;
friend_id=ceil(10*ranuni(seed));
do j=1 to ceil(10*ranuni(seed));
friend_id+1;
output;
end;
end;
run;
/* by user_id计算friend数量 */
data final_ds;
friend_count=0;
do _n_=1 by 1 until(last.user_id);
set raw_ds;
by user_id;
friend_count+1;
end;
do _n_=1 to _n_;
set raw_ds;
by user_id;
output;
end;
run;
/* 为user_id创建新序号 */
data final_ds;
sort_id+1;
do _n_=1 by 1 until(last.user_id);
set final_ds;
by descending friend_count user_id;
output;
end;
run;
/* 取前n个user_id的friend_id的count */
%let n=3;
proc sql;
create table _temp_repfid as
select friend_id, count(friend_id) as repeat_count
from final_ds
where sort_id<=&n
group by friend_id
order by friend_id;
quit;
data final_ds;
set final_ds;
set _temp_repfid key=friend_id/unique ;
if _iorc_ ne 0 then do;
_iorc_=0;
_error_=0;
repeat_count=.;
end;
if sort_id>&n then repeat_count=.;
run;
/* 计算非重复的friend数量 */
data final_ds;
do _n_=1 by 1 until(last.sort_id);
set final_ds;
by sort_id;
if repeat_count=1 then count_num=sum(count_num,1);
end;
do _n_=1 to _n_;
set final_ds;
by sort_id;
if sort_id<=&n and count_num=. then count_num=0;
output;
end;
run;[/code:5gvm15u9]作者: shiyiming 时间: 2009-8-12 17:17 标题: Re: 请问data step怎么实现count distinct及引申问题 谢谢你的回复, 不过倒不是我要的结果,
不好意思,可能是我表达的不清晰, 复述一下
data m;
set a;
by rank_id;
if _n_=1 then do;
declare hash myhash();
myhash.definekey("friend_id");
myhash.definedone();
end;
if myhash.find(key:friend_id) ne 0 then rc=myhash.add() ;
if last.rank_id then do;
ncount_num=myhash.num_items;
output;
end;
keep rank_id ncount_num ;
run;
proc sql;
select distinct rank_id,
( select count( distinct friend_id) from a b1 where b1.rank_id<=b2.rank_id )as ncount
from a b2;
quit;
[/code:17y9krj9]作者: shiyiming 时间: 2009-8-14 03:31 标题: Re: 请问data step怎么实现count distinct及引申问题 /* ´´½¨Ä£ÄâÊý¾Ý */
data raw_ds(keep=user_id friend_id);
seed=123;
do i=1 to 10;
user_id=i;
friend_id=ceil(10*ranuni(seed));
do j=1 to ceil(10*ranuni(seed));
friend_id+1;
output;
end;
end;
run;
* the code above by hopewell;
* if this is what you want;
%macro ct;
proc sql;
select distinct user_id into: users separated by ' '
from raw_ds;
%let n=&sqlobs;
create table result as
select distinct user_id, count(distinct friend_id) as count_n
from (select distinct user_id, friend_id
from raw_ds
where user_id in (%scan(&users,1)));
%do i=2 %to &n.;
%let usid = %scan(&users,&i);
select count(distinct friend_id) into:n
from raw_ds
where user_id in
(%scan(&users,1)
%do k=2 %to &i;
%scan(&users,&k)
%end;
);
insert into result set user_id=&usid,count_n = &n;
%end;