[color=#FF0000:1fr6hjj2]匹配条件是:case-V6 要匹配 control-v6±200 要生成匹配号
(病例中的一条记录可以找到好几个对照记录,接下来就随机的选择一个,且对照库中的记录一旦参加匹配,就不参加下一次的匹配),
配对好的输出到一个新的数据集,如若病例库的记录没有配到的可单独输出到另外一个数据集[/color:1fr6hjj2]
期待各位大虾!!作者: shiyiming 时间: 2010-4-22 17:11 标题: Re: 求助:按照指定条件随机配对 [code:jwyxiikw]data case;
input ID V1-V6;
datalines;
1001 154 50 55 38 1 3280
1002 163 56 59 39 1 3500
1003 165 63 67 39 1 3980
1004 156 47 47 40 1 3700
1005 162 44 45 39 1 3650
1006 162 56 58 39 0 3570
1007 158 49 50 40 0 3140
1008 161 44 47 38 0 2740
1009 162 68 70 40 0 3125
1010 159 53 55 38 0 3250
1011 158 63 63 38 0 3350
1012 163 58 58 39 1 3130
1013 160 54 54 39 0 3500
1014 162 44 45 40 0 3120
1015 154 48 51 39 0 2900
1016 159 51 53 41 1 3985
1017 156 46 50 40 1 2700
1018 158 48 52 39 1 3870
1019 165 55 58 40 1 3910
1020 156 58 58 39 0 3800
;
/* Pool of potential controls: each data line is read as ID followed by V1-V6 */
/* (seven numeric values per record). */
data control;
input ID V1-V6;
datalines;
2001 164 59 60 39 1 3485
2002 166 55 58 41 1 3730
2003 156 46 49 40 0 3390
2004 160 65 71 40 1 3370
2005 160 46 54 39 1 3700
2006 166 56 60 39 0 3650
2007 159 48 49 41 1 3200
2008 166 56 59 39 1 3700
2009 156 56 57 37 1 2850
2010 156 59 61 38 1 3500
2011 158 45 48 39 1 3450
2012 157 47 45 41 0 3600
2013 155 54 57 39 0 3170
2014 157 55 57 41 1 3880
2015 159 60 61 42 1 3590
2016 160 51 53 41 0 3540
2017 158 60 65 39 1 3950
2018 162 56 59 41 0 3070
2019 157 45 45 42 1 3320
2020 165 51 63 39 1 3865
;
/*---------------------------------------------------------------------------*/
/* 1:1 random caliper matching of CASE to CONTROL on V6.                     */
/*                                                                           */
/* A control is a candidate for a case when |case.V6 - control.V6| <= 200.   */
/* For each case (processed in ID order) one candidate is picked at random   */
/* among the controls not yet used; a control is used at most once.          */
/*                                                                           */
/* Outputs:                                                                  */
/*   out_match   - matched cases (group=1) and their controls (group=2),     */
/*                 sharing the same MATCH_NO                                 */
/*   out_nomatch - cases for which no unused candidate control was left      */
/*                                                                           */
/* The previous version stratified on V6 and therefore only paired records   */
/* with exactly equal V6 values, and it produced no match number.            */
/*---------------------------------------------------------------------------*/
proc sql;
    /* All admissible (case, control) pairs, each tagged with a random key.  */
    /* Missing V6 is excluded explicitly: in SAS a missing value compares    */
    /* lower than any number, so "abs(.) <= 200" would otherwise be true.    */
    create table _temp_pairs as
    select a.id as case_id,
           b.id as control_id,
           ranuni(20100422) as _u
    from case as a, control as b
    where a.v6 is not missing
      and b.v6 is not missing
      and abs(a.v6 - b.v6) <= 200
    order by case_id, _u;
quit;

/* Greedy assignment: walk the candidates of each case in random order and   */
/* keep the first control that has not been consumed by an earlier case.     */
data _temp_matched(keep=case_id control_id match_no);
    if _n_ = 1 then do;
        declare hash used();
        used.defineKey('control_id');
        used.defineData('control_id');
        used.defineDone();
    end;
    set _temp_pairs;
    by case_id;
    retain _paired 0;
    if first.case_id then _paired = 0;
    /* check() returns 0 when the control is already in the "used" table */
    if not _paired and used.check() ne 0 then do;
        _rc = used.add();
        _paired = 1;
        match_no + 1;
        output;
    end;
run;

proc sql;
    /* Full case and control records of every matched pair */
    create table _temp_case as
    select m.match_no, c.*
    from _temp_matched as m, case as c
    where m.case_id = c.id
    order by m.match_no;
    create table _temp_control as
    select m.match_no, k.*
    from _temp_matched as m, control as k
    where m.control_id = k.id
    order by m.match_no;
quit;

/* Interleave so that every case row is followed by its matched control row */
data out_match;
    retain match_no group;   /* only fixes the column order */
    set _temp_case (in=g1) _temp_control (in=g2);
    by match_no;
    if g1 then group = 1;
    else if g2 then group = 2;
run;

proc sql;
    /* Cases that could not be paired */
    create table out_nomatch as
    select *
    from case
    where id not in (select case_id from _temp_matched)
    order by id;
quit;
proc datasets library=work nolist;
delete _temp: / memtype=data;
quit;[/code:jwyxiikw]作者: shiyiming 时间: 2010-4-22 19:21 标题: Re: 求助:按照指定条件随机配对 谢谢斑竹!!!作者: shiyiming 时间: 2010-4-22 23:19 标题: Re: 求助:按照指定条件随机配对 Obviously Hopewell's code runs much faster than mine, but here is another view of the problem. First I numbered each case and each control, and then matched them by that record number; in the resulting data, cases with a missing matched control are those for which no matching record could be found in the control group. On the other hand, if you are sure that all the IDs in the two data sets are unique, you can use the IDs to do the sampling directly.
[code:22vivtap]data Control Control_; set Control; RecordControl+1; output Control; output Control_; run;
/* Number the cases sequentially (RecordCase) and write the numbered rows */
/* to both Case and its working copy Case_. */
data Case Case_;
    set Case;
    RecordCase + 1;
    /* no explicit OUTPUT: the implicit output writes to every listed data set */
run;
%macro SelectMcr(v6,RecordCase);
    /* Match one case to one randomly drawn control.                        */
    /*   v6         - the V6 value of the case                              */
    /*   RecordCase - sequential record number of the case                  */
    /* Candidates are the rows of Control_ whose V6 lies within 200 of the  */
    /* case value; the drawn control is removed from Control_ and the pair  */
    /* is appended to BASE.                                                 */

    /* candidate controls still available */
    proc sql;
        create table control_1 as
        select RecordControl, v6
        from Control_
        where v6 >= &v6-200 and v6 <= &v6+200;
    quit;

    /* draw a single candidate by simple random sampling */
    proc surveyselect data = control_1 method = srs seed = 11 n = 1
                      out = Matched noprint;
    run;

    /* take the drawn control out of the remaining pool */
    data Control_;
        merge Control_ Matched (in = m);
        by RecordControl;
        if m then delete;
    run;

    /* tag the drawn control with the case it belongs to */
    data Matched;
        set Matched;
        matchedCase = &recordCase;
    run;

    proc append base = base data = Matched force;
    run;
%mend SelectMcr;
/* Create BASE as an empty shell (three numeric columns, zero rows) that */
/* PROC APPEND can accumulate the matched pairs into. */
data base;
    length RecordControl v6 matchedCase 8.;
    stop;   /* leave the step before any row is written */
run;
/* Drive the matching: for every row of Case_ generate one %SelectMcr call */
/* with that row's V6 value and record number as arguments. */
data _null_;
set Case_;
/* v6 and RecordCase are numeric, so the || operator converts them to text */
/* implicitly (SAS writes a conversion note to the log). */
call execute('%SelectMcr('||v6||','||RecordCase||')');
run;
/* Final result table: one row per appended pair, columns Case and MatchedControl. */
data Match_Results;
set base;
/* RecordControl of the previous row of BASE */
lagRcordControl = lag(RecordControl);
/* A control repeated on two consecutive rows is blanked on the second row. */
/* NOTE(review): presumably this catches a stale Matched data set being     */
/* re-appended when a case had no candidate control - confirm.              */
if RecordControl = lagRcordControl then call missing(RecordControl, v6);
keep RecordControl matchedCase;
rename RecordControl = MatchedControl matchedCase = Case;
run;[/code:22vivtap]作者: shiyiming 时间: 2010-4-22 23:53 标题: Re: 求助:按照指定条件随机配对 To hopewell:
I found some discrepancies in the results when I ran your code. I am not good at reading other people's code, but when I see that you are using STRATA in the procedure, I wonder how you manage to sample repeatedly from those controls that can be, or are likely to be, matched to many cases. In other words, one control may be a candidate for multiple cases, but only the controls that have actually been sampled should leave the candidate list before the next round of sampling.作者: shiyiming 时间: 2010-4-23 08:52 标题: Re: 求助:按照指定条件随机配对 TO 国际Su:
恩,是有问题,这不没看明白"control-v6±200"那加减200是什么意思嘛. <!-- s:lol: --><img src="{SMILIES_PATH}/icon_lol.gif" alt=":lol:" title="Laughing" /><!-- s:lol: -->作者: shiyiming 时间: 2010-4-23 13:01 标题: Re: 求助:按照指定条件随机配对 [url:1mx66394]http://support.sas.com/resources/papers/proceedings10/061-2010.pdf[/url:1mx66394]
Using SAS® to Perform Individual Matching in Design of Case-Control Studies
这个PAPER就是讲这个问题的,可惜没给出MACRO的源程序作者: shiyiming 时间: 2010-4-23 20:04 标题: Re: 求助:按照指定条件随机配对 前述的问题有点乱,再次表述我的目的,希望斑竹hopewell, jingju11 别嫌烦,期待你们的回复出现在下面,
case
;
/* Example cases: IDcase and the matching variable v4, plus a uniform random */
/* number u1 (seed 11) that is used later as a sort key. */
data case;
input IDcase v4; u1 = ranuni(11); datalines;
11 1
12 1
13 3
14 3
15 4
;
/* Exact 1:1 matching on v4 by a positional merge.                           */
/* NOTE(review): the step that builds CONTROL is not visible here; it is     */
/* expected to provide IDcontrol, v4 and u1 - confirm.                       */
proc sort data = control; by v4 u1;run; /* randomly permute controls that share the same v4 */
proc sort data = case; by v4 u1; /* the order in which cases are matched can matter */
run;
data all;
merge control case(in = c1);
by v4;
lagIDcontrol = lag(IDcontrol);
if lagIDcontrol = IDcontrol then IDcontrol = .; /* a control cannot be matched more than once */
if c1; /* drop rows of BY groups that contain no case (unmatched controls) */
drop u1 lagIDcontrol;
run;
proc sort nodupkey; by IDcase; run; /* keep one row per case; removes redundant matches */
proc print;run;[/code:23tyhqag]作者: shiyiming 时间: 2010-4-24 15:57 标题: Re: 求助:按照指定条件随机配对 弄了一山寨版 <!-- s:) --><img src="{SMILIES_PATH}/icon_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
[code:3rnk6uwp]/*******************************************************************************/
/* Using SAS to Perform Individual Matching in Design of Case-Control Studies */
/* http://support.sas.com/resources/papers/proceedings10/061-2010.pdf */
/*******************************************************************************/
/* Store the compiled macros in the MYMACRO library (stored compiled macro facility). */
libname mymacro 'd:\mymacro';
options mstored sasmstore=mymacro;
%macro matchcc(casedata=, /* SAS data set of all cases */
controldata=, /* SAS data set of potential controls (pool of all controls) */
matchvar=, /* List of matching variables */
matchval=, /* List of values giving the maximum difference allowed for each matching variable */
fopvar=, /* Optional variable giving the follow-up time of each subject */
id=id, /* Subject identifier variable, present in both data sets */
controlspercase=1 /* Number of controls to be matched with each case */
) / store des='Perform Individual Matching in Design of Case-Control Studies';
/* Switch off macro debugging output; note that the options are not restored afterwards */
options nosymbolgen nomprint;
/* _error_      : 0 while all checks pass, set to 1 by the first failing check */
/* matchvar_cnt : number of matching variables, set by verify_keyvar_affect */
%local _error_ matchvar_cnt;
%let _error_=0;
%macro verify_ds_exist(inds,anno) /store;
    /* Check that data set &inds exists; &anno names the parameter in the   */
    /* error message. Skipped when an earlier check has already failed.     */
    %if &_error_ ne 0 %then %return;
    %if %sysfunc(exist(&inds))=0 %then
    %do;
        %put ERROR: &anno 数据集 &inds 不存在!;
        %let _error_=1;
    %end;
%mend verify_ds_exist;
%macro verify_keyvar_exist(invar,anno) /store;
    /* Check that the required parameter value &invar is not empty; &anno   */
    /* names the parameter in the error message. Skipped when an earlier    */
    /* check has already failed.                                            */
    %if &_error_ ne 0 %then %return;
    %if &invar eq %then
    %do;
        %put ERROR: &anno 为 NULL!;
        %let _error_=1;
    %end;
%mend verify_keyvar_exist;
%macro return_ds_attribute(inds,type) /store;
    /* Function-style macro: returns an attribute of data set &inds.        */
    /*   type=NVARS   - number of variables                                 */
    /*   type=NLOBS   - number of logical observations                      */
    /*   type=VARNAME - blank-separated list of variable names (upper case) */
    /* Any other type returns an empty string.                              */
    /* If the data set cannot be opened, an error is written to the log and */
    /* 0 (NVARS/NLOBS) or an empty list (VARNAME) is returned instead of    */
    /* calling ATTRN/VARNAME/CLOSE with an invalid data set identifier.     */
    %local dsid i rc return_val;
    %let return_val=;
    %let dsid=%sysfunc(open(&inds,i));
    %if &dsid le 0 %then
    %do;
        %put ERROR: 数据集 &inds 无法打开!;
        %if &type=NVARS or &type=NLOBS %then %let return_val=0;
    %end;
    %else
    %do;
        %if &type=NVARS %then %let return_val=%sysfunc(attrn(&dsid,nvars));
        %else %if &type=NLOBS %then %let return_val=%sysfunc(attrn(&dsid,nlobs));
        %else %if &type=VARNAME %then
        %do;
            %do i=1 %to %sysfunc(attrn(&dsid,nvars));
                %let return_val=&return_val %sysfunc(varname(&dsid,&i));
            %end;
        %end;
        /* always release the data set handle */
        %let rc=%sysfunc(close(&dsid));
    %end;
    %upcase(&return_val)
%mend return_ds_attribute;
%macro verify_keyvar_affect(invar,inds,anno) /store;
/* Check that every variable named in &invar exists in data set &inds and, */
/* for parameters that must name a single variable, that only one is given. */
/*   invar - blank-separated list of variable names */
/*   inds  - data set to look the variables up in */
/*   anno  - parameter name, used in messages and to select the checks */
/* Skipped when an earlier check failed or when &invar is empty. */
%local varlist i sub_invar;
%if &_error_=0 and &invar ne %then
%do;
%let varlist=%return_ds_attribute(&inds,VARNAME);
%let i=1;
%let sub_invar=%scan(&invar,1,' ');
/* Walk the list word by word; the loop ends on the first unknown variable */
/* (sub_invar is emptied) or when %scan runs past the last word. */
%do %until(&sub_invar eq);
%if %sysfunc(indexw(&varlist,&sub_invar))=0 %then
%do;
%let _error_=1;
%put ERROR: &anno 变量 &sub_invar 在数据集 &inds 中不存在!;
%let sub_invar=;
%end;
%else %do;
%let i=%eval(&i+1);
%let sub_invar=%scan(&invar,&i,' ');
%end;
%end;
%if &_error_=0 and &anno=MATCHVAR %then %let matchvar_cnt=%eval(&i-1); /* number of matching variables, compared later with the number of MATCHVAL values */
%end;
%if &_error_=0 and &invar ne %then
%do;
/* These parameters must contain exactly one name */
%if &anno=FOPVAR or &anno=ID or &anno=CONTROLSPERCASE %then
%do;
%if %scan(&invar,2,' ') ne %then
%do;
%let _error_=1;
%put ERROR: &anno 变量: &invar 不唯一!;
%end;
%end;
%end;
%mend verify_keyvar_affect;
%macro verify_keyval_num(inval,anno) /store;
/* Check that every blank-separated value in &inval consists of digits only */
/* and that the number of values fits the parameter named by &anno:         */
/*   MATCHVAL        - as many values as there are matching variables       */
/*                     (matchvar_cnt, set by verify_keyvar_affect)          */
/*   CONTROLSPERCASE - exactly one value                                    */
/* Skipped when an earlier check has already failed.                        */
/* The error messages now spell MATCHVAL / MATCHVAR correctly.              */
%local i sub_inval;
%if &_error_=0 %then
%do;
%let i=1;
%let sub_inval=%scan(&inval,1,' ');
/* the loop ends on the first invalid value or past the last word */
%do %until(&sub_inval eq);
%if %sysfunc(notdigit(&sub_inval)) %then
%do;
%let _error_=1;
%put ERROR: &anno 的值 &sub_inval 不是有效数字!;
%let sub_inval=;
%end;
%else %do;
%let i=%eval(&i+1);
%let sub_inval=%scan(&inval,&i,' ');
%end;
%end;
/* i now holds the number of values that were scanned successfully */
%let i=%eval(&i-1);
%end;
%if &_error_=0 %then
%do;
%if &anno=MATCHVAL and &i ne &matchvar_cnt %then
%do;
%let _error_=1;
%put ERROR: MATCHVAL 值域与 MATCHVAR 不匹配!;
%put ERROR: MATCHVAR (&matchvar_cnt.个): &matchvar;
%put ERROR: MATCHVAL (&i.个): &matchval;
%end;
%else %if &anno=CONTROLSPERCASE and &i gt 1 %then
%do;
%let _error_=1;
%put ERROR: &anno 值: &inval 不唯一!;
%end;
%end;
%mend verify_keyval_num;
%macro create_var_loop(var_source,val_list,anno=) /store;
/* Function-style macro that generates a code fragment, selected by anno=:  */
/*   RENAME         - "var=C_var ..." pairs for every variable of data set  */
/*                    &var_source (for a RENAME statement)                  */
/*   EVALUATE       - "var=C_var;" assignments for every variable of data   */
/*                    set &var_source                                       */
/*   FOPCONDITION   - "C_var>=var" for the variable named in &var_source    */
/*   MATCHCONDITION - "abs(var-C_var)<=val and ..." built from the variable */
/*                    list &var_source and the parallel value list &val_list*/
/* sub_val is now declared %local so that it no longer leaks into, or       */
/* overwrites a same-named variable of, the calling scope.                  */
%local var_list i sub_var sub_val return;
%let i=1;
%let return=;
/* RENAME/EVALUATE take a data set name, the other modes a variable list */
%if &anno=RENAME or &anno=EVALUATE %then %let var_list=%return_ds_attribute(&var_source,VARNAME);
%else %let var_list=&var_source;
%let sub_var=%scan(&var_list,1,' ');
%let sub_val=%scan(&val_list,1,' ');
%do %until(&sub_var eq);
%if &anno=RENAME %then %let return=&return &sub_var=C_&sub_var;
%else %if &anno=EVALUATE %then %let return=&return &sub_var=C_&sub_var%nrstr(;);
%else %if &anno=FOPCONDITION %then %let return=C_&sub_var>=&sub_var;
%else %if &anno=MATCHCONDITION %then
%do;
%if &i=1 %then %let return=abs(&sub_var-C_&sub_var)<=&sub_val;
%else %let return=&return and abs(&sub_var-C_&sub_var)<=&sub_val;
%end;
%let i=%eval(&i+1);
%let sub_var=%scan(&var_list,&i,' ');
%let sub_val=%scan(&val_list,&i,' ');
%end;
&return
%mend create_var_loop;
%macro verify_parameter /store;
    /* Normalise all MATCHCC parameters to upper case, then run the checks  */
    /* in a fixed order; the first failing check sets _error_ and the       */
    /* remaining checks skip themselves.                                    */
    %local _k _p;
    %do _k=1 %to 7;
        %let _p=%scan(casedata controldata matchvar matchval fopvar id controlspercase,&_k);
        %let &_p=%upcase(&&&_p);
    %end;

    /* both input data sets must exist */
    %verify_ds_exist(&casedata,CASEDATA)
    %verify_ds_exist(&controldata,CONTROLDATA)

    /* required parameters must not be empty */
    %verify_keyvar_exist(&matchvar,MATCHVAR)
    %verify_keyvar_exist(&matchval,MATCHVAL)
    %verify_keyvar_exist(&id,ID)
    %verify_keyvar_exist(&controlspercase,CONTROLSPERCASE)

    /* named variables must exist in both data sets */
    %verify_keyvar_affect(&matchvar,&casedata,MATCHVAR)
    %verify_keyvar_affect(&matchvar,&controldata,MATCHVAR)
    %verify_keyvar_affect(&fopvar,&casedata,FOPVAR)
    %verify_keyvar_affect(&fopvar,&controldata,FOPVAR)
    %verify_keyvar_affect(&id,&casedata,ID)
    %verify_keyvar_affect(&id,&controldata,ID)

    /* numeric parameters */
    %verify_keyval_num(&matchval,MATCHVAL)
    %verify_keyval_num(&controlspercase,CONTROLSPERCASE)
%mend verify_parameter;
%macro match_pretreatment /store;
    /* Prepare a run: remove result tables left over from an earlier call   */
    /* and build the working control pool _temp_random_controls (random     */
    /* order, every original variable renamed to C_<name>).                 */
    %macro delete_existds(inds) /store;
        /* Delete WORK data set &inds when it exists */
        %if %sysfunc(exist(&inds)) %then
        %do;
            proc datasets library=work nolist;
                delete &inds / memtype=data;
            quit;
        %end;
    %mend delete_existds;

    %if &_error_ ne 0 %then %return;

    %delete_existds(matchall)
    %delete_existds(nomatchall)

    /* Sort the control data set by a random number */
    proc sql;
        create table _temp_random_controls as
        select *, ranuni(12345) as random
        from &controldata
        order by random;
    quit;

    /* Rename the control variables: put C_ in front of each name */
    proc datasets library=work memtype=data nolist;
        modify _temp_random_controls;
        rename %create_var_loop(&controldata,anno=RENAME);
    quit;
%mend match_pretreatment;
%macro match_main /store;
/* Matching loop. For every case (outer loop, index i) and every requested  */
/* control (inner loop, index j) the randomly ordered control pool is       */
/* scanned for the first control that satisfies the matching condition      */
/* (and the follow-up condition when FOPVAR is given).                      */
/*   matchall   - matched sets: the case (ccstat=1, written once) followed  */
/*                by its controls (ccstat=j+1), linked by setnumber         */
/*   nomatchall - one row for each (case, j) for which no control was found */
/* A control that has been used is removed from the pool.                   */
/* i and j are declared %local so the loops cannot overwrite same-named     */
/* macro variables of the caller or the global scope; the case variable     */
/* list is computed once instead of twice per iteration.                    */
%local i j case_vars;
%if &_error_=0 %then
%do;
%let case_vars=%return_ds_attribute(&casedata,VARNAME);
%do i=1 %to %return_ds_attribute(&casedata,NLOBS);
/* Select the current case */
data _temp_active;
n=&i;
set &casedata point=n;
output;
stop;
run;
%do j=1 %to &controlspercase;
/* Main section of the program. Create data sets for matches, non-matches */
data _temp_match(keep=&case_vars setnumber ccstat)
_temp_nomatch (keep=&case_vars setnumber)
_temp_used (keep=c_&id);
setnumber=&i;
set _temp_active;
do i=1 to totobs;
set _temp_random_controls point=i nobs=totobs;
if %create_var_loop(&matchvar,&matchval,anno=MATCHCONDITION) then
do;
%if &fopvar ne %then
%str(if %create_var_loop(&fopvar,anno=FOPCONDITION) then do;);
%if &j=1 %then
%do;
/* write the case itself once, before its first control */
ccstat=1;
output _temp_match;
%end;
/* overwrite the case values with those of the matched control */
%create_var_loop(&casedata,anno=EVALUATE)
ccstat=&j+1;
output _temp_match;
output _temp_used;
/* first match wins: leave the data step */
stop;
%if &fopvar ne %then %str(end;);
end;
end;
/* reached only when no control matched */
output _temp_nomatch;
run;
proc append data=_temp_match base=matchall; run;
proc append data=_temp_nomatch base=nomatchall; run;
/* Need to re-sort control data set by subject id */
proc sort data=_temp_random_controls;
by c_&id;
run;
/* Remove used control from control data set */
data _temp_random_controls;
merge _temp_random_controls _temp_used (in=used);
by c_&id;
if used ne 1;
run;
/* Need to re-sort control data set by random number for next iteration */
proc sort data=_temp_random_controls;
by random;
run;
%end;
%end;
%end;
/* remove all working data sets */
proc datasets library=work nolist;
delete _temp: / memtype=data;
quit;
%mend match_main;
%macro note /store;
    /* Write a closing banner to the log: success when no parameter check   */
    /* failed and SYSERR is 0, failure when a check failed or SYSERR is 4   */
    /* or higher. Nothing is written for SYSERR values 1 to 3 without a     */
    /* failed check.                                                        */
    %if &_error_=0 and &syserr=0 %then
    %do;
        %put WARNING- *****************************************************;
        %put WARNING- * MACRO MATCHCC: Execution completed successfully ! *;
        %put WARNING- *****************************************************;
    %end;
    %else %if &_error_ ne 0 or &syserr ge 4 %then
    %do;
        %put ERROR- **************************************;
        %put ERROR- * MACRO MATCHCC: An error occurred ! *;
        %put ERROR- **************************************;
    %end;
%mend note;
/* Run the four stages in order; each stage after the first checks _error_ */
/* and does no matching work when a parameter check has failed. */
%verify_parameter
%match_pretreatment
%match_main
%note
%mend matchcc;[/code:3rnk6uwp]作者: shiyiming 时间: 2010-4-24 20:15 标题: Re: 求助:按照指定条件随机配对 <!-- s:!: --><img src="{SMILIES_PATH}/icon_exclaim.gif" alt=":!:" title="Exclamation" /><!-- s:!: --> <!-- s:!: --><img src="{SMILIES_PATH}/icon_exclaim.gif" alt=":!:" title="Exclamation" /><!-- s:!: -->
thanks a lot for hopewell!!作者: shiyiming 时间: 2010-4-24 23:59 标题: Re: 求助:按照指定条件随机配对 就这个问题,进一步的挖掘,
条件如下;
[color=#FF0000:1qjxjc71]0、匹配比例为1:2,就是病例组一个记录对应2个对照组记录;[/color:1qjxjc71]
1、病例库与对照库v4相同的匹配;
2、如果是对照库有好几个符合病例库中的一条记录,则要随机选择;
3、已经配好的要生成匹配号;
4、对照库中的记录一旦参加匹配,就不参加下一次的匹配;
5、病例库中的记录没有与之符合的匹配,则输出到另外一个数据集;