|
|
Re: 请教高手:多列带有重复的ID合并成一列
Here is my two cents. Haven't got the chance to test it on huge datasets though. <!-- s:D --><img src="{SMILIES_PATH}/icon_biggrin.gif" alt=":D" title="Very Happy" /><!-- s:D -->
[code:297rq356]data a1;
input id1 id2 id3 id4;
cards;
1 11 21 31
1 12 22 31
2 11 22 32
2 13 21 32
3 14 23 32
4 11 22 33
5 6 7 8
5 0 0 0
4 20 30 40
9 4 3 8
10 0 5 6
100 100 100 100
;
run;
data a1;
set a1;
rowid = _n_;
run;
proc sql;
create table matches as
select a.rowid as rowid_a,
a.id1 as id1_a,
a.id2 as id2_a,
a.id3 as id3_a,
a.id4 as id4_a,
b.rowid as rowid_b,
b.id1 as id1_b,
b.id2 as id2_b,
b.id3 as id3_b,
b.id4 as id4_b
from a1 as a,
a1 as b
where a.rowid ^= b.rowid and
(a.id1 = b.id1 or
a.id2 = b.id2 or
a.id3 = b.id3 or
a.id4 = b.id4
);
quit;
proc sql;
create table matches as
select distinct
1 as id,
min(rowid_a, rowid_b) as rowid1,
max(rowid_a, rowid_b) as rowid2
from matches;
create table matches as
select id,
rowid1,
rowid2
from matches
union all
select 1 as id,
a.rowid as rowid1,
a.rowid as rowid2
from a1 as a
left join
matches as b
on a.rowid = b.rowid1 or
a.rowid = b.rowid2
where b.rowid1 is null;
quit;
data results(keep = rowsetid rowset);
set matches;
by id;
retain rowsetid rowset newset;
length rowset $256; /* Adjust this length if necessary. */
if first.id then do;
rowsetid = 1;
newset = 1;
end;
if newset = 1 then do;
rowset = ', ' || strip(put(rowid1, best.)) || ',';
if index(rowset, ', ' || strip(put(rowid2, best.)) || ',') = 0 then rowset = strip(rowset) || ' ' || strip(put(rowid2, best.)) || ',';
newset = 0;
end; else do;
if index(rowset, ', ' || strip(put(rowid1, best.)) || ',') > 0 and index(rowset, ', ' || strip(put(rowid2, best.)) || ',') = 0 then
rowset = strip(rowset) || ' ' || strip(put(rowid2, best.)) || ','; else
if index(rowset, ', ' || strip(put(rowid2, best.)) || ',') > 0 and index(rowset, ', ' || strip(put(rowid1, best.)) || ',') = 0 then
rowset = strip(rowset) || ' ' || strip(put(rowid1, best.)) || ','; else
if index(rowset, ', ' || strip(put(rowid1, best.)) || ',') = 0 and index(rowset, ', ' || strip(put(rowid2, best.)) || ',') = 0 then do;
rowset = substr(rowset, 3, length(rowset) - 3);
output;
rowsetid = rowsetid + 1;
rowset = ', ' || strip(put(rowid1, best.)) || ',';
if index(rowset, ', ' || strip(put(rowid2, best.)) || ',') = 0 then rowset = strip(rowset) || ' ' || strip(put(rowid2, best.)) || ',';
end;
end;
if last.id then do;
rowset = substr(rowset, 3, length(rowset) - 3);
output;
end;
run;[/code:297rq356] |
|