/******************* URBAN INSTITUTE MACRO LIBRARY *********************
: Dup_check
Macro: Check data set for duplicate obs. by key variables and
Description
print out duplicates.
: Open code
Use
: Peter Tatian
Author
***********************************************************************/
Dup_check(
%macro data=, /** Input data set **/
by=, /** BY variable list **/
id=, /** ID variables to include in list of duplicate obs. **/
out=_dup_check, /** Output data set containing duplicate obs. (def. _DUP_CHECK is deleted at end of macro execution) **/
listdups=Y, /** Print list of duplicate obs. to output (Y/N) **/
printnumdups=Y, /** Show number of duplicate obs. for each BY group (Y/N) **/
count=dup_check_count, /** Name of macro variable where no. of duplicate obs. will be saved **/
quiet=N, /** Suppress printing of final duplicate count to LOG (Y/N) **/
debug=N /** Debug mode (MPRINT on) (Y/N) **/
);
/*************************** USAGE NOTES *****************************
:
SAMPLE CALLdup_check( data=test, by=byvar1 byvar2, id=firstname lastname )
%in test that have same values of byvar1 and
prints observations
byvar2*********************************************************************/
/*************************** UPDATE NOTES ****************************
*********************************************************************/
***** ***** ***** MACRO SET UP ***** ***** *****;
%
%local by_data id_data by_freq i item1 item2 itme3 coalesce where_cond;
push_option( mprint, quiet=y )
%
%if %mparam_is_yes( &debug ) %then %do;
options mprint;
%end;%else %do;
options nomprint;
%end;
***** ***** ***** ERROR CHECKS ***** ***** *****;
%
%if %length( &data ) = 0 %then %do;
err_mput( macro=Dup_check, msg=You must provide a data set specification in the DATA= option. Macro exiting. )
%note_mput( macro=Dup_check, msg=Macro exiting. )
%
%goto exit;
%end;
%if &by = %then %do;
err_mput( macro=Dup_check, msg=You must provide one or more BY variables in the BY= option. )
%note_mput( macro=Dup_check, msg=Macro exiting. )
%
%goto exit;
%end;
***** ***** ***** MACRO BODY ***** ***** *****;
%
** Create comma-separated variable lists **;
%
%let by_data = %ListChangeDelim( &by, prefix=data., quiet=y );
%let id_data = %ListChangeDelim( &id, prefix=data., quiet=y );
%let by_freq = %ListChangeDelim( &by, prefix=freq., quiet=y );
** Create coalesce() lists and where condition for join **;
%
= 1;
%let i %let item1 = %scan( &by_data, &i, %str(,) );
%let item2 = %scan( &by_freq, &i, %str(,) );
%let item3 = %scan( &by, &i, %str( ) );
= ;
%let coalesce = ;
%let where_cond
%do %while ( %length( &item1 ) > 0 );
= &coalesce coalesce( &item1, &item2 ) as &item3;
%let coalesce = &where_cond &item1 = &item2;
%let where_cond
%let i = %eval( &i + 1 );
%let item1 = %scan( &by_data, &i, %str(,) );
%let item2 = %scan( &by_freq, &i, %str(,) );
%let item3 = %scan( &by, &i, %str( ) );
%if &item1 ~= %then %let where_cond = &where_cond and;
%if &item1 ~= %then %let coalesce = &coalesce,;
%end;
** Find duplicate obs. **;
%
proc sql;&out as
create table &coalesce,
select %if &id_data ~= %then %do;
&id_data,
%end;
freq._freq_ from &data as data,
(&by_data, count(*) as _freq_ from
select &data as data
&by_data
group by
) as freqwhere ( &where_cond ) and freq._freq_ > 1
&by_data
order by %if &id_data ~= %then %do;
&id_data
,
%end;
;
** Count no. of duplicate obs. **;
%
proc sql noprint;count(*) into :_dup_check_count
select &out;
from
quit;
%if &count ~= %then %do;
&count;
%global &count = &_dup_check_count;
%let
%end;
** Report findings to log (if requested) **;
%
%if %mparam_is_no( &quiet ) %then %do;
%note_mput( macro=Dup_check, msg=&_dup_check_count duplicate observations found in data set %upcase(&data) )
%note_mput( macro=Dup_check, msg=for BY variables %upcase(&by). )
%end;
** Print duplicate obs. (if requested) **;
%
%if %mparam_is_yes( &listdups ) %then %do;
=&out noobs
proc print data%if %mparam_is_yes( &printnumdups ) %then %do;
='Number of duplicates = '
n
%end;
;&by;
id &by;
by &id;
var
run;
%end;
***** ***** ***** CLEAN UP ***** ***** *****;
%
** Clean up temporary data set **;
%
%if &out = _dup_check %then %do;
=work nolist nowarn;
proc datasets library/memtype=data;
delete _dup_check
quit;
%end;
pop_option( mprint, quiet=y )
%
:
%exit
%mend Dup_check;
/************************ UNCOMMENT TO TEST ***************************
"Dup_check: UI SAS Autocall Macro Library";
title "K:\Metro\PTatian\UISUG\Uiautos";
filename uiautos =(uiautos sasautos);
options sasautos
options nocenter;
options mprint nosymbolgen nomlogic;=i;
options msglevel
data test;$ 12;
length firstname lastname
input byvar1 byvar2 firstname lastname;
datalines;1 1 Albert Arbor
1 2 Barbara Bouche
1 3 Chelsea Ciel
1 4 Dagmar Desire
1 5 Euripides Erudite
1 1 Alberta Amie
1 3 Constantine Clochard
1 4 Dagy Deauville
1 3 Cedille Cellule
;
run;=test;
proc print datadup_check( )
%dup_check( data=test )
%dup_check( data=test, by=byvar1 byvar2, id=firstname lastname, debug=y )
%=&dup_check_count;
%put dup_check_count/**********************************************************************/
Check for Duplicates
Purpose: Check data set for duplicate observations by key variables and print out duplicates.