/******************* URBAN INSTITUTE MACRO LIBRARY *********************
Macro: Dissimilarity_index
Description: Autocall macro to calculate the dissimilarity index between
two populations.
Use: Open code
Author: Peter Tatian
***********************************************************************/
/***** Macro %Dissimilarity_index() - Calculates dissimilarity index *****/
%macro Dissimilarity_index(
data=, /** Input data set **/
out=, /** Output data set & var name (optional) **/
varA=, /** Population A **/
varB=, /** Population B **/
by=, /** By variable for grouping results (optional) **/
print=y /** Print results (Y/N) **/
);
/*************************** USAGE NOTES *****************************
SAMPLE CALL:
%Dissimilarity_index( data=Test, varA=A, varB=B, by=geo, out=Result )
calculates dissimilarity index for populations A and B for units
defined by var GEO, results saved to Result data set
*********************************************************************/
/*************************** UPDATE NOTES ****************************
07/01/11 Peter A. Tatian
*********************************************************************/
%***** ***** ***** MACRO SET UP ***** ***** *****;
%local input_data varDI;
%if &out = %then %do;
%let out = _di_output;
%let varDI = _DI;
%end;
%else %do;
%let varDI = %DSNameOnly( &out );
%end;
%***** ***** ***** ERROR CHECKS ***** ***** *****;
%***** ***** ***** MACRO BODY ***** ***** *****;
** Calculate total group A and B populations **;
%if &by ~= %then %do;
%let input_data = _di_sorted;
proc sort data=&data out=&input_data;
by &by;
run;
%end;
%else %do;
%let input_data = &data;
%end;
proc means data=&input_data noprint nway;
%if &by ~= %then %do;
class &by;
%end;
var &varA &varB;
output out=_di_totals sum=&varA._s &varB._s;
run;
/***
proc print data=_di_totals;
id &by;
title3 "_di_totals";
run;
***/
** Merge population totals to individual unit data **;
data _di_calc;
%if &by ~= %then %do;
merge &input_data (keep=&by &varA &varB) _di_totals;
by &by;
%end;
%else %do;
set &input_data (keep=&varA &varB);
if _n_ = 1 then set _di_totals;
%end;
** Calculate dissimilarity index formula for each unit **;
if &varA._s > 0 and &varB._s > 0 then
&varDI = 0.5 * abs( ( &varA / &varA._s ) - ( &varB / &varB._s ) );
label &varDI = "Dissimilarity index of &varA vs. &varB";
run;
/***
proc print data=_di_calc (obs=15);
var &by &varA &varB &varA._s &varB._s &varDI;
title3 "_di_calc";
run;
***/
** Sum DI for all units **;
proc means data=_di_calc noprint nway;
%if &by ~= %then %do;
class &by;
%end;
var &varDI;
output out=&out (drop=_type_ _freq_) sum= ;
run;
%if %mparam_is_yes( &print ) %then %do;
** Print DI results **;
proc print data=&out label noobs;
%if &by ~= %then %do;
id &by;
%end;
var &varDI;
run;
%end;
%***** ***** ***** CLEAN UP ***** ***** *****;
** Cleanup temporary data sets **;
proc datasets library=work memtype=(data) nolist nowarn;
delete _di_:;
quit;
%mend Dissimilarity_index;
/************************ UNCOMMENT TO TEST ***************************
** Locations of SAS autocall macro libraries **;
filename uiautos "K:\Metro\PTatian\UISUG\Uiautos";
options sasautos=(uiautos sasautos);
options nocenter;
options mprint nosymbolgen nomlogic;
data Test;
input id geo A B;
datalines;
1 1 10 20
2 1 50 0
3 1 40 40
4 1 0 20
5 2 80 40
6 2 20 90
7 2 60 50
8 2 30 0
9 2 0 0
10 2 40 60
11 3 0 50
12 3 0 70
;
run;
proc print data=Test;
title2 'Input data set: TEST';
run;
title2;
%Dissimilarity_index( data=Test, varA=A, varB=B, by=geo, out= )
%Dissimilarity_index( data=Test, varA=A, varB=B, by=, out=work.di_out, print=n )
%File_info( data=di_out )
/**********************************************************************/Dissimilarity Index
Purpose: Autocall macro to calculate the dissimilarity index between two populations.