/******************* URBAN INSTITUTE MACRO LIBRARY *********************
: Transform_geo_data
Macro: Autocall macro to convert data from an original geographic
Description
level to new geography using a normalized weighting file.
: Open code
Use
: Peter Tatian
Author
***********************************************************************/
Transform_geo_data(
%macro dat_ds_name = , /** Input data set library and name **/
dat_org_geo = , /** Original geo id in input data set **/
dat_new_geo = , /** Specify an alternate aggregation geo var from input data set when a weighting file is not being used (optional) **/
dat_count_vars = , /** Count vars in input data set **/
dat_prop_vars = , /** Proportion vars in input data set **/
dat_id_vars = , /** ID variables from input data set to add to output data set observations (optional) **/
dat_count_moe_vars = , /** Margin of error vars for transformed count vars (optional) **/
calc_vars = , /** SAS statements to define calculated vars based on transformed data (optional) **/
calc_vars_labels = , /** Variable labels for vars defined in calc_vars= (optional) **/
wgt_ds_name = , /** Weighting file data set library and name **/
wgt_org_geo = , /** Original geo id in weighting file **/
wgt_new_geo = , /** New geo id in weighting file **/
wgt_new_geo_fmt = , /** Format for weighting file geo var, will be added to output data set (optional) **/
wgt_id_vars = , /** Additional geo IDs in weighting file (optional) **/
wgt_wgt_var = , /** Alias for wgt_count_var= **/
wgt_count_var = , /** Name of weight variable to use for counts **/
wgt_prop_var = , /** Name of weight variable to use for proportions **/
out_ds_name = , /** Output data set library and name **/
out_ds_label = , /** Label for output data set (optional) **/
show_warnings = 10, /** Number of nonmatch warnings to show (optional, def. 10) **/
keep_nonmatch = N, /** Keep nonmatching obs in output data (optional, Y/N, def. N) **/
print_diag = Y, /** Print diagnostic table at end if input and output count sums do not match (optional, Y/N, def. Y) **/
max_diff = 0.00001, /** Maximum difference allowed for diagnostic check of count variable sums (optional, def. 0.00001) **/
full_diag = N, /** Print full diagnostics (optional, Y/N, def. N) **/
mprint = N /** Sets printing of resolved macro code (optional, Y/N, def. N) **/
);
/*************************** USAGE NOTES *****************************
:
SAMPLE CALLTransform_geo_data(
%dat_ds_name = Test_dat,
dat_org_geo = geo_in,
dat_count_vars = count1 count2,
dat_prop_vars = prop1,
calc_vars =
prop2 = 100 * ( count1 / count2 );
sum1 = count1 + count2; ,
calc_vars_labels =
prop2 = "100 * ( count1 / count2 )"
sum1 = "count1 + count2" ,
wgt_ds_name = Test_wgt,
wgt_org_geo = geo_in,
wgt_new_geo = geo_out,
wgt_id_vars = alt_id,
wgt_count_var = wt,
wgt_prop_var = wt,
out_ds_name = Test_out,
out_ds_label = Transform_geo_data macro test output data set
):
The following named parameters must be supplied= Input data set library and name
dat_ds_name = Original geo id in input data set
dat_org_geo = Weighting file data set library and name
wgt_ds_name = Original geo id in weighting file
wgt_org_geo = New geo id in weighting file
wgt_new_geo = Output data set library and name
out_ds_name
lists (or both) must be supplied:
At least one of the two variable = Count vars in input data set
dat_count_vars = Proportion vars in input data set
dat_prop_vars
:
One or more of the following weighting variables must be supplied based on the variable lists specified= Name of weight variable to use for counts
wgt_count_var = Alias for wgt_count_var=
wgt_wgt_var = Name of weight variable to use for proportions
wgt_prop_var :
The following parameters are optional= Specify an alternate aggregation geo var from input data set when a weighting file is not being used
dat_new_geo = ID variables from input data set to add to output data set observations
dat_id_vars = Margin of error vars for transformed count vars
dat_count_moe_vars = Label for output data set
out_ds_label = Format for weighting file geo var, will be added to output data set
wgt_new_geo_fmt = Additional geo IDs in weighting file
wgt_id_vars = SAS statements to define calculated vars based on transformed data
calc_vars = Variable labels for vars defined in calc_vars=
calc_vars_labels = Number of nonmatch warnings to show (def. 10)
show_warnings = Keep nonmatching obs in output data (Y/N, def. N)
keep_nonmatch = Print diagnostic table at end if input and output count sums do not match (Y/N, def. Y)
print_diag = Print full diagnostics (Y/N, def. N)
full_diag = Maximum difference allowed for diagnostic check of count variable sums (def. 0.00001)
max_diff = Sets printing of resolved macro code (Y/N, def. N)
mprint
:
NOTES
The weighting file is assumed to be normalized, that is, the valuesvariable (wgt_count_var=) must sum to 1 for each value of
of the count weight variable (wgt_org_geo=) and
the original geography variable (wgt_prop_var=) must sum to 1 for each
values of the proportion weight variable (wgt_new_geo=).
value of the new geography
Neither the weight file nor the input file needs to be sorted.
= parameter controls how obs. in the input data set
The keep_nonmatchin the weighting data set are
that do not have a matching geo ID =Y, the nonmatching obs are kept in the
handled. If keep_nonmatchin the input file.
output file and are given the same geo ID value as =N, the nonmatching obs are dropped.
If keep_nonmatch
in the SAS log
For each nonmatch, a warning message will be printed in show_warnings=. To suppress all nonmatch
up to the number specified =0.
warnings, set show_warnings
*********************************************************************/
/*************************** UPDATE NOTES ****************************
09/09/02 Peter A. Tatian
10/03/02 Macro now issues a nonmatch warning if the matching record
in the weight file has a missing value for the weight variable.
PAT08/11/03 Added calc_vars= and calc_vars_labels= options. PAT
12/30/03 Corrected warning message using wgt_org_geo. PAT
01/14/05 Suppress variable labels in diagnostic output. PAT
08/21/06 Added WGT_NEW_GEO_FMT= option.
Modified diagnostic output to use Proc Compare. PAT08/27/06 Added MAX_DIFF= option. Changed Proc Compare method to relative.
Updated macro log messages.08/28/06 Changed Compare method to Absolute. Added MPRINT= option.
12/18/07 Added dat_id_vars= parameter.
06/27/11 Added dat_count_moe_vars= parameter to support calculation of
aggregated margins of error.permitted. (Useful for margin of error
Omitting weight file now
aggregations without any weighting file.) Optional parameter = can be used to specify an alternate aggregation
DAT_NEW_GEO
geo from the input data set when a weighting file is not being
used. PAT07/09/11 Changed weighting for MOE calculation to simple weight
(previously was squared weight). 07/19/11 Corrected problem with "&out_ds_label] ~= " test. Replaced with
"%length( &out_ds_label ) > 0". PAT
07/14/12 Added wgt_count_var= and wgt_prop_var= parameters to specify
for counts and proportions. wgt_wgt_var=
separate weights for wgt_count_var=.
is an alias *********************************************************************/
***** ***** ***** MACRO SET UP ***** ***** *****;
%
%local macro version i v;
= Transform_geo_data;
%let macro = 7/14/12;
%let version
***** ***** ***** ERROR CHECKS ***** ***** *****;
%
***** ***** ***** MACRO BODY ***** ***** *****;
%
note_mput( macro=¯o, msg=Starting macro (version &version). )
%
%let mprint = %upcase( &mprint );
push_option( mprint )
%
%if %mparam_is_yes( &mprint ) %then %do;
options mprint;
%end;%else %do;
options nomprint;
%end;
%if &wgt_count_var = %then %let wgt_count_var = &wgt_wgt_var;
** Check parameters **;
%
%if &wgt_ds_name = %then %do;
note_mput( macro=¯o, msg=No weighting file specified (WGT_DS_NAME=). )
%
= &dat_new_geo;
%let wgt_new_geo
%end;%else %do;
%if &dat_new_geo ~= %then %do;
warn_mput( macro=¯o, msg=Weighting file specified so DAT_NEW_GEO=&dat_new_geo will be ignored. )
%= ;
%let dat_new_geo
%end;
%if ( ( &dat_count_vars ~= ) or ( &dat_count_moe_vars ~= ) ) and &wgt_count_var = %then %do;
err_mput( macro=¯o, msg=No weight variable specified for count variables (WGT_COUNT_VAR=). )
%
%goto exit;
%end;
%if &dat_prop_vars ~= and &wgt_prop_var = %then %do;
err_mput( macro=¯o, msg=No weight variable specified for proportion variables (WGT_PROP_VAR=). )
%
%goto exit;
%end;
%end;
= &calc_vars;
%put calc_vars
** Sort data for merging **;
%if &wgt_ds_name ~= %then %do;
proc sort =&wgt_ds_name (keep=&wgt_org_geo &wgt_new_geo &wgt_count_var &wgt_prop_var &wgt_id_vars )
data=_tgd_wgt_file;
out*where not missing( sum( of &wgt_count_var &wgt_prop_var ) );
%if &wgt_count_var ~= %then %do;
%if &wgt_prop_var ~= %then %do;
&wgt_count_var >= 0 or &wgt_prop_var >= 0;
where
%end;%else %do;
&wgt_count_var >= 0;
where
%end;
%end;%else %if &wgt_prop_var ~= %then %do;
&wgt_prop_var >= 0;
where
%end;&wgt_org_geo;
by
run;
%end;
proc sort =&dat_ds_name (keep=&dat_org_geo &dat_new_geo &dat_count_vars &dat_count_moe_vars
data&dat_prop_vars &dat_id_vars)
=_tgd_dat_file;
out&dat_org_geo;
by
run;
data _tgd_wgt_merg;
** Initialize warning count **;
_warn_ct (&show_warnings);
retain
%if &wgt_ds_name ~= %then %do;
** Transform using weighting file **;
merge_tgd_wgt_file (in=in_wf)
_tgd_dat_file (in=in_df rename=(&dat_org_geo=&wgt_org_geo));
&wgt_org_geo;
by
if not( in_wf ) then do;
if _warn_ct > 0 then do;
warn_put( macro=¯o, msg="Matching obs. not found in weight file: &dat_ds_name/"
%"&dat_org_geo=" &wgt_org_geo )
%if %mparam_is_yes( &keep_nonmatch ) %then %do;
note_put( macro=¯o, msg="Nonmatching obs. will be kept (KEEP_NONMATCH=Y)." )
%
%end;%else %do;
note_put( macro=¯o, msg="Nonmatching obs will be dropped (KEEP_NONMATCH=N)." )
%
%end;= _warn_ct - 1;
_warn_ct if _warn_ct = 0 then do;
note_put( macro=¯o, msg="No further nonmatch warnings will be printed because maximum reached (SHOW_WARNINGS=&show_warnings)." )
%
end;
end;
%if %mparam_is_yes( &keep_nonmatch ) %then %do;
%if &wgt_count_var ~= %then %do;
&wgt_count_var = 1;
%end;%if &wgt_prop_var ~= %then %do;
&wgt_prop_var = 1;
%end;&wgt_new_geo = &wgt_org_geo;
%end;%else %do;
delete;
%end;
end;
** Only retain obs in data file **;
if in_df;
drop _warn_ct;
%end;%else %do;
** Transform without weighting file **;
= _tgd_weight_dum;
%let wgt_wgt_var
set _tgd_dat_file;
%if &wgt_count_var ~= %then %do;
&wgt_count_var = 1;
%end;%if &wgt_prop_var ~= %then %do;
&wgt_prop_var = 1;
%end;
%end;
run;
%if %mparam_is_yes( &full_diag ) %then %do;
=_last_;
proc print data"_tgd_Wgt_merg";
title3
run;
title3;
%end;
%if &wgt_new_geo ~= %then %do;
=_tgd_wgt_merg;
proc sort data&wgt_new_geo;
by
run;
%end;
=_tgd_wgt_merg;
proc summary data%if ( &dat_count_vars ~= ) or ( &dat_count_moe_vars ~= ) %then %do;
&dat_count_vars &dat_count_moe_vars /weight=&wgt_count_var;
var
%end;%if &dat_prop_vars ~= %then %do;
&dat_prop_vars /weight=&wgt_prop_var;
var
%end;%if &dat_id_vars ~= or &wgt_id_vars ~= %then %do;
&dat_id_vars &wgt_id_vars;
id
%end;%if &wgt_new_geo ~= %then %do;
&wgt_new_geo;
by
%end;
output %if %length( &calc_vars ) > 0 or %length( &dat_count_moe_vars ) > 0 %then %do;
=_tgd_wgt_merg_sum (drop=_type_ _freq_)
out
%end;%else %do;
=&out_ds_name
outdrop=_type_ _freq_
(%if %length( &out_ds_label ) > 0 %then %do;
label="&out_ds_label"
%end;
)
%end;%if &dat_count_vars ~= %then %do;
sum( &dat_count_vars )=
%end;%if &dat_prop_vars ~= %then %do;
mean( &dat_prop_vars )=
%end;%if &dat_count_moe_vars ~= %then %do;
uss( &dat_count_moe_vars )=
%end;
; %if &wgt_new_geo_fmt ~= %then %do;
&wgt_new_geo &wgt_new_geo_fmt;
format
%end;
run;
%if %length( &calc_vars ) > 0 or %length( &dat_count_moe_vars ) > 0 %then %do;
&out_ds_name (
data %if %length( &out_ds_label ) > 0 %then %do;
label="&out_ds_label"
%end;
);
set _tgd_wgt_merg_sum;
** Calculated variables **;
&calc_vars ;
%if %length( &calc_vars_labels ) > 0 %then %do;
&calc_vars_labels;
label
%end;
%if &dat_count_moe_vars ~= %then %do;
** MOE variables **;
= 1;
%let i %let v = %scan( &dat_count_moe_vars, &i, %str( ) );
%do %until ( &v = );
&v = sqrt( &v );
%let i = %eval( &i + 1 );
%let v = %scan( &dat_count_moe_vars, &i, %str( ) );
%end;
%end;
run;
%end;
%if %mparam_is_yes( &full_diag ) %then %do;
=&out_ds_name;
proc print data"&out_ds_name";
title3
run;
title3;
%end;
%if %mparam_is_yes( &print_diag ) and &dat_count_vars ~= %then %do;
note_mput( macro=¯o, msg=Running diagnostic on count vars. )
%
/** Alternate diagnostic **/
=&dat_ds_name;
proc summary data&dat_count_vars;
var =_tgd_input sum=;
output out
=&out_ds_name;
proc summary data&dat_count_vars;
var =_tgd_output sum=;
output out
=_tgd_input compare=_tgd_output maxprint=(40,32000) nosummary
proc compare base=absolute criterion=&max_diff;
method&dat_count_vars;
var "*** Transform_geo_data(): Diagnostic";
title3 "*** Proc Compare should report that ""No unequal values were found,""";
title4 "*** unless keep_nonmatch=N and there were non-matching obs.";
title5 "*** BASE is input data set (&dat_ds_name), COMPARE is output data set (&out_ds_name).";
title6
run;
title3;
%end;%else %do;
note_mput( macro=¯o, msg=Diagnostic not run because PRINT_DIAG=N or DAT_COUNT_VARS= is empty. )
%
%end;
:
%exit
***** ***** ***** CLEAN UP ***** ***** *****;
%
=work memtype=(data) nolist nowarn;
proc datasets library:;
delete _tgd_
quit;
pop_option( mprint )
%
note_mput( macro=¯o, msg=Macro exiting. )
%
%mend Transform_geo_data;
/************************ UNCOMMENT TO TEST ***************************
**options mprint symbolgen mlogic;
options mprint;'Transform_geo_data: DCNIS, Macro Library';
title ** Autocall macros **;
"K:\Metro\PTatian\UISUG\Uiautos";
filename uiautos =(uiautos sasautos);
options sasautos
data Test_wgt;$ 8;
length geo_in geo_out alt_id
input geo_in geo_out alt_id wt;
cards;0.40
A A1 X 0.60
A A2 X 1.00
B B1 X 1.00
C B1 Y 1.00
D D Y 0.25
E E1 Z 0.75
E F1 Z 0.50
F E1 Z 0.50
F F1 Z 1.00
H H Z
;
run;=Test_wgt;
proc print data"Test_wgt";
title3
run;
data Test_dat;$ 8;
length geo_in
input geo_in count1 count2 prop1 ;
cards;100 1000 20
A 100 1000 40
B 100 1000 50
C 100 1000 60
D 100 1000 80
E 100 1000 100
F 100 1000 10
G
;
run;=Test_dat;
proc print data"Test_dat";
title3
run;** Invoke macro to convert count variables count1 and count2 and
** proportion variable prop1 in Test_dat from geographic level
** geo_in to geo_out. Weighting file is Test_wgt.
**;
Transform_geo_data(
%dat_ds_name = Test_dat,
dat_org_geo = geo_in,
dat_count_vars = count1 count2,
dat_prop_vars = prop1,
calc_vars =
prop2 = 100 * ( count1 / count2 );
sum1 = count1 + count2; ,
calc_vars_labels =
prop2 = "100 * ( count1 / count2 )"
sum1 = "count1 + count2" ,
wgt_ds_name = Test_wgt,
wgt_org_geo = geo_in,
wgt_new_geo = geo_out,
wgt_id_vars = alt_id,
wgt_wgt_var = wt,
wgt_prop_var = wt,
out_ds_name = Test_out,
out_ds_label = Transform_geo_data macro test output data set,
show_warnings = 1,
keep_nonmatch = Y,
print_diag = Y,
full_diag = Y,
mprint = Y
)=Test_out;
proc contents data
run;** Check that macro temporary data sets have been deleted **;
=work memtype=(data);
proc datasets library
quit;/**********************************************************************/
Geographic Crosswalk
Purpose: Autocall macro to convert data from an original geographic level to new geography using a normalized weighting file.