/*SAS Code to Calculate GPQI-2016 Total and Component Scores June 1, 2018, version 1.0 The Grocery Purchase Quality Index-2016, a tool for assessing grocery food purchase quality, is based on expenditure shares for food categories found in the USDA Food Plans. It includes eight adequacy components (Total Vegetables, Greens & Beans, Total Fruit, Whole Fruit, Whole Grains, Dairy, Total Protein Foods, and Seafood & Nuts) and three moderation components (Refined Grains, Sweets & Sodas, and Processed Meats). This SAS code was developed by Philip J. Brewster with assistance from John F. Hurdle and Patricia M. Guenther. Contact: phil.brewster@utah.edu Copyright 2018 Philip J. Brewster, John F. Hurdle and Patricia M. Guenther This work is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. Please view the license at http://creativecommons.org/licenses/by-nc-sa/4.0/. *** THE DEFAULT DISPLAY SHOWS DETAILED OUTPUT FROM MULTIPLE PROCs. THIS CAN BE SUPRESSED AS NEEDED, E.G., COMMENTING OUT THE ODS GRAPHICS ON/OFF CODE BLOCKS *** */ /* define paths and file names for input and output data sets. >>> These examples are in Microsoft Windows filepath format. <<< */ FILENAME input_HH "\.csv"; FILENAME scores "\results\gpqi2016_scores_per_HHNUM.csv"; FILENAME stats "\results\gpqi2016_score_stats.xlsx"; /* input standardized expenditure shares (29 rows) and outputs as 1 row array, z1-z29 */ data expshr_expected ; INPUT fp_avg_exp ; DATALINES; 6.22052987499022 2.85693519027631 1.85939270005858 5.44131358789183 2.64911637904211 5.78485222298546 2.36949443575892 5.95830915727835 8.91026870968573 14.8964923326386 2.40746203010892 1.04321710091228 11.8970250605619 0.753954043748406 0.448252617310133 5.86699062891241 5.14199873736087 5.50631614812002 0.507376460886826 3.64796064097102 0.293677277118281 1.19369882350403 1.09030415970205 0.126784375848355 1.28577734107721 0.443657064946841 1.14108448555025 0.094364417151258 0.16422804251221 ; run; data expshr_expected(keep=hhnum z1-z29 ); set expshr_expected; format hhnum z1-z29 BEST12. ; retain z1-z29 0.00000; array z{*} z1-z29 _NUMERIC_; hhnum =.; z{_N_} = fp_avg_exp; if _N_ = 29 then output; run; /* import CSV input file (required) with aggregated observed expenditures in US dollars for each of 29 Food Plan categories, by household ID (hhnum), named as c1-c31 in array; positions 1-29 correspond to the USDA Food Plan categories listed in Table 1 of the README file; position 30 is for bottled water purchases; position 31 (optional) is for non-Food Plan expenditures or set to 0.00 */ proc import datafile=input_HH dbms=csv out=SAS_HH_spend replace; run; /* aggregate household food expenditures in c1-c30 ($$) to obtain total 'fpspend' per household (= denominator for expenditure shares) */ data aggregated_HH_obs(keep=hhnum c1-c31 fpspend totspend); set SAS_HH_spend ; format c1-c31 fpspend totspend BEST12.; retain c1-c31 fpspend totspend 0.0000 ; array c{*} c1-c31 _NUMERIC_ ; fpspend = 0.0000; totspend = 0.0000; /* sum per-category expenditures for expenditure share denominator = total expenditures within Food Plan category schema 1-29 + position 30 for water */ do ix = 1 to 30; fpspend = c{ix} + fpspend; end; totspend = fpspend + c31; run; ods graphics on; title 'Descriptive statistics for household expenditures per category with totals ($$)'; proc univariate data=aggregated_HH_obs plot; var c1-c31 fpspend totspend; run; ods graphics off; data ratio_model(keep=hhnum fpspend c1-c30 x1-x30 y1-y29 r1-r29 ); set expshr_expected aggregated_HH_obs; format x1-x30 y1-y29 r1-r29 xO yE BEST12.; retain x1-x30 y1-y29 r1-r29 0.000 seqn 0; array c{*} c1-c30 _NUMERIC_ ; array x{*} x1-x30 _NUMERIC_; array z{*} z1-z29 _NUMERIC_ ; array y{*} y1-y29 _NUMERIC_; array r{*} r1-r29 _NUMERIC_; /* import standardized expenditure share row and retain as y1-y29 array */ if _N_ = 1 then do; do ix = 1 to 29; y{ix} = z{ix} ; end; end; else do; /* process household summary records and compute observed expenditure share in x1-x29 array */ do ix = 1 to 30; x{ix} = 0.00000; if c{ix} > 0.000 then do; x{ix} = ( c{ix} / fpspend ) * 100; end; end; /* process household summary records and compute base ratio of observed/standardized expenditure share in r1-r29 */ do ix = 1 to 29; r{ix} = 0.00000; if x{ix} > 0.00000 and y{ix} > 0.00000 then do; xO = (x{ix}/100); yE = (y{ix}/100); r{ix} = xO / yE ; end; end; output ratio_model; end; run; ods graphics on; title 'Descriptive statistics for observed household expenditure shares per category (%)'; proc univariate data=ratio_model plot; var x1-x30 ; run; ods graphics off; /* recast USDA Food Plan categories (1-29) into 11 components for GPQI-2016 scoring: x1-x29 --> xgpqi1-xgpqi11 y1-y29 --> ygpqi1-ygpqi11 rgpqi1-rgpqi11 = xgpqi1-xgpqi11 / ygpqi1-ygpqi11 */ data ratio_model_GPQI(drop=ix); set ratio_model end=eof; format xgpqi1-xgpqi11 ygpqi1-ygpqi11 rgpqi1-rgpqi11 BEST12. ; retain xgpqi1-xgpqi11 ygpqi1-ygpqi11 rgpqi1-rgpqi11 0.0000; array xgpqi{*} xgpqi1-xgpqi11 _NUMERIC_ ; array ygpqi{*} ygpqi1-ygpqi11 _NUMERIC_ ; array rgpqi{*} rgpqi1-rgpqi11 _NUMERIC_; label fpspend = 'Total household food expenditures ($$)' xgpqi1 ='Total Vegetables expenditure share' xgpqi2 ='Greens and Beans expenditure share' xgpqi3 ='Total Fruit expenditure share' xgpqi4 ='Whole Fruit expenditure share' xgpqi5 ='Whole Grains expenditure share' xgpqi6 ='Dairy expenditure share' xgpqi7 ='Total Protein Foods expenditure share' xgpqi8 ='Seafood and Nuts expenditure share' xgpqi9 ='Refined Grains expenditure share' xgpqi10 ='Sweets and Sodas expenditure share' xgpqi11 ='Processed Meats expenditure share' ygpqi1 ='Total Vegetables standardized exp share' ygpqi2 ='Greens and Beans standardized exp share' ygpqi3 ='Total Fruit standardized exp share' ygpqi4 ='Whole Fruit standardized exp share' ygpqi5 ='Whole Grains standardized exp share' ygpqi6 ='Dairy standardized exp share' ygpqi7 ='Total Protein Foods standardized exp share' ygpqi8 ='Seafood and Nuts standardized exp share' ygpqi9 ='Refined Grains standardized exp share' ygpqi10 ='Sweets and Sodas standardized exp share' ygpqi11 ='Processed Meats standardized exp share' rgpqi1 ='Total Vegetables base ratio' rgpqi2 ='Greens and Beans base ratio' rgpqi3 ='Total Fruit base ratio' rgpqi4 ='Whole Fruit base ratio' rgpqi5 ='Whole Grains base ratio' rgpqi6 ='Dairy base ratio' rgpqi7 ='Total Protein Foods base ratio' rgpqi8 ='Seafood and Nuts base ratio' rgpqi9 ='Refined Grains base ratio' rgpqi10 ='Sweets and Sodas base ratio' rgpqi11 ='Processed Meats base ratio' ; do ix = 1 to 11; ygpqi{ix} = 0.0000; end; /* total vegetables */ ygpqi1 = (y5 + y6 + y7 + y8 + y9); /* greens and beans */ ygpqi2 = (y6 + y8); /* total fruit */ ygpqi3 = (y10 + y11) ; /* whole fruit*/ ygpqi4 = y10; /* whole grains */ ygpqi5 = (y1 + y2 + y3) ; /* dairy */ ygpqi6 = (y12 + y13 + y14) ; /* total protein foods */ ygpqi7 = (y16 + y17 + y18 + y20 + y21) ; /* seafood and nuts */ ygpqi8 = ( y18 + y20 ) ; /* refined grains */ ygpqi9 = y4; /* sugars and sweets */ ygpqi10 = (y15 + y25 + y26) ; /* processed meats */ ygpqi11 = y19; do ix = 1 to 11; xgpqi{ix} = 0.0000; rgpqi{ix} = 0.0000; end; /* total vegetables */ xgpqi1 = (x5 + x6 + x7 + x8 + x9); if xgpqi1 > 0.00 then rgpqi1 = xgpqi1 / ygpqi1; /* greens and beans */ xgpqi2 = (x6 + x8); if xgpqi2 > 0.00 then rgpqi2 = xgpqi2 / ygpqi2; /* total fruit */ xgpqi3 = (x10 + x11) ; if xgpqi3 > 0.00 then rgpqi3 = xgpqi3 / ygpqi3; /* whole fruit */ xgpqi4 = x10; rgpqi4 = r10; /* whole grains */ xgpqi5 = (x1 + x2 + x3); if xgpqi5 > 0.00 then rgpqi5 = xgpqi5 / ygpqi5; /* dairy */ if x12 > y12 then x12 = y12; /*set regular fat dairy to Food Plan limit w/ std exp share*/ if x14 > y14 then x14 = y14; /*set cheeses to Food Plan limit w/ std exp share*/ xgpqi6 = (x12 + x13 + x14); if xgpqi6 > 0.00 then rgpqi6 = xgpqi6 / ygpqi6 ; /* total protein foods */ xgpqi7 = (x16 + x17 + x18 + x20 + x21); if xgpqi7 > 0.00 then rgpqi7 = xgpqi7 / ygpqi7; /* seafood and nuts */ xgpqi8 = ( x18 + x20 ); if xgpqi8 > 0.00 then rgpqi8 = xgpqi8 / ygpqi8 ; /* refined grains */ xgpqi9 = x4; if xgpqi9 > 0.00 then rgpqi9 = xgpqi9 / ygpqi9 ; /* sweets and sodas */ xgpqi10 = (x15 + x25 + x26); if xgpqi10 > 0.00 then rgpqi10 = xgpqi10 / ygpqi10 ; /* processed meats */ xgpqi11 = x19; if xgpqi11 > 0.00 then rgpqi11 = xgpqi11 / ygpqi11 ; output ratio_model_GPQI ; run; ods graphics on; title 'Descriptive statistics for component-level observed expenditure shares (%) and base ratios (0-1.0)'; proc univariate data=ratio_model_GPQI plot; var xgpqi1-xgpqi11 rgpqi1-rgpqi11 ; run; ods graphics off; /* score the GPQI-2016 using the gpqi component-level ratios rgpqi1-rgpqi11 for the first 8 adequacy components = ratio * maximum points per component */ data gpqi_scoring(drop=ix max5 max10); set ratio_model_GPQI; format score1-score11 totscore max5 max10 BEST12.; retain score1-score11 totscore 0 max5 5.0 max10 10.0; array score{*} score1-score11 _NUMERIC_ ; label score1 ='Total Vegetables score' score2 ='Greens and Beans score' score3 ='Total Fruit score' score4 ='Whole Fruit score' score5 ='Whole Grains score' score6 ='Dairy score (adjusted)' score7 ='Total Protein Foods score' score8 ='Seafood and Nuts score' score9 ='Refined Grains score' score10 ='Sweets and Sodas score' score11 = 'Processed Meats score' totscore = 'Total GPQI-2016 score' ; totscore = 0.00 ; do ix = 1 to 11; score{ix} = 0.00 ; end; /*score total vegetables -- 5 points maximum */ score1 = rgpqi1 * max5; if score1 > max5 then score1 = max5; /* score greens and beans -- 5 points maximum */ score2 = rgpqi2 * max5; if score2 > max5 then score2 = max5; /* score total fruit -- 5 points maximum */ score3 = rgpqi3 * max5; if score3 > max5 then score3 = max5; /* score whole fruit -- 5 points maximum */ score4 = rgpqi4 * max5; if score4 > max5 then score4 = max5; /* score whole grains -- 10 points maximum */ score5 = rgpqi5 * max10; if score5 > max10 then score5 = max10; /* score dairy -- 10 points maximum */ score6 = rgpqi6 * max10; if score6 > max10 then score6 = max10; /* score total protein foods -- 5 points maximum */ score7 = rgpqi7 * max5; if score7 > max5 then score7 = max5; /* score seafood and nuts -- 5 points maximum */ score8 = rgpqi8 * max5; if score8 > max5 then score8 = max5; /* subtotal 8 adequacy scores only */ do ix = 1 to 8; totscore = totscore + score{ix}; end; run; /* use inverse ratio moderation scoring algorithm for remaining 3 GPQI-2016 component scores */ /* limits are set at the population estimates for the ratio values at the 85th percentile */ data gpqi2016_scores(drop=min max5 max10 LIM_RG LIM_AS LIM_PM); set gpqi_scoring; format min max5 max10 LIM_RG LIM_AS LIM_PM BEST12. ; retain max10 10.0 max5 5.0 LIM_RG 5.0 LIM_AS 14.9 LIM_PM 18.9 min 0.0; array score{*} score1-score11 _NUMERIC_ ; /* score refined grains -- 10 points maximum */ score9=Max10*(max(0.0,min(1.0,1.0-((rgpqi9-1.0)/(LIM_RG-1.0)))) ); / *score added sugars -- 10 points maximum */ score10=Max10*(max(0.0,min(1.0,1.0-((rgpqi10-1.0)/(LIM_AS-1.0)))) ); /*score processed meats -- 5 points maximum*/ score11=Max5*(max(0.0,min(1.0,1.0-((rgpqi11-1.0)/(LIM_PM-1.0)))) ); totscore = totscore + score9 + score10 + score11; run; /* have SAS calculate the descriptive statistics (mean/SE with CIs and median/IQR per component score and for the total score) */ proc surveymeans data=gpqi2016_scores plots=none mean stderr clm percentile=(25 50 75) ; var score1-score11 totscore ; ods output Statistics=rept_stats; title 'Component and total score means(se) and medians(iqrs) for GPQI-2016'; run; /* output the descriptive statistics (mean/SEs and median/IQRs) as an Excel file (optional) */ proc export data=rept_stats file=stats dbms=xlsx replace; run; /* output household respondent ID with GPQI-2016 scores; keep sample weights for National Food Acquisition and Purchase Survey data or similar complex survey designs only */ data rept_gpqi(keep=hhnum score1-score11 totscore); set gpqi2016_scores; run; /* save scores per household as CSV for further analysis; can also output to Stata formats or other suitable file types using the matching DBMS= option in SAS */ proc export data=rept_gpqi file=scores dbms=csv replace; run; /* end of GPQI-2016 scoring program */