/*
Title: Combine Commuting Zone and Institution data for EVE metrics
Author: Jared Colston
Date Created: 9.5.2024
*/


*------------------------------------------------------------------------------|
**# [i] Package dependencies

	/*
		net install dm88_1.pkg
	*/
	
*------------------------------------------------------------------------------|
**# [ii] File pathways

	* Root of the project tree; every analysis sub-folder global below is built from it
	global project "T:\projects_current\roi_gates"
	global raw     "${project}\analysis\01_raw"
	global temp    "${project}\analysis\03_temp"
	global master  "${project}\analysis\04_master"

*------------------------------------------------------------------------------|
**# [iii] Notes



*------------------------------------------------------------------------------|
**# [1] Pull in Institution data and connect to state crosswalk

	// Load the institution file, build a string state-FIPS key (st_fips formatted with %02.0f),
	// and attach the state earnings file on that key. No _merge variable is kept.
	use "$temp\scorecard_ipeds.dta", clear
	generate str_stfips = string(st_fips, "%02.0f")
	merge m:1 str_stfips using "$temp\ntl_state_earnings.dta", nogenerate

	// Discard every variable whose name starts with ntl
	drop ntl*

*------------------------------------------------------------------------------|
**# [2] Create State thresholds by race and gender

	local rnames5 "white black hisp asian aian oth"
	local sex "male female"

	/*
		Threshold definitions for the state (st_) measures built below.

		All earnings thresholds are based on those w/positive earnings age 22 to 40.
		All thresholds for <2 year institutions are set to 0.89*AA degree earnings
			per IHEP consultation w/Jeff Strohl (GT CEW based on SIPP analysis).

		T0: median HS earnings plus 10-year amortized net cost of attendance,
			by race and by gender.

		T1: median earnings for the same credential (BA if preddeg is 4+ year;
			AS if preddeg is 2 or <2 year), by race and by gender.

		T2: for comparison to female earnings; average male earnings for the same credential.
			NOTE(review): the T2 code reads the same st_mdearnpos* variables as T1 --
			confirm whether "average" here means the median.

		T3: 60th percentile of earnings, by race and by gender.

		T* is left missing for small earnings cells: a value is only assigned when the
		cell N is > 20 (so a cell with exactly 20 obs is also left missing).
		The flag st_i_* equals "D" when the cell N is < 50.
	*/

// State T0: HS earnings cell + adebtpay_ug10 ---------------------------
	// Overall threshold and its small-cell flag
	generate st_T0_all = st_mdearnposHS_rall_sall + adebtpay_ug10 if st_NearnposHS_rall_sall > 20
	generate st_i_T0_all = "D" if st_NearnposHS_rall_sall < 50

	// Same overall threshold using each alternative adebtpay_ug10_* cost variable
	foreach aid in pell free ld {
		generate st_T0_all_`aid' = st_mdearnposHS_rall_sall + adebtpay_ug10_`aid' if st_NearnposHS_rall_sall > 20
	}

	// One threshold + flag per race group (all sexes)
	foreach race of local rnames5 {
		local cell "r`race'_sall"
		generate st_T0_r`race' = st_mdearnposHS_`cell' + adebtpay_ug10 if st_NearnposHS_`cell' > 20
		generate st_i_T0_r`race' = "D" if st_NearnposHS_`cell' < 50
	}

	// One threshold + flag per sex (all races)
	foreach gender of local sex {
		local cell "rall_s`gender'"
		generate st_T0_s`gender' = st_mdearnposHS_`cell' + adebtpay_ug10 if st_NearnposHS_`cell' > 20
		generate st_i_T0_s`gender' = "D" if st_NearnposHS_`cell' < 50
	}

// State T1: earnings for the same credential ---------------------------
	// preddeg == 3 uses the BA cell, preddeg == 2 the AA cell, preddeg == 1 uses 0.89 * the AA cell.
	// A value is assigned only when the matching cell N is > 20; the flag is "D" when that N is < 50.
	local cell "rall_sall"
	generate st_T1_all = st_mdearnposBA_`cell' if st_NearnposBA_`cell' > 20 & preddeg == 3
	replace st_T1_all = st_mdearnposAA_`cell' if st_NearnposAA_`cell' > 20 & preddeg == 2
	replace st_T1_all = 0.89 * st_mdearnposAA_`cell' if st_NearnposAA_`cell' > 20 & preddeg == 1
	generate st_i_T1_all = "D" if preddeg == 3 & st_NearnposBA_`cell' < 50
	replace st_i_T1_all = "D" if (preddeg == 1 | preddeg == 2) & st_NearnposAA_`cell' < 50

	// Race-specific versions (all sexes)
	foreach race of local rnames5 {
		local cell "r`race'_sall"
		generate st_T1_r`race' = st_mdearnposBA_`cell' if st_NearnposBA_`cell' > 20 & preddeg == 3
		replace st_T1_r`race' = st_mdearnposAA_`cell' if st_NearnposAA_`cell' > 20 & preddeg == 2
		replace st_T1_r`race' = 0.89 * st_mdearnposAA_`cell' if st_NearnposAA_`cell' > 20 & preddeg == 1
		generate st_i_T1_r`race' = "D" if preddeg == 3 & st_NearnposBA_`cell' < 50
		replace st_i_T1_r`race' = "D" if (preddeg == 1 | preddeg == 2) & st_NearnposAA_`cell' < 50
	}

	// Sex-specific versions (all races)
	foreach gender of local sex {
		local cell "rall_s`gender'"
		generate st_T1_s`gender' = st_mdearnposBA_`cell' if st_NearnposBA_`cell' > 20 & preddeg == 3
		replace st_T1_s`gender' = st_mdearnposAA_`cell' if st_NearnposAA_`cell' > 20 & preddeg == 2
		replace st_T1_s`gender' = 0.89 * st_mdearnposAA_`cell' if st_NearnposAA_`cell' > 20 & preddeg == 1
		generate st_i_T1_s`gender' = "D" if preddeg == 3 & st_NearnposBA_`cell' < 50
		replace st_i_T1_s`gender' = "D" if (preddeg == 1 | preddeg == 2) & st_NearnposAA_`cell' < 50
	}

// State T2: reference-group earnings for the same credential ----------
	// Male (all races) cell: same preddeg / cell-size rules as T1
	local cell "rall_smale"
	generate st_T2_smale = st_mdearnposBA_`cell' if st_NearnposBA_`cell' > 20 & preddeg == 3
	replace st_T2_smale = st_mdearnposAA_`cell' if st_NearnposAA_`cell' > 20 & preddeg == 2
	replace st_T2_smale = 0.89 * st_mdearnposAA_`cell' if st_NearnposAA_`cell' > 20 & preddeg == 1
	generate st_i_T2_smale = "D" if preddeg == 3 & st_NearnposBA_`cell' < 50
	replace st_i_T2_smale = "D" if (preddeg == 1 | preddeg == 2) & st_NearnposAA_`cell' < 50

	// The female T2 threshold and flag are copies of the male ones
	generate st_T2_sfemale = st_T2_smale
	generate st_i_T2_sfemale = st_i_T2_smale

	// No overall T2: created empty (missing value / empty flag)
	generate st_T2_all = .
	generate st_i_T2_all = ""

	// White (all sexes) cell
	local cell "rwhite_sall"
	generate st_T2_rwhite = st_mdearnposBA_`cell' if st_NearnposBA_`cell' > 20 & preddeg == 3
	replace st_T2_rwhite = st_mdearnposAA_`cell' if st_NearnposAA_`cell' > 20 & preddeg == 2
	replace st_T2_rwhite = 0.89 * st_mdearnposAA_`cell' if st_NearnposAA_`cell' > 20 & preddeg == 1
	generate st_i_T2_rwhite = "D" if preddeg == 3 & st_NearnposBA_`cell' < 50
	replace st_i_T2_rwhite = "D" if (preddeg == 1 | preddeg == 2) & st_NearnposAA_`cell' < 50

	// Every other race group receives a copy of the white threshold and flag
	foreach race in black hisp aian asian oth {
		generate st_T2_r`race' = st_T2_rwhite
		generate st_i_T2_r`race' = st_i_T2_rwhite
	}

// State T3: p60 earnings cell ------------------------------------------
	// Value kept only when the cell N is > 20; flag "D" when the cell N is < 50
	generate st_T3_all = st_p60earnpos_rall_sall if st_Nearnpos_rall_sall > 20
	generate st_i_T3_all = "D" if st_Nearnpos_rall_sall < 50

	// Race-specific (all sexes)
	foreach race of local rnames5 {
		local cell "r`race'_sall"
		generate st_T3_r`race' = st_p60earnpos_`cell' if st_Nearnpos_`cell' > 20
		generate st_i_T3_r`race' = "D" if st_Nearnpos_`cell' < 50
	}

	// Sex-specific (all races)
	foreach gender of local sex {
		local cell "rall_s`gender'"
		generate st_T3_s`gender' = st_p60earnpos_`cell' if st_Nearnpos_`cell' > 20
		generate st_i_T3_s`gender' = "D" if st_Nearnpos_`cell' < 50
	}

*------------------------------------------------------------------------------|
**# [3] State EVI and EVC measures

	/*
		Note: EVI is the product of a success component (a rough proxy for the fraction passing T0) and an access component:
		- success component (_s) is assumed the same for all subgroups
		- access component (_a) is share of subgroup out of 12 month unduplicated headcount 
	*/

	// Gap between each md_earn_wne_*_p10 earnings variable and the overall state T0 threshold.
	// Every subgroup is compared against st_T0_all, not a subgroup-specific threshold.
	gen st_mdearn_T0 = md_earn_wne_p10 - st_T0_all
	gen st_mdearn_T0_inc1 = md_earn_wne_inc1_p10 - st_T0_all
	gen st_mdearn_T0_inc2 = md_earn_wne_inc2_p10 - st_T0_all
	gen st_mdearn_T0_inc3 = md_earn_wne_inc3_p10 - st_T0_all
	gen st_mdearn_T0_women = md_earn_wne_male0_p10 - st_T0_all
	gen st_mdearn_T0_men = md_earn_wne_male1_p10 - st_T0_all

	// Negative gaps are set to missing
	foreach v of varlist st_mdearn_T0 st_mdearn_T0_inc1 st_mdearn_T0_inc2 st_mdearn_T0_inc3 st_mdearn_T0_women st_mdearn_T0_men {
		replace `v' = . if `v' < 0
	}

	// EVC = gap above T0 times the matching count_wne_*_p10 count
	gen st_EVC_tot = st_mdearn_T0 * count_wne_p10
	gen st_EVC_inc_low = st_mdearn_T0_inc1 * count_wne_inc1_p10
	gen st_EVC_inc_mid = st_mdearn_T0_inc2 * count_wne_inc2_p10
	gen st_EVC_inc_high = st_mdearn_T0_inc3 * count_wne_inc3_p10
	gen st_EVC_women = st_mdearn_T0_women * count_wne_male0_p10
	// FIX: the men's EVC previously multiplied the women's gap (st_mdearn_T0_women) by the men's count
	gen st_EVC_men = st_mdearn_T0_men * count_wne_male1_p10

	foreach var in tot inc_low inc_mid inc_high women men {
		replace st_EVC_`var' = . if st_EVC_`var' < 0
	}

	// Each EVC as a share of the total EVC (the total's own share is tot / tot)
	gen st_EVC_tot_p = st_EVC_tot / st_EVC_tot
	gen st_EVC_inc_low_p = st_EVC_inc_low / st_EVC_tot
	gen st_EVC_inc_mid_p = st_EVC_inc_mid / st_EVC_tot
	gen st_EVC_inc_high_p = st_EVC_inc_high / st_EVC_tot
	gen st_EVC_women_p = st_EVC_women / st_EVC_tot
	gen st_EVC_men_p = st_EVC_men / st_EVC_tot

	// Shares are floored at zero
	foreach var in tot inc_low inc_mid inc_high women men {
		replace st_EVC_`var'_p = 0 if st_EVC_`var'_p < 0
	}

	// Descriptive output only; nothing below changes the data until st_hs_earnings
	tabstat st_mdearn_T0 st_mdearn_T0_inc* st_mdearn_T0_women st_mdearn_T0_men st_EVC_*
	sum st_mdearn_T0 st_mdearn_T0_women st_mdearn_T0_inc* st_EVC_*, detail

	sum st_EVC*p, detail

	// Keep a copy of the overall HS earnings cell under a new name, and move three
	// state earnings variables to the end of the variable order
	gen st_hs_earnings = st_mdearnposHS_rall_sall
	order st_mdearnposAA_rall_sall st_mdearnposBA_rall_sall st_p60earnpos_rall_sall, last

// Clean up variables------------------------------------------------
	// Range drop: removes every variable stored between str_stfips and st_Nearnpos_rall_sall
	// in the current variable order, so the result depends on that order.
	drop str_stfips-st_Nearnpos_rall_sall 
	drop d150* accredagency insturl sch_deg hcm2 cdr2*
	// Two more positional ranges plus pcip54; pcip variables outside these ranges are kept
	drop pcip01-pcip12 pcip14-pcip50Art pcip54

// Reorder vars to keep similar things together----------------------
	// The order commands run sequentially; a later command can move a variable placed by an
	// earlier one (e.g. religious is positioned twice), so statement order matters here.
	order sector, after(control) 
	order religious relaffil, after(womenonly) 
	order st_fips region latitude longitude, after(zip) 
	order ugds-ugds_unkn pnonwhite, after(sat_avg_all) 
	order msi religious prgmofr progreporter, after(relaffil) 
	order efytotlt-cs_tot, after(pnonwhite) 
	order pcipSTEM , after(pcip52Bus) 
	order inexpfte pctpell, after(sat_avg_all) 
	// Complement of pctpell, placed directly after it
	gen pctnonpell=1-pctpell
	order pctnonpell, after(pctpell)

	// From here on all existing variable names are lowercase (e.g. st_T0_all becomes st_t0_all)
	rename *, lower 																// rename variables to lowercase 

// Remove disaggregated thresholds for low enrollment schools -- race only

	// Enrollment share per race group: the efy_ value, falling back to the ugds_ value
	// when the efy_ value is system-missing. Variables are created directly under their
	// final r-prefixed names (enroll_rasian, ...).
	foreach grp in asian black hisp white aian {
		generate enroll_r`grp' = efy_`grp'
		replace enroll_r`grp' = ugds_`grp' if efy_`grp' == .
	}

	// "oth" has no ugds_ fallback
	generate enroll_roth = efy_oth

	// Blank every state threshold (t0-t3) for a race group whose enrollment share is below .5
	foreach grp in rasian rblack rhisp rwhite raian roth {
		foreach tier in t0 t1 t2 t3 {
			replace st_`tier'_`grp' = . if enroll_`grp' < .5
		}
	}

	// For every threshold (t0-t3) and every group, build st_pass_<threshold>_<group>_amount:
	// overall median earnings (md_earn_wne_p10) minus that group's state threshold.
	foreach threshold in t0 t1 t2 t3 {
		foreach var in all rblack rhisp roth rasian raian rwhite smale sfemale { 
			// Temporary string helper: "1" where the flag is non-empty, then blanked again
			// where the flag is "." or "D", then converted to numeric.
			// NOTE(review): if the flag only ever holds "" or "D", the helper is always empty
			// and the suppression after destring never fires -- confirm this is intended.
			gen st_i_`threshold'_`var'_number = "1" if st_i_`threshold'_`var' != ""  
				replace st_i_`threshold'_`var'_number = "" if st_i_`threshold'_`var' == "."
				replace st_i_`threshold'_`var'_number = "" if st_i_`threshold'_`var' == "D"
				destring st_i_`threshold'_`var'_number, replace
			// Blank the threshold where the helper equals 1
			replace st_`threshold'_`var' = . if st_i_`threshold'_`var'_number == 1 
			// Pass amount is only computed when both earnings and threshold are non-missing
			gen  st_pass_`threshold'_`var'_amount = md_earn_wne_p10-st_`threshold'_`var' if md_earn_wne_p10 != . & st_`threshold'_`var' != .  
				label variable st_pass_`threshold'_`var'_amount "Overall median earnings relative to `threshold' for `var'" 
				// The helper is temporary; the original flag variable is kept and labelled
				drop st_i_`threshold'_`var'_number 
				label var st_i_`threshold'_`var' "Small sample flag for n between 20-50 (for n less than 20 values are suppressed)"
		} 
	}

	// Give the sex-specific earnings variables the same smale/sfemale suffix the thresholds use,
	// so they can be addressed with the loop macro below.
	rename md_earn_wne_male0_p10 md_earn_wne_p10_sfemale
	rename md_earn_wne_male1_p10 md_earn_wne_p10_smale 

	// For each threshold and sex: suppress flagged thresholds, then build two pass amounts
	// (vs. the sex-specific threshold and vs. the overall threshold).
	foreach threshold in t0 t1 t2 t3 {
		foreach var in smale sfemale { 
			// The flag itself is overwritten: every non-empty value becomes "1".
			// After that the flag can only be "" or "1", so the two replaces that follow
			// cannot match anything.
			// NOTE(review): this suppresses thresholds for flagged cells, unlike the variable label
			// given to the flag ("n between 20-50" flagged, "less than 20" suppressed) -- confirm intended.
			replace st_i_`threshold'_`var' = "1" if st_i_`threshold'_`var' != ""  
			replace st_i_`threshold'_`var' = "" if st_i_`threshold'_`var' == "."
			replace st_i_`threshold'_`var' = "" if st_i_`threshold'_`var' == "D"
			 
			// Numeric copy of the flag; the threshold is blanked where it equals 1
			destring st_i_`threshold'_`var', gen(st_i_`threshold'_`var'_number) 
			replace st_`threshold'_`var'=. if st_i_`threshold'_`var'_number==1 
			// Sex-specific earnings minus the sex-specific threshold.
			// NOTE(review): the if-condition tests the overall md_earn_wne_p10, not the
			// sex-specific earnings variable being subtracted -- confirm intended.
			gen  st_pass_`threshold'_`var'_amount_disag=md_earn_wne_p10_`var'-st_`threshold'_`var' if md_earn_wne_p10!=. & st_`threshold'_`var'!=.  
			notes st_pass_`threshold'_`var'_amount_disag: "Disaggregated median earnings for `var' student relative to dissaggregated `var' `threshold'" 
			// Sex-specific earnings minus the overall threshold
			gen  st_pass_`threshold'_`var'_amount_overal=md_earn_wne_p10_`var'-st_`threshold'_all if md_earn_wne_p10!=. & st_`threshold'_all!=.  
			notes st_pass_`threshold'_`var'_amount_overal: "Disaggregated median earnings for `var' student relative to overall `threshold'" 
			label var st_pass_`threshold'_`var'_amount_disag "See notes"
			label var st_pass_`threshold'_`var'_amount_overal "See notes"
			// The numeric helper is temporary
			drop st_i_`threshold'_`var'_number 
		} 
	}

	// For t0, t1 and t3: suppress the overall threshold where its flag is non-empty, then build
	// pass amounts for the three income groups against that overall threshold.
	foreach threshold in t0 t1 t3 {
		// String helper ends up "1" wherever the flag is non-empty (the last replace wins),
		// and is then converted to numeric.
		gen st_i_`threshold'_all_number="" if st_i_`threshold'_all=="."
			replace st_i_`threshold'_all_number="" if st_i_`threshold'_all=="D"
			replace st_i_`threshold'_all_number="1" if st_i_`threshold'_all!=""   
			destring st_i_`threshold'_all_number, replace 
		// NOTE(review): the overall threshold is blanked only at this point, after any earlier
		// pass amounts based on it were computed -- confirm the ordering is intended.
		// The helper st_i_<threshold>_all_number is not dropped inside this loop.
		replace st_`threshold'_all=. if st_i_`threshold'_all_number==1 
	// Income-group earnings (md_earn_wne_inc*_p10) minus the overall threshold
	foreach var in inc1 inc2 inc3 { 
		gen  st_pass_`threshold'_`var'_amount=md_earn_wne_`var'_p10-st_`threshold'_all if md_earn_wne_p10!=. & st_`threshold'_all!=.  
		notes st_pass_`threshold'_`var'_amount: "Disaggregated median earnings for `var' student relative to overall `threshold'" 
		label var st_pass_`threshold'_`var'_amount "See notes"
	} 
	}

	// Blank every state pass amount (t0-t3) for a race group whose enrollment share is below .5
	foreach grp in rasian rblack rhisp rwhite raian roth {
		foreach tier in t0 t1 t2 t3 {
			replace st_pass_`tier'_`grp'_amount = . if enroll_`grp' < .5
		}
	}

	// Name of the race group whose enrollment share exceeds .5 (empty when none does).
	// Later replaces overwrite earlier ones, so the statement order is kept as-is.
	// FIX: Stata treats missing as larger than any number, so a bare "enroll_x > .5" was also
	// true for missing shares (e.g. every row with a missing enroll_roth was labelled "other").
	// Each comparison now also requires a non-missing share.
	// NOTE(review): there is no branch for aian -- confirm that is intended.
	gen threshold_displayed_race=""
	replace threshold_displayed_race="white" if enroll_rwhite>.5 & !missing(enroll_rwhite)
	replace threshold_displayed_race="black" if enroll_rblack>.5 & !missing(enroll_rblack)
	replace threshold_displayed_race="hisp" if enroll_rhisp>.5 & !missing(enroll_rhisp)
	replace threshold_displayed_race="asian" if enroll_rasian>.5 & !missing(enroll_rasian)
	replace threshold_displayed_race="other" if enroll_roth>.5 & !missing(enroll_roth)
	
*------------------------------------------------------------------------------|
**# [4] Match to CZs using crosswalk	

	// Attach the OUT10 code from the county crosswalk, keyed on FIPS
	rename countycd FIPS
	merge m:1 FIPS using "U:\data\psu_cz\psu_cz.dta", keepusing(OUT10)

	// Oglala Lakota, SD change: FIPS 46102 is assigned OUT10 377 by hand
	replace OUT10 = 377 if FIPS == 46102

	// Keep everything except crosswalk-only rows (drop empty CZs for now)
	keep if _merge != 2
	drop _merge FIPS

	// Attach CZ earnings; unmatched rows from either side are kept here
	// (original note: 105 truly empty CZs)
	merge m:1 OUT10 using "$temp\cz_earnings.dta"
	drop _merge
	sort unitid OUT10
	
*------------------------------------------------------------------------------|
**# [5] Create CZ thresholds by race and gender 

	local rnames5 "white black hisp asian aian oth"
	local sex "male female"

	/*
		Threshold definitions for the commuting-zone (cz_) measures built below.

		All earnings thresholds are based on those w/positive earnings age 22 to 40.
		All thresholds for <2 year institutions are set to 0.89*AA degree earnings
			per IHEP consultation w/Jeff Strohl (GT CEW based on SIPP analysis).

		T0: median HS earnings plus 10-year amortized net cost of attendance,
			by race and by gender.

		T1: median earnings for the same credential (BA if preddeg is 4+ year;
			AS if preddeg is 2 or <2 year), by race and by gender.

		T2: for comparison to female earnings; average male earnings for the same credential.
			NOTE(review): the T2 code reads the same cz_mdearnpos* variables as T1 --
			confirm whether "average" here means the median.

		T3: 60th percentile of earnings, by race and by gender.

		T* is left missing for small earnings cells: a value is only assigned when the
		cell N is > 20 (so a cell with exactly 20 obs is also left missing).
		The flag cz_i_* equals "D" when the cell N is < 50.
	*/

// CZ T0: HS earnings cell + adebtpay_ug10 ------------------------------
	// Overall threshold and its small-cell flag
	generate cz_T0_all = cz_mdearnposHS_rall_sall + adebtpay_ug10 if cz_NearnposHS_rall_sall > 20
	generate cz_i_T0_all = "D" if cz_NearnposHS_rall_sall < 50

	// Same overall threshold using each alternative adebtpay_ug10_* cost variable
	foreach aid in pell free ld {
		generate cz_T0_all_`aid' = cz_mdearnposHS_rall_sall + adebtpay_ug10_`aid' if cz_NearnposHS_rall_sall > 20
	}

	// One threshold + flag per race group (all sexes)
	foreach race of local rnames5 {
		local cell "r`race'_sall"
		generate cz_T0_r`race' = cz_mdearnposHS_`cell' + adebtpay_ug10 if cz_NearnposHS_`cell' > 20
		generate cz_i_T0_r`race' = "D" if cz_NearnposHS_`cell' < 50
	}

	// One threshold + flag per sex (all races)
	foreach gender of local sex {
		local cell "rall_s`gender'"
		generate cz_T0_s`gender' = cz_mdearnposHS_`cell' + adebtpay_ug10 if cz_NearnposHS_`cell' > 20
		generate cz_i_T0_s`gender' = "D" if cz_NearnposHS_`cell' < 50
	}

// CZ T1: earnings for the same credential ------------------------------
	// preddeg == 3 uses the BA cell, preddeg == 2 the AA cell, preddeg == 1 uses 0.89 * the AA cell.
	// A value is assigned only when the matching cell N is > 20; the flag is "D" when that N is < 50.
	local cell "rall_sall"
	generate cz_T1_all = cz_mdearnposBA_`cell' if cz_NearnposBA_`cell' > 20 & preddeg == 3
	replace cz_T1_all = cz_mdearnposAA_`cell' if cz_NearnposAA_`cell' > 20 & preddeg == 2
	replace cz_T1_all = 0.89 * cz_mdearnposAA_`cell' if cz_NearnposAA_`cell' > 20 & preddeg == 1
	generate cz_i_T1_all = "D" if preddeg == 3 & cz_NearnposBA_`cell' < 50
	replace cz_i_T1_all = "D" if (preddeg == 1 | preddeg == 2) & cz_NearnposAA_`cell' < 50

	// Race-specific versions (all sexes)
	foreach race of local rnames5 {
		local cell "r`race'_sall"
		generate cz_T1_r`race' = cz_mdearnposBA_`cell' if cz_NearnposBA_`cell' > 20 & preddeg == 3
		replace cz_T1_r`race' = cz_mdearnposAA_`cell' if cz_NearnposAA_`cell' > 20 & preddeg == 2
		replace cz_T1_r`race' = 0.89 * cz_mdearnposAA_`cell' if cz_NearnposAA_`cell' > 20 & preddeg == 1
		generate cz_i_T1_r`race' = "D" if preddeg == 3 & cz_NearnposBA_`cell' < 50
		replace cz_i_T1_r`race' = "D" if (preddeg == 1 | preddeg == 2) & cz_NearnposAA_`cell' < 50
	}

	// Sex-specific versions (all races)
	foreach gender of local sex {
		local cell "rall_s`gender'"
		generate cz_T1_s`gender' = cz_mdearnposBA_`cell' if cz_NearnposBA_`cell' > 20 & preddeg == 3
		replace cz_T1_s`gender' = cz_mdearnposAA_`cell' if cz_NearnposAA_`cell' > 20 & preddeg == 2
		replace cz_T1_s`gender' = 0.89 * cz_mdearnposAA_`cell' if cz_NearnposAA_`cell' > 20 & preddeg == 1
		generate cz_i_T1_s`gender' = "D" if preddeg == 3 & cz_NearnposBA_`cell' < 50
		replace cz_i_T1_s`gender' = "D" if (preddeg == 1 | preddeg == 2) & cz_NearnposAA_`cell' < 50
	}

// CZ T2: reference-group earnings for the same credential -------------
	// Male (all races) cell: same preddeg / cell-size rules as T1
	local cell "rall_smale"
	generate cz_T2_smale = cz_mdearnposBA_`cell' if cz_NearnposBA_`cell' > 20 & preddeg == 3
	replace cz_T2_smale = cz_mdearnposAA_`cell' if cz_NearnposAA_`cell' > 20 & preddeg == 2
	replace cz_T2_smale = 0.89 * cz_mdearnposAA_`cell' if cz_NearnposAA_`cell' > 20 & preddeg == 1
	generate cz_i_T2_smale = "D" if preddeg == 3 & cz_NearnposBA_`cell' < 50
	replace cz_i_T2_smale = "D" if (preddeg == 1 | preddeg == 2) & cz_NearnposAA_`cell' < 50

	// The female T2 threshold and flag are copies of the male ones
	generate cz_T2_sfemale = cz_T2_smale
	generate cz_i_T2_sfemale = cz_i_T2_smale

	// No overall T2: created empty (missing value / empty flag)
	generate cz_T2_all = .
	generate cz_i_T2_all = ""

	// White (all sexes) cell
	local cell "rwhite_sall"
	generate cz_T2_rwhite = cz_mdearnposBA_`cell' if cz_NearnposBA_`cell' > 20 & preddeg == 3
	replace cz_T2_rwhite = cz_mdearnposAA_`cell' if cz_NearnposAA_`cell' > 20 & preddeg == 2
	replace cz_T2_rwhite = 0.89 * cz_mdearnposAA_`cell' if cz_NearnposAA_`cell' > 20 & preddeg == 1
	generate cz_i_T2_rwhite = "D" if preddeg == 3 & cz_NearnposBA_`cell' < 50
	replace cz_i_T2_rwhite = "D" if (preddeg == 1 | preddeg == 2) & cz_NearnposAA_`cell' < 50

	// Every other race group receives a copy of the white threshold and flag
	foreach race in black hisp aian asian oth {
		generate cz_T2_r`race' = cz_T2_rwhite
		generate cz_i_T2_r`race' = cz_i_T2_rwhite
	}

// CZ T3: p60 earnings cell ---------------------------------------------
	// Value kept only when the cell N is > 20; flag "D" when the cell N is < 50
	generate cz_T3_all = cz_p60earnpos_rall_sall if cz_Nearnpos_rall_sall > 20
	generate cz_i_T3_all = "D" if cz_Nearnpos_rall_sall < 50

	// Race-specific (all sexes)
	foreach race of local rnames5 {
		local cell "r`race'_sall"
		generate cz_T3_r`race' = cz_p60earnpos_`cell' if cz_Nearnpos_`cell' > 20
		generate cz_i_T3_r`race' = "D" if cz_Nearnpos_`cell' < 50
	}

	// Sex-specific (all races)
	foreach gender of local sex {
		local cell "rall_s`gender'"
		generate cz_T3_s`gender' = cz_p60earnpos_`cell' if cz_Nearnpos_`cell' > 20
		generate cz_i_T3_s`gender' = "D" if cz_Nearnpos_`cell' < 50
	}

*------------------------------------------------------------------------------|
**# [6] CZ EVI and EVC measures

	/*
		Note: EVI is the product of a success component (a rough proxy for the fraction passing T0) and an access component:
		- success component (_s) is assumed the same for all subgroups
		- access component (_a) is share of subgroup out of 12 month unduplicated headcount 
	*/

	// Gap between each earnings variable and the overall CZ T0 threshold.
	// Every subgroup is compared against cz_T0_all, not a subgroup-specific threshold.
	gen cz_mdearn_T0 = md_earn_wne_p10 - cz_T0_all
	gen cz_mdearn_T0_inc1 = md_earn_wne_inc1_p10 - cz_T0_all
	gen cz_mdearn_T0_inc2 = md_earn_wne_inc2_p10 - cz_T0_all
	gen cz_mdearn_T0_inc3 = md_earn_wne_inc3_p10 - cz_T0_all
	gen cz_mdearn_T0_women = md_earn_wne_p10_sfemale - cz_T0_all
	gen cz_mdearn_T0_men = md_earn_wne_p10_smale - cz_T0_all

	// Negative gaps are set to missing
	foreach v of varlist cz_mdearn_T0 cz_mdearn_T0_inc1 cz_mdearn_T0_inc2 cz_mdearn_T0_inc3 cz_mdearn_T0_women cz_mdearn_T0_men {
		replace `v' = . if `v' < 0
	}

	// EVC = gap above T0 times the matching count_wne_*_p10 count
	gen cz_EVC_tot = cz_mdearn_T0 * count_wne_p10
	gen cz_EVC_inc_low = cz_mdearn_T0_inc1 * count_wne_inc1_p10
	gen cz_EVC_inc_mid = cz_mdearn_T0_inc2 * count_wne_inc2_p10
	gen cz_EVC_inc_high = cz_mdearn_T0_inc3 * count_wne_inc3_p10
	gen cz_EVC_women = cz_mdearn_T0_women * count_wne_male0_p10
	// FIX: the men's EVC previously multiplied the women's gap (cz_mdearn_T0_women) by the men's count
	gen cz_EVC_men = cz_mdearn_T0_men * count_wne_male1_p10

	foreach var in tot inc_low inc_mid inc_high women men {
		replace cz_EVC_`var' = . if cz_EVC_`var' < 0
	}

	// Each EVC as a share of the total EVC (the total's own share is tot / tot)
	gen cz_EVC_tot_p = cz_EVC_tot / cz_EVC_tot
	gen cz_EVC_inc_low_p = cz_EVC_inc_low / cz_EVC_tot
	gen cz_EVC_inc_mid_p = cz_EVC_inc_mid / cz_EVC_tot
	gen cz_EVC_inc_high_p = cz_EVC_inc_high / cz_EVC_tot
	gen cz_EVC_women_p = cz_EVC_women / cz_EVC_tot
	gen cz_EVC_men_p = cz_EVC_men / cz_EVC_tot

	// Shares are floored at zero
	foreach var in tot inc_low inc_mid inc_high women men {
		replace cz_EVC_`var'_p = 0 if cz_EVC_`var'_p < 0
	}

	// Descriptive output only
	tabstat cz_mdearn_T0 cz_mdearn_T0_inc* cz_mdearn_T0_women cz_mdearn_T0_men cz_EVC_*
	sum cz_mdearn_T0 cz_mdearn_T0_women cz_mdearn_T0_inc* cz_EVC_*, detail

	sum cz_EVC*p, detail

// Remove disaggregated thresholds for low enrollment schools -- race only

	// Enrollment share per race group: the efy_ value, falling back to the ugds_ value
	// when the efy_ value is system-missing.
	// NOTE(review): the suppression loop further down reads enroll_r* (r-prefixed) variables, which
	// are not created in this block; the un-prefixed enroll_* built here are not referenced again
	// within it -- confirm whether these are still needed.
	foreach grp in asian black hisp white aian {
		generate enroll_`grp' = efy_`grp'
		replace enroll_`grp' = ugds_`grp' if efy_`grp' == .
	}

	// "oth" has no ugds_ fallback
	generate enroll_oth = efy_oth

// Change names for consistency--------------------------------------

	// Lowercase every variable name (e.g. cz_T0_rwhite becomes cz_t0_rwhite)
	rename *, lower

	// Blank every CZ threshold (t0-t3) for a race group whose enrollment share is below .5
	foreach grp in rasian rblack rhisp rwhite raian roth {
		foreach tier in t0 t1 t2 t3 {
			replace cz_`tier'_`grp' = . if enroll_`grp' < .5
		}
	}

	// For every threshold (t0-t3) and every group, build cz_pass_<threshold>_<group>_amount:
	// overall median earnings (md_earn_wne_p10) minus that group's CZ threshold.
	foreach threshold in t0 t1 t2 t3 {
		foreach var in all rblack rhisp roth rasian raian rwhite smale sfemale { 
			// Temporary string helper: "1" where the flag is non-empty, then blanked again
			// where the flag is "." or "D", then converted to numeric.
			// NOTE(review): if the flag only ever holds "" or "D", the helper is always empty
			// and the suppression after destring never fires -- confirm this is intended.
			gen cz_i_`threshold'_`var'_number = "1" if cz_i_`threshold'_`var' != ""  
				replace cz_i_`threshold'_`var'_number = "" if cz_i_`threshold'_`var' == "."
				replace cz_i_`threshold'_`var'_number = "" if cz_i_`threshold'_`var' == "D"
				destring cz_i_`threshold'_`var'_number, replace
			// Blank the threshold where the helper equals 1
			replace cz_`threshold'_`var' = . if cz_i_`threshold'_`var'_number == 1 
			// Pass amount is only computed when both earnings and threshold are non-missing
			gen  cz_pass_`threshold'_`var'_amount = md_earn_wne_p10-cz_`threshold'_`var' if md_earn_wne_p10 != . & cz_`threshold'_`var' != .  
				label variable cz_pass_`threshold'_`var'_amount "Overall median earnings relative to `threshold' for `var'" 
				// The helper is temporary; the original flag variable is kept and labelled
				drop cz_i_`threshold'_`var'_number 
				label var cz_i_`threshold'_`var' "Small sample flag for n between 20-50 (for n less than 20 values are suppressed)"
		} 
	}

	// For each threshold and sex: suppress flagged CZ thresholds, then build two pass amounts
	// (vs. the sex-specific threshold and vs. the overall threshold).
	foreach threshold in t0 t1 t2 t3 {
		foreach var in smale sfemale { 
			// The flag itself is overwritten: every non-empty value becomes "1".
			// After that the flag can only be "" or "1", so the two replaces that follow
			// cannot match anything.
			// NOTE(review): this suppresses thresholds for flagged cells, unlike the variable label
			// given to the flag ("n between 20-50" flagged, "less than 20" suppressed) -- confirm intended.
			replace cz_i_`threshold'_`var' = "1" if cz_i_`threshold'_`var' != ""  
			replace cz_i_`threshold'_`var' = "" if cz_i_`threshold'_`var' == "."
			replace cz_i_`threshold'_`var' = "" if cz_i_`threshold'_`var' == "D"
			 
			// Numeric copy of the flag; the threshold is blanked where it equals 1
			destring cz_i_`threshold'_`var', gen(cz_i_`threshold'_`var'_number) 
			replace cz_`threshold'_`var'=. if cz_i_`threshold'_`var'_number==1 
			// Sex-specific earnings minus the sex-specific threshold.
			// NOTE(review): the if-condition tests the overall md_earn_wne_p10, not the
			// sex-specific earnings variable being subtracted -- confirm intended.
			gen  cz_pass_`threshold'_`var'_amount_disag=md_earn_wne_p10_`var'-cz_`threshold'_`var' if md_earn_wne_p10!=. & cz_`threshold'_`var'!=.  
			notes cz_pass_`threshold'_`var'_amount_disag: "Disaggregated median earnings for `var' student relative to dissaggregated `var' `threshold'" 
			// Sex-specific earnings minus the overall threshold
			gen  cz_pass_`threshold'_`var'_amount_overal=md_earn_wne_p10_`var'-cz_`threshold'_all if md_earn_wne_p10!=. & cz_`threshold'_all!=.  
			notes cz_pass_`threshold'_`var'_amount_overal: "Disaggregated median earnings for `var' student relative to overall `threshold'" 
			label var cz_pass_`threshold'_`var'_amount_disag "See notes"
			label var cz_pass_`threshold'_`var'_amount_overal "See notes"
			// The numeric helper is temporary
			drop cz_i_`threshold'_`var'_number 
		} 
	}

	// For t0, t1 and t3: suppress the overall CZ threshold where its flag is non-empty, then build
	// pass amounts for the three income groups against that overall threshold.
	foreach threshold in t0 t1 t3 {
		// String helper ends up "1" wherever the flag is non-empty (the last replace wins),
		// and is then converted to numeric.
		gen cz_i_`threshold'_all_number="" if cz_i_`threshold'_all=="."
			replace cz_i_`threshold'_all_number="" if cz_i_`threshold'_all=="D"
			replace cz_i_`threshold'_all_number="1" if cz_i_`threshold'_all!=""   
			destring cz_i_`threshold'_all_number, replace 
		// NOTE(review): the overall threshold is blanked only at this point, after any earlier
		// pass amounts based on it were computed -- confirm the ordering is intended.
		// The helper cz_i_<threshold>_all_number is not dropped inside this loop.
		replace cz_`threshold'_all=. if cz_i_`threshold'_all_number==1 
	// Income-group earnings (md_earn_wne_inc*_p10) minus the overall threshold
	foreach var in inc1 inc2 inc3 { 
		gen  cz_pass_`threshold'_`var'_amount=md_earn_wne_`var'_p10-cz_`threshold'_all if md_earn_wne_p10!=. & cz_`threshold'_all!=.  
		notes cz_pass_`threshold'_`var'_amount: "Disaggregated median earnings for `var' student relative to overall `threshold'" 
		label var cz_pass_`threshold'_`var'_amount "See notes"
	} 
	}

	// Blank every CZ pass amount (t0-t3) for a race group whose enrollment share is below .5
	foreach grp in rasian rblack rhisp rwhite raian roth {
		foreach tier in t0 t1 t2 t3 {
			replace cz_pass_`tier'_`grp'_amount = . if enroll_`grp' < .5
		}
	}

	// Renamed before the drop below, so the adebtpay_ug* wildcard there no longer matches it
	rename adebtpay_ug10 annualcost_ug10
	
	// Bulk removal by name pattern; which variables each wildcard matches depends on the
	// dataset contents at this point.
	drop 	mn_earn_*  *_p6 *_p8 adebtpay_ug15 adebtpay_ug20 st_t2_all* cz_t2_all* ///
			st_i_t2_all* cz_i_t2_all* pnonwhite d2122 c150_l2 totprice_*_free tuition ///
			bacomps_* *_ftft_pel* ttd_2yr_* ttd_ba_* ttd_l2 twoyr_* ///
			l2_cohort adebtpay_ug* aveprice_* aga_ld

	rename c150_totlt c150
	// Removes all remaining small-cell flags (cz_i_*, st_i_*) and the enroll_* helpers, among others
	drop cz_i_* st_i_* i_* cz_mnearn* enroll_* *nopell adm_rate_all ///
		sat_avg_all
		
*------------------------------------------------------------------------------|
**# [7] Create counts of institutions passing thresholds by st and cz

	// Marker equal to 1 on rows that have an institution id; rows without a unitid stay missing
	// and therefore add nothing to the totals below.
	gen dummy = 1 if unitid != .													// Exclude empty CZs 

	// For each geography (state / commuting zone) and threshold (t0, t1, t3):
	//   <geo>_insttot_<t>_count : institutions in the area with a non-missing pass amount
	//   <geo>_<t>_count         : 1 when the institution's pass amount is > 0
	//   <geo>_<t>_sum_n         : number of such institutions in the area
	//   <geo>_pct_<t>_sum_n     : share of institutions in the area passing the threshold
	foreach x in st cz {
		// Grouping variable depends only on the geography, so it is set once per geography
		if "`x'" == "st" {
			local geo st_fips
		}
		else {
			local geo commuting_zone
		}

		foreach y in t0 t1 t3 {
			// total() is the documented egen function; sum() is its legacy name
			egen `x'_insttot_`y'_count = total(dummy) if `x'_pass_`y'_all_amount != ., by(`geo')

			// Restricting to a non-missing denominator keeps missing pass amounts
			// (which compare as > 0 in Stata) from being counted as passing
			gen `x'_`y'_count = .
			replace `x'_`y'_count = 1 if `x'_pass_`y'_all_amount > 0 & `x'_insttot_`y'_count != .
			egen `x'_`y'_sum_n = total(`x'_`y'_count) if `x'_insttot_`y'_count != ., by(`geo')
			gen `x'_pct_`y'_sum_n = `x'_`y'_sum_n / `x'_insttot_`y'_count
		}
	}

*------------------------------------------------------------------------------|
**# [8] Trim, add labels, and save

	// Alternative-cost T0 thresholds for both geographies (st/cz x pell/free/ld)
	local t0aid
	foreach geo in st cz {
		foreach aid in pell free ld {
			local t0aid `t0aid' `geo'_t0_all_`aid'
		}
	}

	// Intermediate count variables for both geographies and thresholds t0, t1, t3
	local helpers
	foreach geo in st cz {
		foreach tier in t0 t1 t3 {
			local helpers `helpers' `geo'_insttot_`tier'_count `geo'_`tier'_count `geo'_`tier'_sum_n
		}
	}

	// Everything is removed in a single drop command
	drop c150_4_pell c150_l4_pell count_wne_* totprice_pell totprice_free totprice_ld ///
		dummy aga_ftft `t0aid' `helpers'

	// Location / identification
	label variable instnm       "Institution Name (CS, 2021-22)"
	label variable city         "City (CS, 2021-22)"
	label variable stabbr       "State (CS, 2021-22)"
	label variable zip          "Zip Code (CS, 2021-22)"
	label variable st_fips      "State FIPS Code (CS, 2021-22)"
	label variable region       "Region (CS - 2021-22)"

	// Institutional characteristics
	label variable main         "Main Campus Indicator (CS, 2021-22)"
	label variable numbranch    "Number of Branch Campuses (CS, 2021-22)"
	label variable preddeg      "Predominant Credential Awarded (CS, 2021-22)"
	label variable highdeg      "Highest Degree Awarded (CS, 2021-22)"
	label variable control      "Institutional Control (CS, 2021-22)"
	label variable sector       "Sector (CS, 2021-22)"
	label variable hbcu         "HBCU Indicator (CS, 2021-22)"
	label variable pbi          "Predominantly Black Institution (CS, 2021-22)"
	label variable annhi        "Alaska Native Native Hawaiian Serving Institution (CS, 2021-22)"
	label variable tribal       "Tribal Institution (CS, 2021-22)"
	label variable aanapii      "Asian American Native American Pacific Islander Serving Institution (CS 2021-22)"
	label variable hsi          "Hispanic Serving Institution (CS, 2021-22)"
	label variable nanti        "Native American non-Tribal Institution (CS, 2021-22)"
	label variable menonly      "Men Only College (CS, 2021-22)"
	label variable womenonly    "Women Only College (CS, 2021-22)"
	label variable religious    "Institution with a Religious Affiliation (CS, 2021-22)"
	label variable relaffil     "Religious Affiliation (CS, 2021-22)"
	label variable msi          "Minority Serving Institution (CS, 2021-22)"
	label variable prgmofr      "Number of Programs Offered  (CS, 2021-22)"
	label variable progreporter "Program Reporter Flag (CS, 2021-22)"

	// Admissions, spending and Pell shares
	label variable adm_rate     "Admission Rate (CS, 2021-22)"
	label variable sat_avg      "Average SAT Equivalent (CS, 2021-22)"
	label variable inexpfte     "Instructional Expenditures per FTE (CS, 2021-22)"
	label variable pctpell      "% of Undergraduate Students Receiving Pell Grants (CS, 2021-22)"
	label variable pctnonpell   "% of Undergraduate Students who do not receive Pell grants (CS, 2021-22)"
	label variable e_r_exp      "Total E&R Expenditures (calculated from IPEDS)"
	label variable e_r_12mo     "E&R Expenditures per 12-month unduplicated headcount"
	
	// Fall undergraduate degree-seeking enrollment and its composition
	label variable ugds       "Fall enrollment undergraduate degree seeking students (CS, 2021-22)"
	label variable ugds_white "Fraction White of UGDS (CS, 2021-22)"
	label variable ugds_black "Fraction Black of UGDS (CS, 2021-22)"
	label variable ugds_hisp  "Fraction Hispanic of UGDS (CS, 2021-22)"
	label variable ugds_asian "Fraction Asian of UGDS (CS, 2021-22)"
	label variable ugds_aian  "Fraction American Indian/Alaska Native of UGDS (CS, 2021-22)"
	label variable ugds_nhpi  "Fraction Native Hawaiian/Pacific Islander of UGDS (CS, 2021-22)"
	label variable ugds_2mor  "Fraction 2 or more Races of UGDS (CS, 2021-22)"
	label variable ugds_nra   "Fraction Nonresident Alien of UGDS (CS, 2021-22)"
	label variable ugds_unkn  "Fraction Race Unknown of UGDS (CS, 2021-22)"

	// 12-month unduplicated headcount and its composition
	label variable efytotlt   "Unduplicated 12-month headcount enrollment, all students (IPEDS EFFY 2021)"
	label variable efy_men    "% of unduplicated 12-month headcount who are men (IPEDS EFFY 2021)"
	label variable efy_women  "% of unduplicated 12-month headcount who are Women (IPEDS EFFY 2021)"
	label variable efy_asian  "% of unduplicated 12-month headcount who are Asian or NHPI (IPEDS EFFY 2021)"
	label variable efy_aian   "% of unduplicated 12-month headcount who are AIAN (IPEDS EFFY 2021)"
	label variable efy_black  "% of unduplicated 12-month headcount who are Black (IPEDS EFFY 2021)"
	label variable efy_hisp   "% of unduplicated 12-month headcount who are Hispanic (IPEDS EFFY 2021)"
	label variable efy_white  "% of unduplicated 12-month headcount who are White (IPEDS EFFY 2021)"
	label variable efy_oth    "% of unduplicated 12-month headcount who are other races (IPEDS EFFY 2021)"

	// Variables whose description is too long for a label point to their note instead
	foreach v in cs_tot cs_asian cs_oth cs_hisp cs_black cs_white cs_aian cs_men cs_women pcipstem netpricet4 {
		label variable `v' "See notes"
	}

	// Field-of-study shares
	notes pcipstem: "Percent of Awards in STEM (CS, 2021-22) Combines the share of awards in each of the following fields: computer and information sciences, engineering, biological sciences, math, and physical sciences (pcip11 pcip14 pcip15 pcip26 pcip27 pcip29 pcip40 pcip41"
	label variable pcip13ed "Percent of Awards in Education (CS, 2021-22)"
	label variable pcip51health "percent of Awards in Healthcare (CS, 2021-22)"
	label variable pcip52bus "Percent of Awards in Business (CS, 2021-22)"

	// Default rate and net price
	label variable cdr3 "3 Year Cohort Default Rate (CS, 2021-22)"
	label variable cdr3_denom "Number of Borrowers 3 YR CDR Cohort (CS, 2021-22)"
	notes netpricet4: "Average Net Price Among Title 4 Aid Recipients (CS, 2021-22). Combines npt4_pub npt4_priv npt4_prog and npt4_other into a single variable."

	// Notes for the awards/degrees (cs_*) counts
	notes cs_tot: "Number of undergraduate students receiving awards/degrees between July 1, 2020 and June 30, 2021 across all race/ethnicities and both genders(IPEDS-C 2021_c)"
	notes cs_asian: "Number of Asian or NHPI undergraduate students receiving awards/degrees between July 1, 2020 and June 30, 2021 (IPEDS-C 2021_c)"
	notes cs_oth: "Number of two or more, unknown and non-resident undergraduate students receiving awards/degrees between July 1, 2020 and June 30, 2021 (IPEDS-C 2021_c)"
	notes cs_hisp: "Number of Hispanic undergraduate students receiving awards/degrees between July 1, 2020 and June 30, 2021 (IPEDS-C 2021_c)"
	notes cs_black: "Number of Black undergraduate students receiving awards/degrees between July 1, 2020 and June 30, 2021 (IPEDS-C 2021_c)"
	notes cs_white: "Number of White undergraduate students receiving awards/degrees between July 1, 2020 and June 30, 2021 (IPEDS-C 2021_c)"
	notes cs_aian: "Number of American Indian/Alaskan Native undergraduate students receiving awards/degrees between July 1, 2020 and June 30, 2021 (IPEDS-C 2021_c)"
	notes cs_men: "Number of undergraduate men receiving awards/degrees between July 1, 2020 and June 30, 2021 (IPEDS-C 2021_c)"
	notes cs_women: "Number of undergraduate women receiving awards/degrees between July 1, 2020 and June 30, 2021 (IPEDS-C 2021_c)"

	// Completer count and completer shares by sex, race/ethnicity and Pell status.
	label variable _completers_totlt "Number of students who complete a credential. (IPEDS GR2 2020 +2021)"
	label variable _completers_totlm "% of all completers who are men. (IPEDS GR 2020 + 2021)."
	label variable _completers_totlw "% of completers who are women. (IPEDS GR 2020 + 2021)."
	label variable _completers_whitet "% of completers who are White. (IPEDS GR 2020 + 2021)."
	label variable _completers_blackt "% of completers who are Black. (IPEDS GR 2020 + 2021)."
	label variable _completers_hispt "% of completers who are Hispanic. (IPEDS GR 2020 + 2021)."
	label variable _completers_asiant "% of completers who are Asian or NHPI. (IPEDS GR 2020 + 2021)."

	// Fix: the full descriptions of the next two variables are longer than the
	// 80 characters Stata stores in a variable label, so they were being cut
	// off. A shorter label is attached and the complete wording kept as a note.
	label variable _completers_otht "% of completers who are other races; see notes. (IPEDS GR 2020 + 2021)."
	notes _completers_otht: "% of completers who are two or more, unknown, or nonresident students. (IPEDS GR 2020 + 2021)."
	label variable _completers_aiant "% of completers who are AIAN; see notes. (IPEDS GR 2020 + 2021)."
	notes _completers_aiant: "% of completers who are American Indian or Alaskan Native. (IPEDS GR 2020 + 2021)."

	label variable _completers_nonpell "% of all completers who are not Pell recipients. (IPEDS GR 2020 + 2021 Pell)."
	label variable _completers_pell "% of all completers who are Pell recipients. (IPEDS GR 2020 + 2021 Pell)."

	// 150%-of-normal-time completion rates. Every c150 variable carries the
	// label "See notes"; the note holds the description, the source tag and a
	// shared sentence on cohort timing and pooling.
	foreach v in c150 c150_pell c150_nonpell c150_men c150_women c150_white ///
			c150_black c150_hisp c150_asian c150_aian c150_otht {
		label variable `v' "See notes"
	}

	// Pieces shared by all of the notes.
	local c150_cohort "GRYYYY has grad status as of end of Aug. 31. YYYY. 4-year cohort YYYY-6, 2-year cohort is YYYY-3 to address small cohort sizes, 2020 and 2021 data are pooled."
	local c150_src_gr   "(IPEDS-GR 2020 + 2021)."
	local c150_src_pell "(IPEDS-GR 2020 + 2021 Pell)."

	// Overall rate (its wording differs from the subgroup notes).
	notes c150: "Completion Rate 150% Normal Time `c150_src_gr' `c150_cohort'"

	// Pell / non-Pell rates use the Pell source tag.
	notes c150_pell: "150% Completion Rate, Pell Students `c150_src_pell' `c150_cohort'"
	notes c150_nonpell: "150% Completion Rate, non-Pell Students `c150_src_pell' `c150_cohort'"

	// Sex and race/ethnicity subgroup rates.
	local c150_who_men   "men"
	local c150_who_women "women"
	local c150_who_white "White Students"
	local c150_who_black "Black Students"
	local c150_who_hisp  "Hispanic Students"
	local c150_who_asian "Asian Students"
	local c150_who_aian  "American Indian Alaska Native Students"
	local c150_who_otht  "two or more, unknown, and nonresident Students"
	foreach grp in men women white black hisp asian aian otht {
		notes c150_`grp': "150% Completion Rate, `c150_who_`grp'' `c150_src_gr' `c150_cohort'"
	}

	// Earnings ten years after entry (source tag "CS 2019-20"): overall
	// distribution first, then medians by family-income group and by sex.
	local earn_stem "Earnings 10 Years Post-Entry"
	local earn_src  "(CS 2019-20)"

	label variable md_earn_wne_p10 "Median `earn_stem' `earn_src'"
	foreach pct in 25 75 {
		label variable pct`pct'_earn_wne_p10 "`pct'th Percentile `earn_stem' `earn_src'"
	}

	local earn_inc1 "Low-Income"
	local earn_inc2 "Middle-Income"
	local earn_inc3 "High-Income"
	foreach k in 1 2 3 {
		label variable md_earn_wne_inc`k'_p10 "Median `earn_stem', `earn_inc`k'' Dependents `earn_src'"
	}

	label variable md_earn_wne_p10_smale "Median `earn_stem', Men `earn_src'"
	label variable md_earn_wne_p10_sfemale "Median `earn_stem', Women `earn_src'"

	// Published total charges by living arrangement.
	// Fix: the original descriptions were far longer than the 80 characters
	// Stata stores in a variable label, so everything after the source tag was
	// being cut off. The label now carries the short title and the complete
	// description is kept as a note. The off-campus-with-family text also said
	// "academic year 2021"; it now reads "2021-22" like its two siblings.
	label variable totprice_oc "Total Charges, On Campus (IPEDS-IC 2021). See notes"
	notes totprice_oc: "Total Charges, On Campus (IPEDS-IC 2021). Published charges for academic year 2021-22, includes total tuition fees, books, supplies, room & board, and other expenses"
	label variable totprice_wf "Total Charges, Off Campus with Family (IPEDS-IC 2021). See notes"
	notes totprice_wf: "Total Charges, Off Campus with Family. (IPEDS-IC 2021). Published charges for academic year 2021-22, includes total tuition fees, books, supplies, room & board, and other expenses"
	label variable totprice_nwf "Total Charges, Off Campus not with Family (IPEDS-IC 2021). See notes"
	notes totprice_nwf: "Total Charges, Off Campus not with Family. (IPEDS-IC 2021). Published charges for academic year 2021-22, includes total tuition fees, books, supplies, room & board, and other expenses"

	// Living-arrangement shares of FTFT students.
	label variable p_on "Fraction of FTFT Students Living On-Campus (IPEDS SFA 2021)"
	label variable p_off "Fraction of FTFT Students Living Off-Campus (IPEDS SFA 2021)"
	label variable p_owf "Fraction of FTFT Students Living Off-Campus with Family (IPEDS SFA 2021)"
	label variable p_onwf "Fraction of FTFT Students Living Off-Campus Not with Family (IPEDS SFA 2021)"

	label variable aveprice "Avg Yearly Price of Attend. Net of Grant Aid. From IPEDS-IC and IPEDS SFA."

	label variable ttd_totlt "Average Time to Degree - All Students"

	// Fix: same 80-character issue as above; short label plus full-text note.
	label variable totprice "Avg cumulative net price of credential, after grant aid. See notes"
	notes totprice: "Average cumulative net price of credential, after accounting for grant aid. Derived from annual net price (aveprice) and time to credential (ttd)."
	label variable annualcost_ug10 "Yearly Payment - Amortized Total Price over 10 Years"

	// Label and annotate the earnings thresholds for both geographies.
	// geo_name is the prefix shown in labels/notes; geo_place is the wording
	// used in the T0 note ("state" vs "commuting zone").
	foreach geo in st cz {
		if "`geo'" == "st" {
			local geo_name  "State"
			local geo_place "state"
		}
		else {
			local geo_name  "CZ"
			local geo_place "commuting zone"
		}

		// T0, T1 and T3 exist for the overall group and every subgroup.
		foreach var in all smale sfemale rwhite rblack rhisp rasian raian roth {
			label variable `geo'_t0_`var' "`geo_name' Threshold 0 (T0)"
			notes `geo'_t0_`var': "`geo_name' Threshold zero (T0) measures median earnings of high school diploma or GED holders in the `geo_place' where an institution is located, plus a measure of the cumulative net price to students of a credential."
			label variable `geo'_t1_`var' "`geo_name' Threshold 1 (T1)"
			notes `geo'_t1_`var': "`geo_name' Threshold 1 measures if they reach at least the median earnings of others with the same credential, which accounts for expected variation in pay."
			label variable `geo'_t3_`var' "`geo_name' Threshold 3 (T3)"
			notes `geo'_t3_`var': "Informed by Opportunity Insights' measurement of economic mobility across institutions, this `geo_name' threshold measures whether students earn enough to enter the fourth (upper middle) income quintile regardless of field of study."
		}

		// T2 is labelled for the subgroups only (no "all" variant in this loop).
		// Fixes: the label now carries the State/CZ prefix like T0/T1/T3, so the
		// st_ and cz_ versions can be told apart, and the "Threshld" typo in the
		// note is corrected.
		foreach var in smale sfemale rwhite rblack rhisp rasian raian roth {
			label variable `geo'_t2_`var' "`geo_name' Threshold 2 (T2)"
			notes `geo'_t2_`var': "Threshold 2, informed by The University of Texas System's research on in-field pay inequities, measures whether students of color, students from low-income backgrounds, and women meet the median earnings of their more advantaged peers (White students, high-income students, or men)."
		}
	}

	// EVC labels for both geographies. The level variables get a descriptive
	// label; each *_p variable is labelled with the ratio it is named after.
	foreach geo in st cz {
		if "`geo'" == "st" {
			local geo_name "State"
		}
		else {
			local geo_name "CZ"
		}

		// Levels
		label variable `geo'_evc_tot      "`geo_name' EVC - All Students"
		label variable `geo'_evc_inc_low  "`geo_name' EVC for low-income students"
		label variable `geo'_evc_inc_mid  "`geo_name' EVC for middle-income students"
		label variable `geo'_evc_inc_high "`geo_name' EVC for high-income students"
		label variable `geo'_evc_women    "`geo_name' EVC for women"
		label variable `geo'_evc_men      "`geo_name' EVC for men"

		// Ratios to the all-student EVC.
		// NOTE(review): for "tot" this yields the label "<geo>_evc_tot / <geo>_evc_tot",
		// exactly as before - confirm that is the intended description.
		foreach grp in tot inc_low inc_mid inc_high women men {
			label variable `geo'_evc_`grp'_p "`geo'_evc_`grp' / `geo'_evc_tot"
		}
	}
	
	// Earnings-minus-T0 gaps, overall and by subgroup, for state and CZ.
	// Fix: the subgroup descriptions were 84-102 characters long, past the 80
	// characters Stata stores in a variable label, so the trailing "negatives
	// recoded to missing" qualifier was being cut off. Subgroup variables now
	// get a label that fits and keep the original full wording as a note.
	local t0gap_inc1  "low-income dependents"
	local t0gap_inc2  "mid-income dependents"
	local t0gap_inc3  "hi-income dependents"
	local t0gap_women "women"
	local t0gap_men   "men"
	foreach x in st cz {
		// The overall label already fits, so it is left as it was.
		label variable `x'_mdearn_t0 "Median Earnings (10 Years Post-Entry) Minus T0, negatives recoded to missing"
		foreach g in inc1 inc2 inc3 women men {
			label variable `x'_mdearn_t0_`g' "Median Earnings Minus T0 (`t0gap_`g''), negatives set to missing"
			notes `x'_mdearn_t0_`g': "Median Earnings (10 Years Post-Entry) Minus T0 for `t0gap_`g'', negatives recoded to missing"
		}
	}

	// Share of institutions passing each threshold, by geography.
	foreach k in 0 1 3 {
		label variable st_pct_t`k'_sum_n "% of Institutions Passing T`k' in State"
		label variable cz_pct_t`k'_sum_n "% of Institutions Passing T`k' in CZ"
	}

	// Write the fully labelled file before any variables are removed.
	save "$master\roi_combined.dta", replace
	
*------------------------------------------------------------------------------|
**# [9] Further remove variables that are not relevant to any product

	// Four-way sector built from control and preddeg:
	//   1 = control 1 with preddeg 1 or 2, 2 = control 1 with preddeg 3,
	//   3 = control 2, 4 = control 3; any other combination is left missing.
	gen sector4 = cond(control == 1 & inlist(preddeg, 1, 2), 1, ///
				  cond(control == 1 & preddeg == 3,          2, ///
				  cond(control == 2,                         3, ///
				  cond(control == 3,                         4, .))))

	label define sector4_lab ///
		1 "Public 2-Year" ///
		2 "Public 4-Year" ///
		3 "Non-Profit"    ///
		4 "For-Profit"
	label values sector4 sector4_lab

	// Keep the derived sector next to the variable it is built from.
	order sector4, after(control)
	
	// Remove every variable that is not needed downstream, then save the
	// slimmed-down file. The names follow regular patterns, so the list is
	// assembled in the local macro -unused- and handed to a single -drop-,
	// which resolves the whole varlist against the data in one pass.

	local races "rwhite rblack rhisp rasian raian roth"
	local sexes "smale sfemale"

	// (1) Institution descriptors, enrollment/completion detail, price parts.
	local unused highdeg sector inexpfte pctnonpell efy* cs_* menonly womenonly ///
		relaffil religious prgmofr progreporter md_earn_wne_p10_sfemale ///
		md_earn_wne_p10_smale totprice_oc totprice_wf totprice_nwf p_on ///
		p_off p_owf p_onwf _completers* c150_nonpell c150_men c150_women ///
		ttd* threshold_displayed_race

	// (2) State-only items removed by wildcard.
	local unused `unused' st_t2* st_pass_t2* st_mdearn_t0*

	// (3) CZ-only items: T2 thresholds and pass amounts by subgroup (plus the
	//     overall T2 pass amount), earnings-minus-T0 gaps, and CZ summary cells.
	local unused `unused' cz_pass_t2_all_amount cz_mdearn cz_nearn* ///
		cz_p60earnpos cz_mdearn_t0 cz_mdearn_t0_inc* cz_mdearn_t0_women ///
		cz_mdearn_t0_men
	foreach g in `races' `sexes' {
		local unused `unused' cz_t2_`g' cz_pass_t2_`g'_amount
	}

	// (4) Items with the same naming pattern in both geographies: EVC, the
	//     subgroup T0/T1/T3 thresholds and pass amounts, and the sex-specific
	//     disaggregated/overall pass amounts for T0-T3.
	foreach geo in st cz {
		local unused `unused' `geo'_evc*
		foreach t in t0 t1 t3 {
			foreach g in `races' `sexes' {
				local unused `unused' `geo'_`t'_`g' `geo'_pass_`t'_`g'_amount
			}
		}
		foreach t in t0 t1 t2 t3 {
			foreach s of local sexes {
				local unused `unused' `geo'_pass_`t'_`s'_amount_disag ///
					`geo'_pass_`t'_`s'_amount_overal
			}
		}
	}

	// (5) CZ earnings cells by race x sex. For each stem: every race plus
	//     "rall" within each sex, and every race (but not "rall") for "sall".
	foreach stem in cz_mdearnposhs cz_mdearnposaa cz_mdearnposba cz_p60earnpos ///
			cz_nearnposhs cz_nearnposaa cz_nearnposba cz_nearnpos {
		foreach s of local sexes {
			foreach r in `races' rall {
				local unused `unused' `stem'_`r'_`s'
			}
		}
		foreach r of local races {
			local unused `unused' `stem'_`r'_sall
		}
	}

	drop `unused'

	save "$master\roi_clean.dta", replace

*------------------------------------------------------------------------------|
**# [10] Produce files for APL

	preserve
		// Institution-level extract: identifiers and location, sector and MSI
		// flags, selected characteristics, and the overall CZ thresholds with
		// their pass amounts.
		keep 	unitid instnm city stabbr st_fips latitude longitude ///
				preddeg control sector4 ///
				hbcu pbi annhi tribal aanapii hsi nanti ///
				adm_rate pctpell ugds* ///
				pcipstem pcip13ed pcip51health pcip52bus ///
				c150 md_earn_wne_p10 commuting_zone ///
				cz_t0_all cz_t1_all cz_t3_all ///
				cz_pass_t0_all_amount cz_pass_t1_all_amount ///
				cz_pass_t3_all_amount e_r_12mo

		// Collapse the seven MSI flags into one coded variable. The flags are
		// applied in list order, so when an institution has more than one flag
		// set, the flag that appears LAST in the list determines its code.
		gen msi = 0
		local code = 0
		foreach flag in hbcu pbi annhi tribal aanapii hsi nanti {
			local ++code
			replace msi = `code' if `flag' == 1
		}

		label define msi_lab 0 "Non-MSI" 1 "HBCU" 2 "PBI" 3 "ANNHI" 4 "TCU" ///
							5 "AANAPII" 6 "HSI" 7 "NANTI"
		label values msi msi_lab

		// Rows without an institution id are removed, and the individual flags
		// are no longer needed once msi exists.
		drop if unitid == .
		drop hbcu pbi annhi tribal aanapii hsi nanti
		order sector4 msi, after(control)

		save "$master\roi_institution_apl.dta", replace
		export delimited "$master\roi_institution_apl.csv", replace
	restore
		
	preserve 
		// Commuting-zone extract: keep the CZ identifiers, population and
		// denominators, demographic / attainment / industry shares, and the
		// share of institutions passing each threshold, then reduce the data
		// to one row per out10 value.
		keep	out10 commuting_zone cz_tot_pop cz_ed_attain_denom ///
				cz_childpov_denom cz_unemploy_denom cz_industry_denom ///
				pct_cz_nonhisp_white pct_cz_nonhisp_black pct_cz_nonhisp_aian ///
				pct_cz_nonhisp_asian pct_cz_nonhisp_nhpi pct_cz_nonhisp_other ///
				pct_cz_nonhisp_twoplus pct_cz_hisp_tot ///
				pct_cz_ed_attain_lessthanhs pct_cz_ed_attain_hs ///
				pct_cz_ed_attain_somecoll pct_cz_ed_attain_assoc ///
				pct_cz_ed_attain_bach pct_cz_ed_attain_grad pct_cz_povunder18 ///
				pct_cz_unemploy pct_cz_rural_pop pct_cz_industry_agri ///
				pct_cz_industry_construct pct_cz_industry_manufacture ///
				pct_cz_industry_wholesale pct_cz_industry_retail ///
				pct_cz_industry_transpo pct_cz_industry_information ///
				pct_cz_industry_finance pct_cz_industry_professional ///
				pct_cz_industry_educational pct_cz_industry_arts ///
				pct_cz_industry_otherserv pct_cz_industry_publicadmin ///
				cz_pct_t0_sum_n cz_pct_t1_sum_n cz_pct_t3_sum_n
		
		order commuting_zone out10
		sort out10
		
		// Give every row of a CZ the group maximum of each pass-rate variable,
		// so whichever row survives de-duplication carries a non-missing value
		// when one exists in the group.
		// Fix: the replacement variable created by -egen- does not inherit the
		// original variable label, so the descriptive labels were lost in the
		// exported .dta. The label is captured first and re-applied afterwards.
		foreach x in cz_pct_t0_sum_n cz_pct_t1_sum_n cz_pct_t3_sum_n {
			local keeplabel : variable label `x'
			egen t_`x' = max(`x'), by(out10)
			drop `x'
			rename t_`x' `x'
			label variable `x' `"`keeplabel'"'
		}
		
		// NOTE(review): -force- keeps one arbitrary row per out10 and silently
		// discards any within-CZ differences in the other variables; rows with
		// missing out10 (if any) also collapse into a single row - confirm both
		// are acceptable.
		duplicates drop out10, force
		drop out10
		
		// Population quintiles across the remaining CZ rows.
		// NOTE(review): the cut-offs written into the value labels are
		// hard-coded; re-check them against the xtile boundaries whenever the
		// input data change.
		xtile cz_tot_pop_cat = cz_tot_pop, n(5)
		lab define cz_tot_pop_cat_lab 1 "Smallest CZ Population (< 38k)" ///
			2 "Small CZ Population (38k-102k)" 3 "Medium CZ Population (102k-207k)" ///
			4 "Large CZ Population (207k-538k)" 5 "Largest CZ Population (> 538k)"
		lab val cz_tot_pop_cat cz_tot_pop_cat_lab
		
		save "$master\roi_cz_apl.dta", replace
		export delimited "$master\roi_cz_apl.csv", replace 
	restore


