/*
Title: Combine IPEDS and Scorecard data for EVE analysis
Author: Jared Colston
Date Created: 9.2.2024
*/


*------------------------------------------------------------------------------|
**# [i] Package dependencies

	/*
		ssc install fs
	*/
	
*------------------------------------------------------------------------------|
**# [ii] File pathways

	// Root folders for the project; the three globals below are all built from $project.
	global project "T:\projects_current\roi_gates"									// Use project folder as main filepath
	global raw "$project\analysis\01_raw"											// working directory for section [1] (see the cd below)
	global temp "$project\analysis\03_temp"											// destination of scorecard_ipeds.dta in section [6]
	global master "$project\analysis\04_master"										// defined here but not referenced again in this file

*------------------------------------------------------------------------------|
**# [iii] Notes



*------------------------------------------------------------------------------|
**# [1] Get all IPEDS data used in EVE, make transformed vars and store

	cd "$raw"

// Institutional Characteristics (HD2021)-----------------------------
	* To refresh the raw file, uncomment the next two lines
	*copy "https://nces.ed.gov/ipeds/datacenter/data/HD2021.zip" . , replace
	*unzipfile HD2021.zip, replace
	import delimited using "hd2021.csv", varnames(1) clear
	keep unitid countycd															// only the county code is carried forward from HD
	tempfile hd2021
	save "`hd2021'"

// Program year charges (for cert programs etc.)---------------------
	*copy "https://nces.ed.gov/ipeds/datacenter/data/IC2021_PY.zip" . , replace
	*unzipfile IC2021_PY.zip, replace
	import delimited using "ic2021_py.csv", varnames(1) clear
	rename chg*py* chg*ay*															// give PY charges the AY names so both surveys can be stacked
	gen charges_survey = "PY"														// tag rows so the source survey is still known after the append
	tempfile ic2021_py
	save "`ic2021_py'"
	
// Academic year charges (for in-district tuition)-------------------
	*copy "https://nces.ed.gov/ipeds/datacenter/data/IC2021_AY.zip" . , replace
	*unzipfile IC2021_AY.zip, replace
	import delimited using "ic2021_ay.csv", varnames(1) clear
	append using "`ic2021_py'"														// stack the program-year reporters under the academic-year reporters

	* Living-cost components by arrangement:
	*   oc  = on campus        (room&board + other expenses)
	*   wf  = with family      (other expenses only)
	*   nwf = not with family  (room&board + other expenses)
	local living_oc  "chg5ay3 + chg6ay3"
	local living_wf  "chg9ay3"
	local living_nwf "chg7ay3 + chg8ay3"

	* Full price = tuition&fees (chg1ay3) + books&supplies (chg4ay3) + living costs
	foreach arr in oc wf nwf {
		gen totprice_`arr' = chg1ay3 + chg4ay3 + `living_`arr''
	}

	* "Free" price = same as above without tuition&fees
	foreach arr in oc wf nwf {
		gen totprice_`arr'_free = chg4ay3 + `living_`arr''
	}

	gen tuition = chg1ay3

	* Diagnostic plot only: compare PY and AY reporters (rows with no tag came from the AY file)
	replace charges_survey = "AY" if charges_survey == ""
	encode charges_survey, gen(survey_num)
	twoway scatter totprice_wf totprice_nwf, colorvar(survey_num) ///
		colordiscrete coloruseplegend colorfillonly mlcolor(gs10)

	keep unitid totprice* tuition

	tempfile ic2021_ay
	save "`ic2021_ay'"
	
// Student financial aid---------------------------------------------
	*copy "https://nces.ed.gov/ipeds/datacenter/data/SFA2021.zip" . , replace
	*unzipfile SFA2021.zip, replace
	import delimited using "sfa2021.csv", varnames(1) clear

	* Average aid per student in the scugffn cohort
	gen aga_ftft = agrnt_t / scugffn												// all grant aid
	gen apa_ftft_pell = pgrnt_t / scugffn											// Pell grant aid
	egen dp_agrnt_t = rowtotal(pgrnt_t agrnt_t), missing							// grant aid with Pell counted a second time ("double Pell")
	gen aga_ftft_pell = dp_agrnt_t / scugffn
	gen aga_ftft_pell_alt = aga_ftft + apa_ftft_pell

	* Living-arrangement shares from the gist* counts (publics)
	tempvar n_pub n_priv
	egen `n_pub' = rowtotal(giston2 gistwf2 gistof2), missing
	foreach c of varlist giston2 gistwf2 gistof2 {
		replace `c' = 0 if `c' == . & `n_pub' != .									// a missing cell is zero when the row has any count at all
	}
	gen p_on = giston2 / `n_pub'
	gen p_off = 1 - p_on
	gen p_owf = gistwf2 / `n_pub'
	gen p_onwf = gistof2 / `n_pub'

	* Same shares from the grnt* counts (private, for-profit); used only where the gist* shares are missing
	egen `n_priv' = rowtotal(grnton2 grntwf2 grntof2), missing
	foreach c of varlist grnton2 grntwf2 grntof2 {
		replace `c' = 0 if `c' == . & `n_priv' != .
	}

	replace aga_ftft_pell = aga_ftft if pgrnt_t == .								// no Pell amount reported: double-Pell aid equals plain grant aid
	replace p_on = grnton2 / `n_priv' if p_on == .
	replace p_off = 1 - p_on if p_off == .
	replace p_owf = grntwf2 / `n_priv' if p_owf == .
	replace p_onwf = grntof2 / `n_priv' if p_onwf == .

	keep unitid aga_* apa_* p_on p_off p_owf p_onwf

	tempfile sfa2021
	save "`sfa2021'"
	
// Graduation rates - standard---------------------------------------
	* Stack the GR2020 and GR2021 files to offset low sample sizes.
	* (The original note called these the "2018/2019 cohorts" - TODO confirm which cohorts the files cover.)
	forvalues yr = 2020/2021 {
		*copy "https://nces.ed.gov/ipeds/datacenter/data/GR`yr'.zip" . , replace
		*unzipfile GR`yr'.zip, replace
		import delimited using "gr`yr'.csv", varnames(1) clear
		keep unitid grtype grtotl* gr*t												// number and type of completers (totals by sex, totals by race)
		keep if inlist(grtype,2,3,8,9,13,14,15,29,30,35)

		* Collapse race detail to 5 categories
		gen grasiant = grasiat + grnhpit
		gen grotht = gr2mort + grunknt + grnralt
		drop grasiat grnhpit gr2mort grunknt grnralt

		rename (grwhitt grbkaat) (grwhitet grblackt)
		gen ipedsyear = `yr'

		* Park the first year on disk; the second year stays in memory
		if `yr' == 2020 {
			tempfile gr2020
			save "`gr2020'"
		}
	}

	append using "`gr2020'"

	* Pool the two survey years within institution x grtype.
	* NB: collapse (sum) turns an all-missing group into 0 rather than missing.
	collapse (sum) grtotl* graiant-grotht, by(unitid grtype)

	* Consistency check: where men + women does not reproduce the reported total,
	* the sex and race counts are blanked (grtotlt itself is left untouched).
	* A tempvar is used so the scratch column cannot collide with an existing name.
	tempvar sexsum
	gen `sexsum' = grtotlm + grtotlw
	foreach v of varlist grtotlm grtotlw grblackt grhispt grwhitet ///
						 grasiant grotht graiant {
		replace `v' = . if `sexsum' != grtotlt
	}
	drop `sexsum'																	// must be gone before reshape: it is not a stub

	* One row per institution; each count becomes <stub><grtype>
	reshape wide grtotl* grblackt grhispt grwhitet grasiant grotht graiant, i(unitid) j(grtype)
	
	/* Bach or equiv. (grtype codes used for 4-year institutions)
		grtype 2 = 4 year, Adjusted cohorts
		grtype 3 = 4 year, Completers w/in 150% time
		grtype 8 = BA subcohort adjusted cohort
		grtype 9 = completers w/in 150% of normal time 
		grtype 13 = completers w/in 4 years for BA
		grtype 14 = addtl completers w/in 5 years for BA
		grtype 15 = addtl completers w/in 6 years for BA 
	*/

	// Group suffixes: total, men, women, then the five race categories plus AIAN.
	// After the reshape wide above, gr<suffix><k> holds the count for that group in grtype k.
	local rsnamest = "totlt totlm totlw whitet blackt hispt asiant otht aiant" 
	
	// For every group build: 150% graduation rates (4-year and 2-year), their
	// denominators, completer counts, and a time-to-degree (ttd) estimate.
	foreach r of local rsnamest { 
		// ---- 4-year institutions: overall 150% grad rate (not degree specific)
		replace gr`r'3 = 0 if gr`r'3 == . & gr`r'2 != . 							// cohort present but completers missing => zero completers
		gen c150_4_`r' = gr`r'3 / gr`r'2  											// 4 yr school grad rate 
		gen d150_4_`r' = gr`r'2  													// denominator for 4 yr grad rate 
		gen _completers_`r' = gr`r'3 												// 150% completers; filled from the 2-year count further down when this is missing

		// ---- BA subcohort: time to degree among completers
		// if the BA cohort (grtype 8) is not missing, missing completers means zero completers 
		replace gr`r'13 = 0 if gr`r'13 == . & gr`r'8 != . 
		replace gr`r'14 = 0 if gr`r'14 == . & gr`r'8 != . 
		replace gr`r'15 = 0 if gr`r'15 == . & gr`r'8 != . 

		egen bacomps_6_`r' = rowtotal(gr`r'13 gr`r'14 gr`r'15), missing 			// ba completers over 6 years 
		// Weighted average of 4, 5 and 6 years, weights = share of completers finishing in each window
		gen ttd_ba_`r' =	(gr`r'13 / bacomps_6_`r') * 4 + ///
							(gr`r'14 / bacomps_6_`r') * 5 + ///
							(gr`r'15 / bacomps_6_`r') * 6  
		replace ttd_ba_`r' = 6 if ttd_ba_`r' == . & gr`r'8 != . 					// max of 6 if no completers in 150 % time (0/0 above yields missing)

		/* 2-year schools
			grtype 
			29: degree/cert-seeking adjusted cohort 2-yr institutions
			30: completers w/in 150 of normal time 2-yr
			31+32 = 30  150% completers split into <2 and 2-<4 programs (not used here)
			35: completers w/in 100
			36+37 = 35  100% completers split into <2 and 2-<4 programs (not used here)
		*/
		
		replace gr`r'30 = 0 if gr`r'30 == . & gr`r'29 != . 							// if cohort is not missing, missing completers means zero completers
		replace gr`r'35 = 0 if gr`r'35 == . & gr`r'29 != .  


		gen c150_2_`r' = gr`r'30 / gr`r'29 											// 2 yr school grad rate
		replace _completers_`r' = gr`r'30 if _completers_`r' == . & gr`r'30 != .	// use the 2-year completer count when no 4-year count exists
		gen d150_2_`r' = gr`r'29 													// denominator for 2 yr grad rate

		// 100%-time completers count as 2 years, the remaining 150%-time completers as 3 years
		gen ttd_2yr_`r' = (gr`r'35 / gr`r'30) * 2 + ((gr`r'30 - gr`r'35) / gr`r'30) * 3 
		gen twoyr_cohort_`r' = gr`r'30 												// NOTE: despite the name this is the 150% completer count (grtype 30), not the cohort

		replace ttd_2yr_`r' = 3 if ttd_2yr_`r' == . & gr`r'29 != .  				// max of 3 if no completers in 150 % time
	} 
	// NOTE(review): d150_4_* and d150_2_* are created above but are not in the keep list that follows this loop.

	keep unitid ttd* *cohort* c150* bacomps* _completers* twoyr_*

	* One 150% rate per group: the 2-year rate when it exists, otherwise the 4-year rate
	foreach g of local rsnamest {
		gen c150_`g' = cond(c150_2_`g' != ., c150_2_`g', c150_4_`g')
	}

	* Turn subgroup completer counts into shares of all completers
	* (_completers_totlt itself stays a count)
	foreach g in totlm totlw whitet blackt hispt asiant otht aiant {
		replace _completers_`g' = _completers_`g' / _completers_totlt
	}

	drop c150_2_* c150_4_*

	* Friendlier names; c150_totlt and c150_otht keep their IPEDS-style names
	rename (c150_totlm c150_totlw c150_whitet c150_blackt c150_hispt c150_asiant c150_aiant) ///
		   (c150_men   c150_women c150_white  c150_black  c150_hisp  c150_asian  c150_aian)

	tempfile gr_temp
	save "`gr_temp'"

// Graduation rates - Pell-------------------------------------------
	*copy "https://nces.ed.gov/ipeds/datacenter/data/GR2020_PELL_SSL.zip" . , replace
	*unzipfile GR2020_PELL_SSL.zip, replace
	import delimited using "gr2020_pell_ssl.csv", varnames(1) clear

	/* Variables used below
		pgrevct - revised cohort
		pgadjct - adjusted cohort (minus exclusions)
		pgcmtot - pell completers
		sscmtot - sub staff but no pell completers
		nrcmtot - no sub loan or pell completers
		Only psgrtype 1 and 4 rows are kept; the original note described
		psgrtype 1 as "completers within 150 percent of normal time" -
		TODO confirm both codes against the IPEDS dictionary.
	*/
	drop if !inlist(psgrtype, 1, 4)
	keep unitid pgrevct pgadjct pgcmtot sscmtot nrcmtot ssadjct nradjct

	gen non_pell = sscmtot + nrcmtot												// completers without Pell
	gen c150_pell = pgcmtot / pgadjct
	gen c150_nonpell = non_pell / (ssadjct + nradjct)

	* Shares of all completers
	local allcomp "(non_pell + pgcmtot)"
	gen _completers_nonpell = non_pell / `allcomp'
	gen _completers_pell = pgcmtot / `allcomp'

	keep unitid _completers_* c150_*

	tempfile gr_pell_temp
	save "`gr_pell_temp'"

// Graduation rates - Less than 2-year-------------------------------
	* Stack the GR2020_L2 and GR2021_L2 files
	* (original note: "2019: 2013 and 2016 cohorts" - TODO confirm for these files)
	forvalues yr = 2020/2021 {
		*copy "https://nces.ed.gov/ipeds/datacenter/data/GR`yr'_L2.zip" . , replace
		*unzipfile GR`yr'_L2.zip, replace
		import delimited using "gr`yr'_l2.csv", varnames(1) clear
		gen ipedsyear = `yr'

		if `yr' == 2020 {
			tempfile gr2020_l2
			save "`gr2020_l2'"
		}
	}

	append using "`gr2020_l2'"

	sort unitid ipedsyear
	keep unitid line_50 line_55 line_11 ipedsyear									// adj. cohort, 100% and 150% time completers, respectively

	collapse (sum) line_*, by(unitid)												// pool both years

	gen ttd_l2 = 1 if line_50 > 0 & line_50 != .									// time to degree fixed at 1 wherever a cohort exists
	gen l2_cohort = line_11															// 150% completers
	gen c150_l2 = line_11 / line_50
	gen c150_totlt = c150_l2														// same rate under the name used by the standard GR file

	keep unitid ttd* l2_cohort c150*

	tempfile gr_l2_temp
	save "`gr_l2_temp'"
	
// 12-month enrollment-----------------------------------------------
	*copy "https://nces.ed.gov/ipeds/datacenter/data/EFFY2021.zip" . , replace
	*unzipfile EFFY2021.zip, replace
	import delimited using "effy2021.csv", varnames(1) clear
	capture drop x*
	keep if effylev == 2															// undergrads only

	* Shares of total 12-month enrollment by sex...
	gen efy_men = efytotlm / efytotlt
	gen efy_women = efytotlw / efytotlt

	* ...and by each IPEDS race code
	foreach grp in aian asia bkaa hisp nhpi whit 2mor unkn nral {
		gen efy_`grp' = efy`grp't / efytotlt
	}

	* Collapse to the 5-category scheme used for graduation rates
	replace efy_asia = efy_asia + efy_nhpi
	gen efy_oth = efy_2mor + efy_unkn + efy_nral

	rename (efy_whit efy_bkaa efy_asia) (efy_white efy_black efy_asian)

	keep 	unitid efytotlt efy_men efy_women efy_white efy_black ///
			efy_hisp efy_asian efy_aian efy_oth
	sort unitid

	tempfile effy2021
	save "`effy2021'"
	
// Fall enrollment in exclusively distance ed------------------------
	*copy "https://nces.ed.gov/ipeds/datacenter/data/EF2021A_DIST.zip" . , replace
	*unzipfile EF2021A_DIST.zip, replace
	import delimited using "ef2021a_dist.csv", varnames(1) clear
	keep if efdelev == 2															// Only undergraduates

	* Share of enrollment that is exclusively online and counted in efdeex2-efdeex5
	tempvar n_out
	egen `n_out' = rowtotal(efdeex2 efdeex3 efdeex4 efdeex5), missing
	gen pct_distexcl_outstate = `n_out' / efdetot
	replace pct_distexcl_outstate = 0 if missing(pct_distexcl_outstate)				// Cannot rule on missings, so treat them as zero

	keep unitid pct_distexcl_outstate
	tempfile ef2021a_dist
	save "`ef2021a_dist'"

// Completions by subgroup-------------------------------------------
	*copy "https://nces.ed.gov/ipeds/datacenter/data/C2021_C.zip" . , replace
	*unzipfile C2021_C.zip, replace
	import delimited c2021_c.csv, varn(1) clear
	capture drop x* 																// imputation-flag columns; captured so a file without them does not stop the run
	drop if inlist(awlevel,7,9,10)  												// drop grad awards (mast, doct, grad certs) 
	drop csund18-csunkn 															// age breakdowns are not used

	collapse (sum) cs*, by(unitid) 													// total completers per institution across the remaining award levels

	* Race categories matched to the 5-category scheme used elsewhere in this file
	gen cs_asian = csasiat + csnhpit 
	gen cs_oth =  cs2mort + csunknt + csnralt 
	gen cs_white = cswhitt 
	gen cs_black = csbkaat  
	gen cs_hisp = cshispt  
	gen cs_aian = csaiant  															// FIX: was csasiat, which copied the Asian count into the AIAN column

	gen cs_men = cstotlm 
	gen cs_women = cstotlw 
	gen cs_tot = cstotlt 

	keep unitid cs_* 
	tempfile c2021 
	save `c2021'
	
// E&R Expenditures--------------------------------------------------
	*copy "https://nces.ed.gov/ipeds/datacenter/data/F2021_F1A.zip" . , replace
		*unzipfile F2021_F1A.zip, replace
	*copy "https://nces.ed.gov/ipeds/datacenter/data/F2021_F2.zip" . , replace
		*unzipfile F2021_F2.zip, replace
	*copy "https://nces.ed.gov/ipeds/datacenter/data/F2021_F3.zip" . , replace
		*unzipfile F2021_F3.zip, replace

	* Stack the three finance forms (F1A, F2, F3); each uses its own variable names
	import delimited using "f2021_f1a.csv", varnames(1) clear
	tempfile f2021_f1a
	save "`f2021_f1a'"

	import delimited using "f2021_f2.csv", varnames(1) clear
	tempfile f2021_f2
	save "`f2021_f2'"

	import delimited using "f2021_f3.csv", varnames(1) clear
	append using "`f2021_f1a'", force
	append using "`f2021_f2'", force
	sort unitid

	* Harmonise expense categories: take the F1A column, else F2, else F3.
	* The four lists are parallel (same position = same category).
	local cats  instruct stud_serv research pub_serv inst_supp acad_supp
	local f1src f1c011   f1c061    f1c021   f1c031   f1c071    f1c051
	local f2src f2e011   f2e051    f2e021   f2e031   f2e061    f2e041
	local f3src f3e011   f3e03b1   f3e02a1  f3e02b1  f3e03c1   f3e03a1

	forvalues k = 1/6 {
		local cat : word `k' of `cats'
		local s1  : word `k' of `f1src'
		local s2  : word `k' of `f2src'
		local s3  : word `k' of `f3src'

		gen `cat' = `s1'
		replace `cat' = `s2' if (`cat' == . & `s2' != .)
		replace `cat' = `s3' if (`cat' == . & `s3' != .)
	}

	* Education & related spending: instruction + student services, plus the
	* education share of academic and institutional support
	gen educ_share = (instruct + stud_serv) / (instruct + stud_serv + research + pub_serv)
	gen e_r_exp = instruct + stud_serv + (educ_share*(acad_supp + inst_supp))

	keep unitid e_r_exp
	format e_r_exp %15.2fc
	tempfile e_r_exp
	save "`e_r_exp'"
	
*------------------------------------------------------------------------------|
**# [2] Get scorecard data and create institutional sample

	*copy "https://ed-public-download.app.cloud.gov/downloads/College_Scorecard_Raw_Data_06102024.zip" . , replace
	*unzipfile College_Scorecard_Raw_Data_06102024.zip, replace 

	* Most-recent-cohorts file: supplies the columns that are empty in the older merged file
	import delimited using "Most-Recent-Cohorts-Institution.csv", varnames(1) clear
	keep unitid latitude longitude hbcu-relaffil d150_4_pooled d150_l4_pooled ///
			d150_4_pell d150_l4_pell faminc	md_faminc
	tempfile mr_sc
	save "`mr_sc'"

	* Historical 2021-22 merged file: base of the sample
	import delimited using "MERGED2021_22_PP.csv", varnames(1) clear
	keep 	unitid-adm_rate_all sat_avg-pcip54 ugds-ugds_unkn ///
			npt4_pub-npt4_other prgmofr pctpell d150_4 d150_l4 ///
			d150_4_pooled d150_l4_pooled cdr* c150*pell ///
			d150_4_pell d150_l4_pell inexpfte distanceonly openadmp

	replace adm_rate = "1" if openadmp == "1"										// rows flagged openadmp get an admission rate of 1

	* Remove the helper flag and every column that is about to be re-supplied by mr_sc
	drop openadmp locale-relaffil d150_4_pooled d150_l4_pooled ///
			d150_4_pell d150_l4_pell

	* Only keep the historical data: unmatched most-recent rows are discarded
	* (original note: 194 institutions in historical and not in mry)
	merge 1:1 unitid using "`mr_sc'", keep(master match) nogenerate
	
/* Trim sample-------------------------------------------------------
		50 states + DC; 
		preddeg is not graduate;
		undergrad enrollment is positive
		exclusive online enrollment above 66% from outside state excluded
*/
	drop if distanceonly == "1"														// schools that identify as distance only

	* Institutions with more than 2/3 of UG enrollment exclusively online from out of state
	* (original note: 54 institutions; only 3 public, one of which is Purdue Global)
	merge 1:1 unitid using "`ef2021a_dist'", keep(master match) nogenerate
	drop if pct_distexcl_outstate > .66 & pct_distexcl_outstate != .
	drop pct_distexcl_outstate distanceonly

	drop if region == 9 															// "outlying areas" (PR, Pac Islands, etc.)
	keep if inrange(preddeg, 1, 3)													// removes 0 (unclassified) and 4 (grad degrees only)

	* Blank the "NA"/"PS" codes in string columns, then convert whatever is now numeric.
	* capture: the string comparison fails on columns that are already numeric.
	foreach x of varlist _all {
		capture replace `x' = "" if `x' == "NA"
		capture replace `x' = "" if `x' == "PS"
		destring `x', replace
	}

	drop if ugds == . | ugds == 0													// schools with no undergrad enrollments
	gen pnonwhite = 1 - ugds_white

	* drop "program reporters" (is this the best way?) OFF for now;
	gen progreporter = prgmofr != .													// prgmofr is always null for non-program reporters (rptmth==2-->prgmofr~="null")
	
// Transform Scorecard variables for analysis------------------------
	egen sector = group(preddeg control)											// one code per predominant-degree x control combination

	* Minority-serving institution: any of the flags hbcu through nanti equals 1
	gen msi = 0
	foreach flag of varlist hbcu-nanti {
		replace msi = 1 if `flag' == 1
	}
	*gen othMSI = msi == 1 & hbcu==0 

	gen religious = relaffil != .

	* Service academies are assigned to a geographic region by hand
	replace region = 2 if inlist(unitid,164155,197027,197036) 						// US Merchant Marine Academy, Naval Academy, and Military Academy
	replace region = 7 if unitid == 128328											// US Airforce Academy
	replace region = 1 if unitid == 130624											// US Coast Guard

/* Coarse 2-digit def of stem:
	Computer and Information Sciences and Support Services, Engineering, 
	Engineering Technologies and Engineering-Related Fields, Biological and 
	Biomedical Sciences, Mathematics and Statistics, Military Technologies and 
	Applied Sciences, Physical Sciences, Science Technologies/Technicians
*/
	egen pcipSTEM = rowtotal(pcip11 pcip14 pcip15 pcip26 pcip27 ///
							 pcip29 pcip40 pcip41)

	rename pcip51 pcip51Health
	rename pcip52 pcip52Bus
	rename pcip13 pcip13Ed
	rename pcip50 pcip50Art
	rename pcip39 pcip39Theology

	* Single net-price column: public value first, then each fallback in turn
	gen netpricet4 = npt4_pub
	foreach src of varlist npt4_priv npt4_prog npt4_other {
		replace netpricet4 = `src' if netpricet4 == .
	}

	* Cohort denominators: whichever of the 4-year / less-than-4-year columns is populated
	egen d150 = rowmax(d150_4 d150_l4)
	egen d150_pooled = rowmax(d150_4_pooled d150_l4_pooled)
	egen d150_pell = rowmax(d150_4_pell d150_l4_pell)

	drop d150_4* d150_l4* ug npcurl npt4_*
	sort unitid
	
// Get fixed lat/lon for missing-------------------------------------
	* Pull coordinates from the IPEDS HD file into a side table
	preserve
		import delimited using "hd2021.csv", varnames(1) clear
		keep unitid latitude longitud
		rename (latitude longitud) (lat_new long_new)
		tempfile hd_coords
		save "`hd_coords'"
	restore

	* Overwrite both coordinates, but only for rows whose Scorecard latitude is missing
	gen byte miss_lat = latitude == .
	merge 1:1 unitid using "`hd_coords'", keep(master match) nogenerate
	replace latitude = lat_new if miss_lat
	replace longitude = long_new if miss_lat
	drop miss_lat long_new lat_new

	tempfile sc_chars
	save "`sc_chars'"
	
// Get earnings data from 2020-21 to replicate-----------------------
	import delimited using "MERGED2020_21_PP.csv", varnames(1) clear				// 2019-20 earnings no longer available

	* Earnings and count columns to carry forward; this local is reused in section [4]
	local earnvars "md_earn_wne_p* mn_earn_wne_p* pct25_earn_wne_p* pct75_earn_wne_p*"
	local earnvars "`earnvars' md_earn_wne_inc*_p* count_wne_inc*_p*"
	local earnvars "`earnvars' md_earn_wne_male0_p* count_wne_male*_p* md_earn_wne_male1_p* count_wne_p10"

	keep unitid `earnvars'
	destring `earnvars', replace force												// force: non-numeric entries become missing
	drop *p7 *p9 *p11																// drop the columns for these measurement points

	sort unitid
	tempfile sc_earnings
	save "`sc_earnings'"

*------------------------------------------------------------------------------|
**# [3] Merge institutional data together
	
	use `sc_chars', clear															// This is the base of the sample 

	* Attach each source by unitid. Rows that exist only in the using file are
	* always discarded, so the Scorecard sample defines the set of institutions.
	* The order is kept as-is because it determines the variable order.
	foreach src in sc_earnings effy2021 c2021 hd2021 ic2021_ay sfa2021 ///
				   gr_temp gr_pell_temp gr_l2_temp e_r_exp {
		if "`src'" == "gr_l2_temp" {
			* FIX: gr_l2_temp and gr_temp both carry c150_totlt. A plain merge
			* keeps the master's (missing) value for less-than-2-year schools,
			* so the rate built from the L2 survey was silently thrown away.
			* -update- fills master values only where they are missing.
			merge 1:1 unitid using ``src'', update
			drop if _merge == 2
			drop _merge
		}
		else {
			merge 1:1 unitid using ``src'', keep(master match) nogenerate
		}
	}
	
*------------------------------------------------------------------------------|
**# [4] Adjust all cost estimates to 2022 dollars using CPI

	* CPI-U index values; column 21 is used as the 2021 base and column 22 as 2022
	matrix CPIU = 	(177.0666667, 179.875, 183.9583333, 188.8833333, ///
					 195.2916667, 201.5916667, 207.3424167, 215.3025, ///
					 214.537, 218.0555, 224.9391667, 229.5939167, ///
					 232.9570833, 236.7361667, 237.017, 240.0071667, ///
					 245.1195833, 251.1068333, 255.657, 258.811, ///
					 270.970, 292.655) 
	gen d2122 = CPIU[1,22] / CPIU[1,21] 											// factor that takes 2021 dollars to 2022 dollars (kept as a variable in the data)

	* Every dollar-denominated IPEDS measure gets the same factor.
	* FIX: tuition, aga_ftft_pell and aga_ftft_pell_alt were previously left in
	* 2021 dollars although section [5.1] subtracts them from, or compares them
	* with, amounts that had already been inflated.
	local dollarvars	totprice_oc totprice_wf totprice_nwf ///
						totprice_oc_free totprice_wf_free totprice_nwf_free ///
						tuition aga_ftft apa_ftft_pell aga_ftft_pell aga_ftft_pell_alt ///
						e_r_exp
	foreach v of varlist `dollarvars' {
		replace `v' = `v' * d2122
	}

	* Earnings columns (local earnvars is defined in section [2]).
	* FIX: earnvars also lists count_* columns; those are head counts, not
	* dollars, and are no longer multiplied by the price index.
	* NOTE(review): this treats the 2020-21 Scorecard earnings as 2021 dollars - confirm.
	foreach v of varlist `earnvars' { 
		if substr("`v'", 1, 6) == "count_" continue
		replace `v' = `v' * d2122
	} 

*------------------------------------------------------------------------------|
**# [5] Create transformed variables using combined files

**# [5.1] Total average yearly cost of attendance (for FTFT)

// Average yearly net price under four scenarios----------------------
//	 base : status quo & EVE COA		-> aveprice
//	 pell : double Pell COA				-> aveprice_pell	(aid = aga_ftft_pell)
//	 free : first dollar free college	-> aveprice_free	(prices exclude tuition&fees)
//	 ld   : last dollar					-> aveprice_ld		(extra aid aga_ld covers remaining tuition)
//
// Each scenario starts from the same weighted 'net price' that assigns all
// off-campus students the not-with-family (nwf) price, then falls back when
// that is missing:
//	 1. all on campus (p_on == 1): off campus prices are generally missing
//	 2. none on campus (p_on == 0): on campus prices are generally missing
//	 3. logical imputation: if still missing, assume all are off campus
//	    (original note: recovers 168 schools where sfa data are missing)

	foreach s in base pell free ld {
		local out = cond("`s'" == "base", "aveprice", "aveprice_`s'")
		local sfx = cond("`s'" == "free", "_free", "")
		local aid = cond("`s'" == "pell", "aga_ftft_pell", "aga_ftft")
		local oc  "totprice_oc`sfx'"
		local nwf "totprice_nwf`sfx'"

		if "`s'" == "ld" {
			gen aga_ld = tuition - aga_ftft											// tuition not already covered by grant aid
			replace aga_ld = 0 if aga_ld < 0
		}

		gen `out' = p_on * `oc' + p_off * `nwf' - `aid'
		replace `out' = `oc' - `aid' if p_on == 1 & `out' == .
		replace `out' = `nwf' - `aid' if p_on == 0 & `out' == .
		replace `out' = `nwf' - `aid' if `out' == . & `nwf' != .

		if "`s'" == "ld" {
			replace `out' = `out' - aga_ld if aga_ld > 0 & aga_ld != .				// subtracting out the additional grant aid received under a last dollar program
		}

		* Policy scenarios only: net price cannot be negative, and falls back
		* to the status-quo price when it could not be computed
		if "`s'" != "base" {
			replace `out' = 0 if `out' < 0
			replace `out' = aveprice if `out' == .
		}
	}

**# [5.2] Total cost of attendance (Yearly Ave Price x TTD) estimate

* Define TTD (time to degree) based on predominant degree and impute = 125% time 
* where aveprice is nonmissing 
* (125% of normal time = 5 years for bachelor's, 2.5 years for associate's)

	// Group suffixes: total, men, women, then race categories (same list as in section [1])
	local rsnamest = "totlt totlm totlw whitet blackt hispt asiant otht aiant" 

	foreach r of local rsnamest { 
		* imputation flag "I" means components (GR) of TTD missing, but aveprice is nonmissing. "D" for small cohorts (<25)
		* ttd is imputed for less than 2 year schools so has flag "L" whenever ave price is non missing  
		* flags are concatenated, so a row can carry e.g. "DI"
		gen i_ttd_`r' = "" 

		// Predominantly bachelor's (preddeg 3): BA time to degree, imputed at 5
		gen ttd_`r' = ttd_ba_`r' if preddeg == 3 
		replace i_ttd_`r' = i_ttd_`r' + "D" if preddeg == 3 & bacomps_6_`r' < 25 	// fewer than 25 BA completers (a missing count does not satisfy < 25)
		replace i_ttd_`r' = i_ttd_`r' + "I" if preddeg == 3 & ttd_`r' == . & aveprice !=. 
		replace ttd_`r' = 5 if preddeg == 3 & ttd_`r' == . & aveprice != . 


		// Predominantly associate's (preddeg 2): 2-year time to degree, imputed at 2.5
		replace ttd_`r' = ttd_2yr_`r' if preddeg == 2 
		replace i_ttd_`r' = i_ttd_`r' + "D" if preddeg == 2 & twoyr_cohort_`r' < 25 
		replace i_ttd_`r' = i_ttd_`r' + "I" if preddeg == 2 & ttd_`r' == . & aveprice != . 
		replace ttd_`r' = 2.5 if preddeg == 2 & ttd_`r' == . & aveprice != . 

		// Predominantly certificates (preddeg 1): ttd_l2 is the same for every group
		replace ttd_`r' = ttd_l2 if preddeg == 1  
		replace i_ttd_`r' = i_ttd_`r' + "L" if preddeg == 1 & aveprice != . 		// TTD assumed to be 1 for all institutions with non missing ave price - no subgroup data available 
		replace ttd_`r' = 1 if preddeg == 1 & ttd_`r' == . & aveprice != . 
	} 

	* Total price = overall time to degree x yearly net price, one per scenario
	foreach sfx in "" "_pell" "_free" "_ld" {
		gen totprice`sfx' = ttd_totlt * aveprice`sfx'
	}

* Generate total price annual payments, assuming financed over 10-20 years at DUL interest rate ;
	local apr = .0373 																// annual interest rate undergrad DUL for AY2020-2021
	local mrate = `apr' / 12 														// monthly interest

	* Annual payment = 12 x the monthly payment of a fixed-rate loan repaid over T years
	forvalues T = 10(5)20 {
		local nper = `T' * 12														// number of monthly payments
		foreach sfx in "" "_pell" "_free" "_ld" {
			gen adebtpay_ug`T'`sfx' = 12 * (totprice`sfx'*((`mrate'*(1+`mrate')^`nper')/(((1+`mrate')^`nper')-1)))
		}
	}

	* Total prices are already built, so any TTD that carries an imputation flag
	* (I, D or L) is now blanked, and both columns are documented.
	foreach grp of local rsnamest {
		replace ttd_`grp' = . if i_ttd_`grp' != ""

		label variable ttd_`grp' "See notes"
		label variable i_ttd_`grp' "See notes"

		notes ttd_`grp': "Estimated average time to credential among students who complete. Derived from IPEDS-GR 2020 and 2021 surveys."
		notes i_ttd_`grp': "Imputation flag I means components (GR) of TTD missing, but ave price is nonmissing. D is for small cohorts (<25). TTD is imputed for less than 2 year schools so has flag L whenever ave price is non missing."
	}

// E&R Expenditures per enrollment-----------------------------------
	gen e_r_12mo = e_r_exp / efytotlt												// spending per 12-month undergraduate enrollee

*------------------------------------------------------------------------------|
**# [6] Label values & save temp file 

	* sector comes from egen group(preddeg control), so codes run control-within-degree
	label define seclbl ///
		1 "Pub <2 Yr" ///
		2 "Priv <2 Yr" ///
		3 "Prop <2 Yr" ///
		4 "Pub 2 Yr" ///
		5 "Priv 2 Yr" ///
		6 "Prop 2 Yr" ///
		7 "Pub 4+ Yr" ///
		8 "Priv 4+ Yr" ///
		9 "Prop 4+ Yr"
	label values sector seclbl

	label define controllbl ///
		1 "Public" ///
		2 "Private" ///
		3 "Proprietary"
	label values control controllbl

	label define deglbl ///
		1 "Predominantly Certificates" ///
		2 "Predominantly Associates" ///
		3 "Predominantly Bachelors"
	label values preddeg deglbl

	* Combined institution-level file
	save "$temp\scorecard_ipeds.dta", replace

*------------------------------------------------------------------------------|
**# [7] Clean up raw file folder

	* Delete downloaded archives and the "_rv" CSVs from the current directory
	* (the raw folder, after the cd in section [1]). Requires -fs- from SSC.
	* File names are quoted so that names containing spaces are removed as well;
	* when a pattern matches nothing r(files) is empty and the loop is skipped.
	foreach pattern in "*.zip" "*_rv.csv" {
		fs `pattern'
		foreach file in `r(files)' {
			rm "`file'"
		}
	}
	
	
	

	