/*
Purpose: Get IPUMS earnings by state and PUMA, total and by subgroups
Author: Jared Colston
Date Created: 8.23.24
*/


*------------------------------------------------------------------------------|
**# [i] Package dependencies

	/*
	
	*/
	
*------------------------------------------------------------------------------|
**# [ii] File pathways

	// Project root; every other location below is derived from it
	global project "T:\projects_current\roi_gates"
	global raw     "${project}\analysis\01_raw"										// untouched source extracts
	global temp    "${project}\analysis\03_temp"									// intermediate files
	global master  "${project}\analysis\04_master"									// final analysis files

*------------------------------------------------------------------------------|
**# [iii] Notes



*------------------------------------------------------------------------------|
**# [1] Import and transform IPUMS data

	// Work from the raw-data folder so the extract can be referenced by bare file name.
	// Note: this changes the working directory for the remainder of the do-file.
	cd "$raw"

	// Read the fixed-width extract. Each line of the dictionary below gives
	// storage type, variable name, and the column range occupied in the .dat file.
	clear
	quietly infix                 ///
	  int     year       1-4      ///
	  int     multyear   5-8      ///
	  long    sample     9-14     ///
	  double  serial     15-22    ///
	  double  cbserial   23-35    ///
	  double  hhwt       36-45    ///
	  double  cluster    46-58    ///
	  double  strata     59-70    ///
	  byte    gq         71-71    ///
	  byte    statefip   72-73    ///
	  int     countyicp  74-77    ///
	  int     countyfip  78-80    ///
	  long    puma       81-85    ///
	  int     cpuma0010  86-89    ///
	  byte    metro      90-90    ///
	  int     pernum     91-94    ///
	  double  perwt      95-104   ///
	  byte    sex        105-105  ///
	  int     age        106-108  ///
	  byte    race       109-109  ///
	  int     raced      110-112  ///
	  byte    hispan     113-113  ///
	  int     hispand    114-116  ///
	  byte    school     117-117  ///
	  byte    educ       118-119  ///
	  int     educd      120-122  ///
	  byte    degfield   123-124  ///
	  int     degfieldd  125-128  ///
	  long    inctot     129-135  ///
	  long    incwage    136-141  ///
	  long    incearn    142-148  ///
	  using `"usa_00009.dat"'														// Restricts by age and school in extract
																					// Extract from usa.ipums.org

	// Household and person weights are read as whole numbers; dividing by 100
	// restores the two decimal places shown by the %10.2f format below
	// (presumably implied decimals in the fixed-width extract -- confirm against the codebook).
	foreach wt of varlist hhwt perwt {
		replace `wt' = `wt' / 100
	}

	// Display formats only -- stored values are unaffected
	format serial            %8.0f
	format cbserial cluster  %13.0f
	format strata            %12.0f
	format hhwt perwt        %10.2f
	
*------------------------------------------------------------------------------|
**# [2] Trim sample and create group/transformed variables 
	
	// Keep a single ACS sample
	keep if sample == 202103														// 2017-2021 ACS 5-year estimates

	// 0/1 indicators (creation order is kept: female ... str_stfips)
	gen female  = (sex == 2)
	gen metarea = inlist(metro,2,3,4)

	gen HS   = (educd == 63 | educd == 64)											// HS degree and GED combined
	gen geSC = (educd >= 65 & educd <= 116)										// Any college experience
	gen AA   = (educd == 81)														// Assoc degree
	gen BA   = (educd == 101)														// Bach degree

	// Six-way race/ethnicity recode. Any Hispanic origin takes precedence over race;
	// a race code outside the listed values falls through to 0.
	gen racecat6 = cond(hispan != 0,         3, ///
				   cond(race == 1,           1, ///
				   cond(race == 2,           2, ///
				   cond(inlist(race,4,5,6),  4, ///
				   cond(race == 3,           5, ///
				   cond(inlist(race,7,8,9),  6, 0))))))

	// Earnings are analysed only when strictly positive; everything else is set to missing
	gen earnpos = incearn if incearn < . & incearn > 0

	// Zero-padded string geography keys used for the later merges
	gen str_puma   = string(puma, "%05.0f")
	gen str_stfips = string(statefip, "%02.0f")

	// TODO (not implemented): build a person identifier "g_id" as serial + "-" + pernum.
	// This would have to happen before the keep below, which drops serial and pernum.

	// Retain geography, weight, field of degree, and the variables generated above
	keep statefip countyicp countyfip puma cpuma0010 perwt degfield degfieldd ///
		 female metarea HS geSC AA BA racecat6 earnpos str_puma str_stfips
					
*------------------------------------------------------------------------------|
**# [3] Label vars and values 
								
	// Value labels for the six-way race/ethnicity recode
	label define racecatlbl6 1 "White NH"
	label define racecatlbl6 2 "Black NH", add
	label define racecatlbl6 3 "Hispanic", add
	label define racecatlbl6 4 "Asian NH", add
	label define racecatlbl6 5 "AIAN", add
	label define racecatlbl6 6 "Mixed, or Other Race", add
	label values racecat6 racecatlbl6

	// Variable labels for the retained extract variables
	label variable statefip  "State (FIPS code)"
	label variable countyicp "County (ICPSR code, identifiable counties only)"
	label variable countyfip "County (FIPS code, identifiable counties only)"
	label variable puma      "Public Use Microdata Area"
	label variable cpuma0010 "Consistent PUMA, 2000-2010"
	label variable perwt     "Person weight"
	label variable degfield  "Field of degree [general version]"
	label variable degfieldd "Field of degree [detailed version]"

	// Value labels for statefip. Codes 01-56 are individual states plus DC,
	// 61-68 are multi-state groupings, 72 is Puerto Rico, and 97/99 flag
	// military reservations and unidentified states.
	label define statefip_lbl 01 `"Alabama"'
	label define statefip_lbl 02 `"Alaska"', add
	label define statefip_lbl 04 `"Arizona"', add
	label define statefip_lbl 05 `"Arkansas"', add
	label define statefip_lbl 06 `"California"', add
	label define statefip_lbl 08 `"Colorado"', add
	label define statefip_lbl 09 `"Connecticut"', add
	label define statefip_lbl 10 `"Delaware"', add
	label define statefip_lbl 11 `"District of Columbia"', add
	label define statefip_lbl 12 `"Florida"', add
	label define statefip_lbl 13 `"Georgia"', add
	label define statefip_lbl 15 `"Hawaii"', add
	label define statefip_lbl 16 `"Idaho"', add
	label define statefip_lbl 17 `"Illinois"', add
	label define statefip_lbl 18 `"Indiana"', add
	label define statefip_lbl 19 `"Iowa"', add
	label define statefip_lbl 20 `"Kansas"', add
	label define statefip_lbl 21 `"Kentucky"', add
	label define statefip_lbl 22 `"Louisiana"', add
	label define statefip_lbl 23 `"Maine"', add
	label define statefip_lbl 24 `"Maryland"', add
	label define statefip_lbl 25 `"Massachusetts"', add
	label define statefip_lbl 26 `"Michigan"', add
	label define statefip_lbl 27 `"Minnesota"', add
	label define statefip_lbl 28 `"Mississippi"', add
	label define statefip_lbl 29 `"Missouri"', add
	label define statefip_lbl 30 `"Montana"', add
	label define statefip_lbl 31 `"Nebraska"', add
	label define statefip_lbl 32 `"Nevada"', add
	label define statefip_lbl 33 `"New Hampshire"', add
	label define statefip_lbl 34 `"New Jersey"', add
	label define statefip_lbl 35 `"New Mexico"', add
	label define statefip_lbl 36 `"New York"', add
	label define statefip_lbl 37 `"North Carolina"', add
	label define statefip_lbl 38 `"North Dakota"', add
	label define statefip_lbl 39 `"Ohio"', add
	label define statefip_lbl 40 `"Oklahoma"', add
	label define statefip_lbl 41 `"Oregon"', add
	label define statefip_lbl 42 `"Pennsylvania"', add
	label define statefip_lbl 44 `"Rhode Island"', add
	label define statefip_lbl 45 `"South Carolina"', add
	label define statefip_lbl 46 `"South Dakota"', add
	label define statefip_lbl 47 `"Tennessee"', add
	label define statefip_lbl 48 `"Texas"', add
	label define statefip_lbl 49 `"Utah"', add
	label define statefip_lbl 50 `"Vermont"', add
	label define statefip_lbl 51 `"Virginia"', add
	label define statefip_lbl 53 `"Washington"', add
	label define statefip_lbl 54 `"West Virginia"', add
	label define statefip_lbl 55 `"Wisconsin"', add
	label define statefip_lbl 56 `"Wyoming"', add
	label define statefip_lbl 61 `"Maine-New Hampshire-Vermont"', add
	label define statefip_lbl 62 `"Massachusetts-Rhode Island"', add
	label define statefip_lbl 63 `"Minnesota-Iowa-Missouri-Kansas-Nebraska-S.Dakota-N.Dakota"', add
	label define statefip_lbl 64 `"Maryland-Delaware"', add
	label define statefip_lbl 65 `"Montana-Idaho-Wyoming"', add
	label define statefip_lbl 66 `"Utah-Nevada"', add
	label define statefip_lbl 67 `"Arizona-New Mexico"', add
	label define statefip_lbl 68 `"Alaska-Hawaii"', add
	label define statefip_lbl 72 `"Puerto Rico"', add
	label define statefip_lbl 97 `"Military/Mil. Reservation"', add
	label define statefip_lbl 99 `"State not identified"', add
	// Attach the label set to the numeric state code
	label values statefip statefip_lbl

	// Value labels for degfield (general field of degree); 00 marks N/A.
	label define degfield_lbl 00 `"N/A"'
	label define degfield_lbl 11 `"Agriculture"', add
	label define degfield_lbl 13 `"Environment and Natural Resources"', add
	label define degfield_lbl 14 `"Architecture"', add
	label define degfield_lbl 15 `"Area, Ethnic, and Civilization Studies"', add
	label define degfield_lbl 19 `"Communications"', add
	label define degfield_lbl 20 `"Communication Technologies"', add
	label define degfield_lbl 21 `"Computer and Information Sciences"', add
	label define degfield_lbl 22 `"Cosmetology Services and Culinary Arts"', add
	label define degfield_lbl 23 `"Education Administration and Teaching"', add
	label define degfield_lbl 24 `"Engineering"', add
	label define degfield_lbl 25 `"Engineering Technologies"', add
	label define degfield_lbl 26 `"Linguistics and Foreign Languages"', add
	label define degfield_lbl 29 `"Family and Consumer Sciences"', add
	label define degfield_lbl 32 `"Law"', add
	label define degfield_lbl 33 `"English Language, Literature, and Composition"', add
	label define degfield_lbl 34 `"Liberal Arts and Humanities"', add
	label define degfield_lbl 35 `"Library Science"', add
	label define degfield_lbl 36 `"Biology and Life Sciences"', add
	label define degfield_lbl 37 `"Mathematics and Statistics"', add
	label define degfield_lbl 38 `"Military Technologies"', add
	label define degfield_lbl 40 `"Interdisciplinary and Multi-Disciplinary Studies (General)"', add
	label define degfield_lbl 41 `"Physical Fitness, Parks, Recreation, and Leisure"', add
	label define degfield_lbl 48 `"Philosophy and Religious Studies"', add
	label define degfield_lbl 49 `"Theology and Religious Vocations"', add
	label define degfield_lbl 50 `"Physical Sciences"', add
	label define degfield_lbl 51 `"Nuclear, Industrial Radiology, and Biological Technologies"', add
	label define degfield_lbl 52 `"Psychology"', add
	label define degfield_lbl 53 `"Criminal Justice and Fire Protection"', add
	label define degfield_lbl 54 `"Public Affairs, Policy, and Social Work"', add
	label define degfield_lbl 55 `"Social Sciences"', add
	label define degfield_lbl 56 `"Construction Services"', add
	label define degfield_lbl 57 `"Electrical and Mechanic Repairs and Technologies"', add
	label define degfield_lbl 58 `"Precision Production and Industrial Arts"', add
	label define degfield_lbl 59 `"Transportation Sciences and Technologies"', add
	label define degfield_lbl 60 `"Fine Arts"', add
	label define degfield_lbl 61 `"Medical and Health Sciences and Services"', add
	label define degfield_lbl 62 `"Business"', add
	label define degfield_lbl 64 `"History"', add
	// Attach the label set to the general field-of-degree code
	label values degfield degfield_lbl

	// Value labels for degfieldd (detailed field of degree); 0000 marks N/A.
	// NOTE(review): the leading two digits of each code appear to match the
	// general degfield code (e.g. 11xx = Agriculture) -- confirm against the codebook.
	label define degfieldd_lbl 0000 `"N/A"'
	label define degfieldd_lbl 1100 `"General Agriculture"', add
	label define degfieldd_lbl 1101 `"Agriculture Production and Management"', add
	label define degfieldd_lbl 1102 `"Agricultural Economics"', add
	label define degfieldd_lbl 1103 `"Animal Sciences"', add
	label define degfieldd_lbl 1104 `"Food Science"', add
	label define degfieldd_lbl 1105 `"Plant Science and Agronomy"', add
	label define degfieldd_lbl 1106 `"Soil Science"', add
	label define degfieldd_lbl 1199 `"Miscellaneous Agriculture"', add
	label define degfieldd_lbl 1300 `"Environment and Natural Resources"', add
	label define degfieldd_lbl 1301 `"Environmental Science"', add
	label define degfieldd_lbl 1302 `"Forestry"', add
	label define degfieldd_lbl 1303 `"Natural Resources Management"', add
	label define degfieldd_lbl 1401 `"Architecture"', add
	label define degfieldd_lbl 1501 `"Area, Ethnic, and Civilization Studies"', add
	label define degfieldd_lbl 1900 `"Communications"', add
	label define degfieldd_lbl 1901 `"Communications"', add
	label define degfieldd_lbl 1902 `"Journalism"', add
	label define degfieldd_lbl 1903 `"Mass Media"', add
	label define degfieldd_lbl 1904 `"Advertising and Public Relations"', add
	label define degfieldd_lbl 2001 `"Communication Technologies"', add
	label define degfieldd_lbl 2100 `"Computer and Information Systems"', add
	label define degfieldd_lbl 2101 `"Computer Programming and Data Processing"', add
	label define degfieldd_lbl 2102 `"Computer Science"', add
	label define degfieldd_lbl 2105 `"Information Sciences"', add
	label define degfieldd_lbl 2106 `"Computer Information Management and Security"', add
	label define degfieldd_lbl 2107 `"Computer Networking and Telecommunications"', add
	label define degfieldd_lbl 2201 `"Cosmetology Services and Culinary Arts"', add
	label define degfieldd_lbl 2300 `"General Education"', add
	label define degfieldd_lbl 2301 `"Educational Administration and Supervision"', add
	label define degfieldd_lbl 2303 `"School Student Counseling"', add
	label define degfieldd_lbl 2304 `"Elementary Education"', add
	label define degfieldd_lbl 2305 `"Mathematics Teacher Education"', add
	label define degfieldd_lbl 2306 `"Physical and Health Education Teaching"', add
	label define degfieldd_lbl 2307 `"Early Childhood Education"', add
	label define degfieldd_lbl 2308 `"Science  and Computer Teacher Education"', add
	label define degfieldd_lbl 2309 `"Secondary Teacher Education"', add
	label define degfieldd_lbl 2310 `"Special Needs Education"', add
	label define degfieldd_lbl 2311 `"Social Science or History Teacher Education"', add
	label define degfieldd_lbl 2312 `"Teacher Education:  Multiple Levels"', add
	label define degfieldd_lbl 2313 `"Language and Drama Education"', add
	label define degfieldd_lbl 2314 `"Art and Music Education"', add
	label define degfieldd_lbl 2399 `"Miscellaneous Education"', add
	label define degfieldd_lbl 2400 `"General Engineering"', add
	label define degfieldd_lbl 2401 `"Aerospace Engineering"', add
	label define degfieldd_lbl 2402 `"Biological Engineering"', add
	label define degfieldd_lbl 2403 `"Architectural Engineering"', add
	label define degfieldd_lbl 2404 `"Biomedical Engineering"', add
	label define degfieldd_lbl 2405 `"Chemical Engineering"', add
	label define degfieldd_lbl 2406 `"Civil Engineering"', add
	label define degfieldd_lbl 2407 `"Computer Engineering"', add
	label define degfieldd_lbl 2408 `"Electrical Engineering"', add
	label define degfieldd_lbl 2409 `"Engineering Mechanics, Physics, and Science"', add
	label define degfieldd_lbl 2410 `"Environmental Engineering"', add
	label define degfieldd_lbl 2411 `"Geological and Geophysical Engineering"', add
	label define degfieldd_lbl 2412 `"Industrial and Manufacturing Engineering"', add
	label define degfieldd_lbl 2413 `"Materials Engineering and Materials Science"', add
	label define degfieldd_lbl 2414 `"Mechanical Engineering"', add
	label define degfieldd_lbl 2415 `"Metallurgical Engineering"', add
	label define degfieldd_lbl 2416 `"Mining and Mineral Engineering"', add
	label define degfieldd_lbl 2417 `"Naval Architecture and Marine Engineering"', add
	label define degfieldd_lbl 2418 `"Nuclear Engineering"', add
	label define degfieldd_lbl 2419 `"Petroleum Engineering"', add
	label define degfieldd_lbl 2499 `"Miscellaneous Engineering"', add
	label define degfieldd_lbl 2500 `"Engineering Technologies"', add
	label define degfieldd_lbl 2501 `"Engineering and Industrial Management"', add
	label define degfieldd_lbl 2502 `"Electrical Engineering Technology"', add
	label define degfieldd_lbl 2503 `"Industrial Production Technologies"', add
	label define degfieldd_lbl 2504 `"Mechanical Engineering Related Technologies"', add
	label define degfieldd_lbl 2599 `"Miscellaneous Engineering Technologies"', add
	label define degfieldd_lbl 2600 `"Linguistics and Foreign Languages"', add
	label define degfieldd_lbl 2601 `"Linguistics and Comparative Language and Literature"', add
	label define degfieldd_lbl 2602 `"French, German, Latin and Other Common Foreign Language Studies"', add
	label define degfieldd_lbl 2603 `"Other Foreign Languages"', add
	label define degfieldd_lbl 2901 `"Family and Consumer Sciences"', add
	label define degfieldd_lbl 3200 `"Law"', add
	label define degfieldd_lbl 3201 `"Court Reporting"', add
	label define degfieldd_lbl 3202 `"Pre-Law and Legal Studies"', add
	label define degfieldd_lbl 3300 `"English Language, Literature, and Composition"', add
	label define degfieldd_lbl 3301 `"English Language and Literature"', add
	label define degfieldd_lbl 3302 `"Composition and Speech"', add
	label define degfieldd_lbl 3400 `"Liberal Arts and Humanities"', add
	label define degfieldd_lbl 3401 `"Liberal Arts"', add
	label define degfieldd_lbl 3402 `"Humanities"', add
	label define degfieldd_lbl 3501 `"Library Science"', add
	label define degfieldd_lbl 3600 `"Biology"', add
	label define degfieldd_lbl 3601 `"Biochemical Sciences"', add
	label define degfieldd_lbl 3602 `"Botany"', add
	label define degfieldd_lbl 3603 `"Molecular Biology"', add
	label define degfieldd_lbl 3604 `"Ecology"', add
	label define degfieldd_lbl 3605 `"Genetics"', add
	label define degfieldd_lbl 3606 `"Microbiology"', add
	label define degfieldd_lbl 3607 `"Pharmacology"', add
	label define degfieldd_lbl 3608 `"Physiology"', add
	label define degfieldd_lbl 3609 `"Zoology"', add
	label define degfieldd_lbl 3611 `"Neuroscience"', add
	label define degfieldd_lbl 3699 `"Miscellaneous Biology"', add
	label define degfieldd_lbl 3700 `"Mathematics"', add
	label define degfieldd_lbl 3701 `"Applied Mathematics"', add
	label define degfieldd_lbl 3702 `"Statistics and Decision Science"', add
	label define degfieldd_lbl 3801 `"Military Technologies"', add
	label define degfieldd_lbl 4000 `"Interdisciplinary and Multi-Disciplinary Studies (General)"', add
	label define degfieldd_lbl 4001 `"Intercultural and International Studies"', add
	label define degfieldd_lbl 4002 `"Nutrition Sciences"', add
	label define degfieldd_lbl 4003 `"Neuroscience"', add
	label define degfieldd_lbl 4005 `"Mathematics and Computer Science"', add
	label define degfieldd_lbl 4006 `"Cognitive Science and Biopsychology"', add
	label define degfieldd_lbl 4007 `"Interdisciplinary Social Sciences"', add
	label define degfieldd_lbl 4008 `"Multi-disciplinary or General Science"', add
	label define degfieldd_lbl 4101 `"Physical Fitness, Parks, Recreation, and Leisure"', add
	label define degfieldd_lbl 4801 `"Philosophy and Religious Studies"', add
	label define degfieldd_lbl 4901 `"Theology and Religious Vocations"', add
	label define degfieldd_lbl 5000 `"Physical Sciences"', add
	label define degfieldd_lbl 5001 `"Astronomy and Astrophysics"', add
	label define degfieldd_lbl 5002 `"Atmospheric Sciences and Meteorology"', add
	label define degfieldd_lbl 5003 `"Chemistry"', add
	label define degfieldd_lbl 5004 `"Geology and Earth Science"', add
	label define degfieldd_lbl 5005 `"Geosciences"', add
	label define degfieldd_lbl 5006 `"Oceanography"', add
	label define degfieldd_lbl 5007 `"Physics"', add
	label define degfieldd_lbl 5008 `"Materials Science"', add
	label define degfieldd_lbl 5098 `"Multi-disciplinary or General Science"', add
	label define degfieldd_lbl 5102 `"Nuclear, Industrial Radiology, and Biological Technologies"', add
	label define degfieldd_lbl 5200 `"Psychology"', add
	label define degfieldd_lbl 5201 `"Educational Psychology"', add
	label define degfieldd_lbl 5202 `"Clinical Psychology"', add
	label define degfieldd_lbl 5203 `"Counseling Psychology"', add
	label define degfieldd_lbl 5205 `"Industrial and Organizational Psychology"', add
	label define degfieldd_lbl 5206 `"Social Psychology"', add
	label define degfieldd_lbl 5299 `"Miscellaneous Psychology"', add
	label define degfieldd_lbl 5301 `"Criminal Justice and Fire Protection"', add
	label define degfieldd_lbl 5400 `"Public Affairs, Policy, and Social Work"', add
	label define degfieldd_lbl 5401 `"Public Administration"', add
	label define degfieldd_lbl 5402 `"Public Policy"', add
	label define degfieldd_lbl 5403 `"Human Services and Community Organization"', add
	label define degfieldd_lbl 5404 `"Social Work"', add
	label define degfieldd_lbl 5500 `"General Social Sciences"', add
	label define degfieldd_lbl 5501 `"Economics"', add
	label define degfieldd_lbl 5502 `"Anthropology and Archeology"', add
	label define degfieldd_lbl 5503 `"Criminology"', add
	label define degfieldd_lbl 5504 `"Geography"', add
	label define degfieldd_lbl 5505 `"International Relations"', add
	label define degfieldd_lbl 5506 `"Political Science and Government"', add
	label define degfieldd_lbl 5507 `"Sociology"', add
	label define degfieldd_lbl 5599 `"Miscellaneous Social Sciences"', add
	label define degfieldd_lbl 5601 `"Construction Services"', add
	label define degfieldd_lbl 5701 `"Electrical and Mechanic Repairs and Technologies"', add
	label define degfieldd_lbl 5801 `"Precision Production and Industrial Arts"', add
	label define degfieldd_lbl 5901 `"Transportation Sciences and Technologies"', add
	label define degfieldd_lbl 6000 `"Fine Arts"', add
	label define degfieldd_lbl 6001 `"Drama and Theater Arts"', add
	label define degfieldd_lbl 6002 `"Music"', add
	label define degfieldd_lbl 6003 `"Visual and Performing Arts"', add
	label define degfieldd_lbl 6004 `"Commercial Art and Graphic Design"', add
	label define degfieldd_lbl 6005 `"Film, Video and Photographic Arts"', add
	label define degfieldd_lbl 6006 `"Art History and Criticism"', add
	label define degfieldd_lbl 6007 `"Studio Arts"', add
	label define degfieldd_lbl 6099 `"Miscellaneous Fine Arts"', add
	label define degfieldd_lbl 6100 `"General Medical and Health Services"', add
	label define degfieldd_lbl 6102 `"Communication Disorders Sciences and Services"', add
	label define degfieldd_lbl 6103 `"Health and Medical Administrative Services"', add
	label define degfieldd_lbl 6104 `"Medical Assisting Services"', add
	label define degfieldd_lbl 6105 `"Medical Technologies Technicians"', add
	label define degfieldd_lbl 6106 `"Health and Medical Preparatory Programs"', add
	label define degfieldd_lbl 6107 `"Nursing"', add
	label define degfieldd_lbl 6108 `"Pharmacy, Pharmaceutical Sciences, and Administration"', add
	label define degfieldd_lbl 6109 `"Treatment Therapy Professions"', add
	label define degfieldd_lbl 6110 `"Community and Public Health"', add
	label define degfieldd_lbl 6199 `"Miscellaneous Health Medical Professions"', add
	label define degfieldd_lbl 6200 `"General Business"', add
	label define degfieldd_lbl 6201 `"Accounting"', add
	label define degfieldd_lbl 6202 `"Actuarial Science"', add
	label define degfieldd_lbl 6203 `"Business Management and Administration"', add
	label define degfieldd_lbl 6204 `"Operations, Logistics and E-Commerce"', add
	label define degfieldd_lbl 6205 `"Business Economics"', add
	label define degfieldd_lbl 6206 `"Marketing and Marketing Research"', add
	label define degfieldd_lbl 6207 `"Finance"', add
	label define degfieldd_lbl 6209 `"Human Resources and Personnel Management"', add
	label define degfieldd_lbl 6210 `"International Business"', add
	label define degfieldd_lbl 6211 `"Hospitality Management"', add
	label define degfieldd_lbl 6212 `"Management Information Systems and Statistics"', add
	label define degfieldd_lbl 6299 `"Miscellaneous Business and Medical Administration"', add
	label define degfieldd_lbl 6402 `"History"', add
	label define degfieldd_lbl 6403 `"United States History"', add
	// Attach the label set to the detailed field-of-degree code
	label values degfieldd degfieldd_lbl
	
	// Value labels for the 0/1 indicators generated in section [2]:
	// each label set is defined in a single statement, then attached.
	label define metarea_lbl 0 "Not in metro area" 1 "In metro area"
	label values metarea metarea_lbl

	label define female_lbl 0 "Male" 1 "Female"
	label values female female_lbl

	label define hs_lbl 0 "No HS diploma or GED" 1 "HS Diploma or GED"
	label values HS hs_lbl

	label define geSC_lbl 0 "No college experience" 1 "Some college experience"
	label values geSC geSC_lbl

	label define AA_lbl 0 "No associate degree" 1 "Associate degree"
	label values AA AA_lbl

	label define BA_lbl 0 "No bachelor degree" 1 "Bachelor degree"
	label values BA BA_lbl
	
*------------------------------------------------------------------------------|
**# [4] Convert earnings to 2022 dollars

	// CPI-U index values, one column per year. The original code used column 21
	// for 2021 and column 22 for 2022, so column 1 is taken to be 2001
	// (assumes the columns are consecutive calendar years -- confirm when extending).
	local cpi_first_year 2001
	matrix CPIU = 	(177.0666667, 179.875, 183.9583333, 188.8833333, 195.2916667, ///
					 201.5916667, 207.3424167, 215.3025, 214.537, 218.0555, ///
					 224.9391667, 229.5939167, 232.9570833, 236.7361667, 237.017, ///
					 240.0071667, 245.1195833, 251.1068333, 255.657, 258.811, ///
					 270.970, 292.655) 

	// Look the two years up by position instead of hard-coding 21 and 22
	local col_2021 = 2021 - `cpi_first_year' + 1
	local col_2022 = 2022 - `cpi_first_year' + 1

	// Factor that converts 2021 dollars into 2022 dollars. It is held in a scalar
	// (double precision) rather than a float variable repeated on every row, so the
	// ratio is not rounded to float precision before the multiplication.
	scalar d2122 = CPIU[1,`col_2022'] / CPIU[1,`col_2021']

	replace earnpos = earnpos * scalar(d2122)
	scalar drop d2122
	
*------------------------------------------------------------------------------|
**# [5] Generate earnings by total and sub-group

	// Build, for every education x sex x race cell, a copy of earnpos that is
	// nonmissing only inside the cell, and accumulate the "newvar = source" pairs
	// that the collapse commands in the next section consume.
	//   mn  -> means      md -> medians      N -> counts (education-specific cells)
	//   p60 -> 60th percentiles and N2 -> counts for the all-education cells
	local mn  "mnearn = earnpos" 													// generate mean, median, count, and 60th percentile
	local md  "mdearn = earnpos" 
	local N   "Nearn = earnpos" 
	local p60 "p60earnpos = earnpos" 
	local N2  "" 

	local edcat     "HS AA BA" 														// generate by educational level
	local sexnames  "male female all" 												// word f+1 names the group for female == f; "all" pools both
	local racenames "white black hisp asian aian oth all" 							// word r names racecat6 == r; "all" pools 1-6

	foreach e of local edcat { 
		forvalues f = 0/2 { 
			local sex : word `=`f' + 1' of `sexnames'
			if `f' < 2 {
				local si "female==`f'"
			}
			else {
				local si "inlist(female,0,1)"
			}

			forvalues r = 1/7 { 
				local race : word `r' of `racenames'
				if `r' <= 6 {
					local ri "racecat6 ==`r'"
				}
				else {
					local ri "inrange(racecat6,1,6)"
				}

				local cell "_r`race'_s`sex'"

				// Earnings restricted to this education level, sex, and race cell
				gen earnpos`e'`cell' = earnpos if `e'==1 & `si' & `ri'

				// The all-education cells do not depend on e, so create them once
				// (on the first pass, e == HS). The stray ";" that followed the open
				// brace here has been removed: only comments may follow "{".
				if "`e'" == "HS" {
					gen earnpos`cell' = earnpos if `si' & `ri'
					local p60 "`p60' p60earnpos`cell' = earnpos`cell'"
					local N2 "`N2' Nearnpos`cell' = earnpos`cell'"
				}

				local mn "`mn' mnearnpos`e'`cell' = earnpos`e'`cell'"
				local md "`md' mdearnpos`e'`cell' = earnpos`e'`cell'"
				local N "`N' Nearnpos`e'`cell' = earnpos`e'`cell'"
			} 
		} 
	} 

*------------------------------------------------------------------------------|
**# [6] Collapse into state and PUMA-specific datasets

	// The same statistic specification is applied at all three geographic levels:
	// means (collapse default), medians, 60th percentiles, and counts, weighted by perwt.
	local collapse_spec "`mn' (median) `md' (p60) `p60' (count) `N' `N2'"

	tempfile national state ipums_collapsed

	// National totals, keyed with placeholder state "00" and PUMA "00000"
	preserve
		collapse `collapse_spec' [aw=perwt]
		gen str_stfips = "00"
		gen str_puma   = "00000"
		save `national'
	restore 

	// State totals, keyed with placeholder PUMA "00000"
	preserve 
		collapse `collapse_spec' [aw=perwt], by(str_stfips)
		gen str_puma = "00000"
		save `state'
	restore

	// PUMA-level estimates stay in memory; stack the national and state rows beneath them
	collapse `collapse_spec' [aw=perwt], by(str_stfips str_puma) 
	append using `national'
	append using `state'
	sort str_stfips str_puma
	order str_stfips

	save `ipums_collapsed'
	
*------------------------------------------------------------------------------|
**# [7] Match microdata to PUMA-County crosswalk

	*Crosswalk generated at https://mcdc.missouri.edu/applications/geocorr2022.html
	*Using all states selected, PUMA as source, County as target, 2020 census pop as weight
	
	import delimited "$raw\geocorr2022_2411909757(puma12).csv", varnames(1) clear
	
	drop in 1																		// First data row holds the variable labels
	destring pop20-afact, replace
	keep if state != "72"
	rename (state puma12) (str_stfips str_puma)
	keep county str_stfips str_puma afact
	replace str_puma = trim(str_puma)
	
	// Fill blank PUMA codes (5 counties) with the largest PUMA code found in the same county
	destring str_puma, generate(num_puma)
	egen max_puma = max(num_puma), by(county)
	replace str_puma = string(max_puma, "%05.0f") if str_puma == ""
	drop num_puma max_puma
	
	// Attach the collapsed IPUMS estimates to every PUMA-county pair.
	// Rows that exist only in the IPUMS file are expected to be the state and
	// national aggregates (placeholder PUMA "00000").
	merge m:1 str_stfips str_puma using `ipums_collapsed', nogenerate
	
	// Aggregate rows carry no county or allocation factor: give them placeholders
	replace county = "00000" if missing(county)
	replace afact = 1 if missing(afact)
	
*------------------------------------------------------------------------------|
**# [8] Collapse PUMA estimates to county level using crosswalk weight
	
	// Gather the statistic variables by name prefix
	unab avgs : mne*
	unab meds : mde*
	unab p60  : p60*
	unab N    : Nea*
	
	// Mean of means, median of medians, median of p60s, and sum of counts,
	// weighted by the crosswalk allocation factor.
	// NOTE(review): with aweights, collapse's (sum) uses weights normalised to the
	// number of observations, so the summed counts are rescaled -- confirm this is intended.
	collapse (mean) `avgs' (median) `meds' `p60' (sum) `N' [aw=afact], by(str_stfips county) 
	
*------------------------------------------------------------------------------|
**# [9] Store national and state estimates separately for wide format 

	tempfile ntl_merge ntl_state_temp ipums_county

	// National row: prefix every statistic with ntl_ and add a constant merge key
	preserve 
		keep if str_stfips == "00"
		drop county str_stfips
		rename * ntl_*
		gen ntl_n = 1
		save `ntl_merge'
	restore
		
	// State rows: prefix every statistic with st_ (state key keeps its name),
	// then attach the national row to each state through the constant key.
	// The _merge variable from this merge is left in the saved file.
	preserve 
		keep if county == "00000" & str_stfips != "00"
		drop county
		rename * st_*
		rename st_str_stfips str_stfips
		gen ntl_n = 1
		merge m:1 ntl_n using `ntl_merge'
		save `ntl_state_temp'
	restore 
	
	// What remains in memory is the county-level file
	drop if county == "00000"
	
	save `ipums_county'
	save "$temp\ipums_county.dta", replace											// Save county file for future use

*------------------------------------------------------------------------------|
**# [8] Match to CZ IDs to estimate proportions

	// County -> commuting-zone (OUT10) lookup, keyed on a 5-digit string FIPS code
	use FIPS OUT10 using "U:\data\psu_cz\psu_cz.dta", clear
	gen county = string(FIPS, "%05.0f")
	drop FIPS

	// Keep every county in the IPUMS file; lookup-only counties are discarded
	merge 1:m county using `ipums_county', keep(match using) nogenerate

	// Hand-assign zones to counties listed here by their FIPS codes
	replace OUT10 = 29 if inlist(county,"02063","02066")							// Valdez-Cordova, AK county split
	replace OUT10 = 30 if inlist(county,"02158")									// Kusilvak, AK change
	replace OUT10 = 377 if inlist(county,"46102")									// Oglala Lakota, SD change
	
	sort str_stfips county
	order str_stfips county
	
*------------------------------------------------------------------------------|
**# [9] Get total county pops of 22-40 year olds for CZ weighting

	// Pull county population counts from the ACS and total them into pop_2240,
	// which is used as the weight when counties are collapsed into commuting zones.
	// NOTE(review): in the ACS B01001 layout these cells look like ages 22-39
	// (male _010-_013, female _034-_037) rather than 22-40 -- confirm the intended range.
	preserve
		getcensus 	B01001_010 B01001_011 B01001_012 B01001_013 ///
					B01001_034 B01001_035 B01001_036 B01001_037, ///
						year(2021) sample(5) geo(county) key("$acskey") ///
						nolabel noerror clear
	
		// Row total over every downloaded b* column; stays missing if all are missing
		egen pop_2240 = rowtotal(b*), missing

		// Rebuild the 5-character county key as state code + county code
		replace county = state + county
		keep county pop_2240

		tempfile county_pop
		save `county_pop'
	restore
	
	// Counties found only in the Census pull are not kept
	merge 1:1 county using `county_pop', keep(master match) nogenerate				// Puerto rico
	
*------------------------------------------------------------------------------|
**# [10] Collapse into CZ estimates using pop weight

	// Gather the statistic variables by name prefix
	unab avgs : mne*
	unab meds : mde*
	unab p60  : p60*
	unab N    : Nea*
	
	// Mean of means, median of medians, median of p60s, and sum of counts,
	// weighted by the county population total.
	// NOTE(review): with aweights, collapse's (sum) uses weights normalised to the
	// number of observations, so the summed counts are rescaled -- confirm this is intended.
	collapse (mean) `avgs' (median) `meds' `p60' (sum) `N' [aw=pop_2240], by(OUT10) 
	
	// Prefix every statistic with cz_; the zone identifier keeps its name
	rename * cz_*
	rename cz_OUT10 OUT10
	
	tempfile cz_temp
	save `cz_temp'
	
*------------------------------------------------------------------------------|
**# [11] Get CZ-level descriptives for the mapping tool

// Get ACS estimates-------------------------------------------------
	// Shared getcensus options and the variable/table lists for each download
	local options "years(2021) sample(5) geography(county) key($acskey) clear"
	local pop_age "DP05_0001 DP05_0019"
	local pov "DP03"
	local ed_attain "DP02_0059 DP02_0060 DP02_0061 DP02_0062 DP02_0063 DP02_0064 DP02_0065 DP02_0066"
	local race_nonhisp "B03002_003 B03002_004 B03002_005 B03002_006 B03002_007 B03002_008 B03002_009"
	local race_hisp "B03002_013 B03002_014 B03002_015 B03002_016 B03002_017 B03002_018 B03002_019"

	tempfile pop_age_file pov_file ed_attain_file race_nonhisp_file race_hisp_file

	// Population and age
	getcensus `pop_age', `options'
	drop *m																			// don't need MOE
	save `pop_age_file'
	
	// Full DP03 table: raise the timeouts first so the large request is not cut off
	set timeout1 60																	// Prevent Stata from kicking connection
	set timeout2 360
	getcensus `pov', `options'
	keep year-name dp03_0008e dp03_0009pe dp03_0129pe dp03_0032e-dp03_0045pe		// Keep only relevant vars
	rename (dp03_0129pe dp03_0009pe) (dp03_0129p dp03_0009p)						// Shield these two from the *pe drop
	drop *m *pe
	save `pov_file'
	
	// Educational attainment and non-Hispanic race counts share the same handling
	foreach tbl in ed_attain race_nonhisp {
		getcensus ``tbl'', `options'
		drop *m
		save ``tbl'_file'
	}
		
	// Hispanic/Latino total: sum the seven race cells B03002_013 through _019
	getcensus `race_hisp', `options'
	local hisp_cells
	forvalues k = 13/19 {
		local hisp_cells `hisp_cells' b03002_0`k'e
	}
	egen hisp_tot = rowtotal(`hisp_cells')											// All hisp/lat, regardless of race
	keep year-name hisp_tot
	save `race_hisp_file'
								
	// Combine everything onto the Hispanic-total file, one county per row
	foreach tbl in pop_age pov ed_attain race_nonhisp {
		merge 1:1 state county using ``tbl'_file', nogenerate
	}

// Clean up and rename vars------------------------------------------
// Turns the two percentage columns into counts, combines the two lowest
// education categories, fixes the column order, and swaps the ACS table codes
// for descriptive names.

	// Counts derived from a percentage times its denominator
	generate povunder18 = (dp03_0129p/100) * dp05_0019e
	generate unemploy_n = (dp03_0009p/100) * dp03_0008e

	// Less than high school = sum of the two categories below HS completion
	generate ed_attain_lessthanhs = dp02_0060e + dp02_0061e

	// Inputs that are no longer needed once the derived columns exist
	drop dp03_0129p dp03_0009p dp02_0060e dp02_0061e

	// Column order matters: later steps select variables by position ranges
	order year state county name geo_id dp05_0001e b03002_003e b03002_004e ///
		b03002_005e b03002_006e b03002_007e b03002_008e b03002_009e	hisp_tot ///
		dp02_0059e ed_attain_lessthanhs dp02_0062e dp02_0063e dp02_0064e ///
		dp02_0065e dp02_0066e dp05_0019e povunder18 dp03_0008e unemploy_n

	// Total population and non-Hispanic race groups
	rename (dp05_0001e b03002_003e b03002_004e b03002_005e ///
			b03002_006e b03002_007e b03002_008e b03002_009e) ///
		   (tot_pop nonhisp_white nonhisp_black nonhisp_aian ///
			nonhisp_asian nonhisp_nhpi nonhisp_other nonhisp_twoplus)

	// Educational attainment
	rename (dp02_0059e dp02_0062e dp02_0063e dp02_0064e dp02_0065e dp02_0066e) ///
		   (ed_attain_denom ed_attain_hs ed_attain_somecoll ///
			ed_attain_assoc ed_attain_bach ed_attain_grad)

	// Denominators for the child-poverty and unemployment counts
	rename (dp05_0019e dp03_0008e) (childpov_denom unemploy_denom)

	// Industry columns
	rename (dp03_0032e dp03_0033e dp03_0034e dp03_0035e dp03_0036e ///
			dp03_0037e dp03_0038e dp03_0039e dp03_0040e dp03_0041e ///
			dp03_0042e dp03_0043e dp03_0044e dp03_0045e) ///
		   (industry_denom industry_agri industry_construct ///
			industry_manufacture industry_wholesale industry_retail ///
			industry_transpo industry_information industry_finance ///
			industry_professional industry_educational industry_arts ///
			industry_otherserv industry_publicadmin)
	
// Match CZ IDs and prep for merge-----------------------------------
// Adds the rucc code and the OUT10 commuting-zone ID to every county and
// patches counties whose FIPS code differs between the ACS pull and the
// lookup files.
// NOTE(review): both merges keep rows that exist only in the lookup file
// unless they are dropped by FIPS below - confirm no such rows (with an empty
// state) remain before the state-level sums are built.

	drop if state == "72"															// Remove PR

	*RUCC
		generate countyfips = state + county										// 5-character string key
		merge 1:1 countyfips using "C:\Users\colston\Desktop\public_data\rucc\rucc.dta", nogenerate

		// Correct fips changes
			replace rucc = 9 if inlist(countyfips,"02063","02066")					// Valdez-Cordova split
			drop if countyfips == "02261"

	*PSU
		// The CZ lookup is keyed on a numeric FIPS, so convert the string key
		destring countyfips, replace
		rename countyfips FIPS

		merge 1:1 FIPS using "C:\Users\colston\Desktop\public_data\psu_cz\psu_cz.dta", keepusing(OUT10) nogenerate

		// Assign OUT10 to the FIPS codes used by the ACS pull ...
		replace OUT10 = 29 if inlist(FIPS,2063,2066)								// Valdez-Cordova
		replace OUT10 = 30 if FIPS == 2158											// Renamed Kusilvak
		replace OUT10 = 377 if FIPS == 46102										// Renamed Oglala

		// ... and remove the three FIPS codes they stand in for
		drop if inlist(FIPS, 2261, 2270, 46113)

		order year state county FIPS rucc name geo_id

// Create combined estimates-----------------------------------------
// Sums every county-level count to the state level (st_ prefix) and to the
// commuting-zone level (cz_ prefix), converts the sums into shares, and stores
// one dataset per level in the tempfiles roi_acs_state and roi_acs_cz.

	*rural CZ pop
		gen rural_pop = tot_pop if inrange(rucc,4,9)								// county population where rucc is 4-9, else missing
		recode rural_pop (.=0)
		order OUT10, last															// moves OUT10 behind rural_pop, i.e. outside the range below

	// total() is the documented name of the egen function that used to be
	// called sum(); like sum(), it treats missing values as 0.
	// st_ and cz_ variables are created in the same relative order as their
	// sources, which the range selections below rely on.
	ds tot_pop-rural_pop
	foreach x in `r(varlist)' {
		egen st_`x' = total(`x'), by(state)
		egen cz_`x' = total(`x'), by(OUT10)
	}

	*generate state-level and CZ-level datasets
	// Both levels go through identical steps; only the ID variable, the
	// variable prefix, and the final ID formatting differ.
		tempfile roi_acs_state roi_acs_cz

		foreach lvl in st cz {
			if "`lvl'" == "st" local id state
			else local id OUT10

			preserve
				// One row per state / commuting zone; the sums are constant within
				// each group, so it does not matter which duplicate is kept
				keep `id' `lvl'_*
				duplicates drop `id', force

				// Race/ethnicity shares of total population
				ds `lvl'_nonhisp_white-`lvl'_hisp_tot
				foreach y in `r(varlist)' {
					gen pct_`y' = `y' / `lvl'_tot_pop
					drop `y'
				}

				// Educational-attainment shares
				ds `lvl'_ed_attain_lessthanhs-`lvl'_ed_attain_grad
				foreach y in `r(varlist)' {
					gen pct_`y' = `y' / `lvl'_ed_attain_denom
					drop `y'
				}

				// Child poverty, unemployment, and rural-population shares
				gen pct_`lvl'_povunder18 = `lvl'_povunder18 / `lvl'_childpov_denom
				gen pct_`lvl'_unemploy = `lvl'_unemploy_n / `lvl'_unemploy_denom
				gen pct_`lvl'_rural_pop = `lvl'_rural_pop / `lvl'_tot_pop
					drop `lvl'_povunder18 `lvl'_unemploy_n `lvl'_rural_pop

				// Industry shares
				ds `lvl'_industry_agri-`lvl'_industry_publicadmin
				foreach y in `r(varlist)' {
					gen pct_`y' = `y' / `lvl'_industry_denom
					drop `y'
				}

				if "`lvl'" == "st" {
					rename state str_stfips											// merge key used with the state earnings file
					save `roi_acs_state'
				}
				else {
					gen commuting_zone = string(OUT10, "%03.0fc")					// zero-padded string version of OUT10
					save `roi_acs_cz'
				}
			restore
		}

// Match back with earnings data-------------------------------------
// Attaches the ACS shares to the state-level and CZ-level earnings data and
// writes both results to the temp folder.

	// State level: remove the _merge variable already stored in the earnings
	// data, then merge on str_stfips
	use "`ntl_state_temp'", clear
	drop _merge
	merge 1:1 str_stfips using "`roi_acs_state'"
	drop _merge
	// No tempfile because must be referenced in another .do file
	save "$temp\ntl_state_earnings.dta", replace

	// CZ level: merge on OUT10
	use "`cz_temp'", clear
	merge 1:1 OUT10 using "`roi_acs_cz'"
	drop _merge
	// No tempfile because must be referenced in another .do file
	save "$temp\cz_earnings.dta", replace
	

*------------------------------------------------------------------------------|
**# [12] Compare this new weighted approach to Qi's original

/* No need to run this each time
	
	use "$temp\cz_earnings.dta", clear
	
	rename OUT10 commuting_zone
	tostring commuting_zone, replace
	merge 1:1 commuting_zone using "U:\projects_current\role_of_geography_gates\data\raw\acs_subgroup_wages_cmzone_level.dta"
	
	gen mnearn_diff = cz_mnearn - mnearn
	gen mdearn_diff = cz_mdearn - mdearn
	gen p60earn_diff = cz_p60earnpos - p60earnpos
	
	gen mnearnposHS_rwhite_smale_diff = cz_mnearnposHS_rwhite_smale - mnearnposHS_rwhite_smale
	gen mnearnposHS_rblack_smale_diff = cz_mnearnposHS_rblack_smale - mnearnposHS_rblack_smale
	gen mnearnposHS_rhisp_smale_diff = cz_mnearnposHS_rhisp_smale - mnearnposHS_rhisp_smale
	
	gen mdearnposHS_rwhite_smale_diff = cz_mdearnposHS_rwhite_smale - mdearnposHS_rwhite_smale
	gen mdearnposHS_rblack_smale_diff = cz_mdearnposHS_rblack_smale - mdearnposHS_rblack_smale
	gen mdearnposHS_rhisp_smale_diff = cz_mdearnposHS_rhisp_smale - mdearnposHS_rhisp_smale
	
	gen p60earnpos_rwhite_smale_diff = cz_p60earnpos_rwhite_smale - p60earnpos_rwhite_smale
	gen p60earnpos_rblack_smale_diff = cz_p60earnpos_rblack_smale - p60earnpos_rblack_smale
	gen p60earnpos_rhisp_smale_diff = cz_p60earnpos_rhisp_smale - p60earnpos_rhisp_smale
	
	ds mnearn*diff mdearn*diff p60earnpos*diff
	foreach x in `r(varlist)' {
		sum `x', d
		hist `x' if `x' != ., freq addlabels ylabel(, nolabel notick) ytitle("") ///
			name(`x', replace) nodraw
	}
	
	ds mnearn*diff
	graph combine `r(varlist)', rows(2) title("Mean earnings difference", size(small)) name(mnearn, replace)
	
	ds mdearn*diff
	graph combine `r(varlist)', rows(2) title("Median earnings difference", size(small)) name(mdearn, replace)
	
	ds p60earnpos*diff
	graph combine `r(varlist)', rows(2) title("P60 earnings difference", size(small)) name(p60earn, replace)
	
	
	
	
	
	
	
	use "$temp\ntl_state_earnings.dta", clear
	rename str_stfips statefip
	destring statefip, replace

	drop _merge
	
	merge 1:1 statefip using "U:\projects_current\role_of_geography_gates\data\raw\acs_subgroup_wages_state_level.dta"
	gen mnearn_diff = st_mnearn - mnearn
	gen mdearn_diff = st_mdearn - mdearn
	gen p60earn_diff = st_p60earnpos - p60earnpos
	
	*identical
	
	
*/
	
	
	
	
	
	
	
	
	
	
	
