/*

	Project: Uneven Growth

	Purpose: Extract detailed distribution of income from NBER server

	Data: NBER/IRS TAXSIM Public Use Files

*/

* Start from a clean session: -clear all- removes the data set and other objects held in memory
clear all
* Clear the Results window
cls

********************************************************************************
* - NBER SERVER PART -
********************************************************************************

/*
cd /homes/nber/bmoll/UG

*ssc install gtools



* ------------------------------------------------------------------------------
* - Extract and transform data -
* ------------------------------------------------------------------------------

* - data directory -
local data /home/data/soi/taxsim/dta

* - Define variable lists -
local marriage data2
local wages data11
local capital_gross_income data12 data13 data14 data73 data74 data76 data77 data78
local capital_gains data18 data19 data68 data70 data89
local entre_income data17 data21 data75 data79
local partnership data75 // Partnership income
local scorp data79 // S-Corporation income
local schedc data17 data21 // Schedule C (Sole-proprietorship) + farm income
local interest_paid data53 data56 data57 // Consists of mortgage, investment, and other interest

/*
	Note: For a variable description see https://www.nber.org/taxsim-ndx.txt
*/

local start_year 1979
local end_year 2016




* ------------------------------------------------------------------------------
* - Calculate aggregate size of each component -
* ------------------------------------------------------------------------------

forvalues year = `start_year'/`end_year' {

	di "  "
	di "Current Tax Year: " `year'

	* - Load data -
	use data1 data15 `marriage' `wages' `capital_gross_income' `capital_gains' `entre_income' `interest_paid' using `data'/x`year'.dta, clear
	
	* - Observation weight -
	rename data1 weight
	rename data15 agi
	
	* - Adjust for population size -
	foreach var in agi `wages' `capital_gross_income' `capital_gains' `entre_income' `interest_paid' {
		qui replace `var' = `var' * weight/ 10^9
	}
		
	* - Aggregate Item By Item -
	gcollapse (sum) weight agi `wages'  `capital_gross_income' `capital_gains' `entre_income' `interest_paid'
		
	* - Generate Aggregate Items -
	foreach var in wages capital_gross_income capital_gains entre_income partnership scorp schedc interest_paid {
		egen `var' = rowtotal(``var'')
	}
	gen capital_net_income = capital_gross_income - interest_paid
	
	gen year = `year'
		
	sort year agi wages capital_gross_income capital_gains entre_income partnership scorp schedc interest_paid
		
	if (`year' == `start_year') {
		qui save clean_data/taxsim_aggregate.dta,replace
	}
	else {
		append using clean_data/taxsim_aggregate.dta
			
		* - Label variables in last year-
		if (`year' == `end_year') {
			lab var year "Tax Year"
			lab var wages "Avg. Wage Income"
			lab var capital_net_income "Avg. Capital Income"
			lab var capital_gross_income "Avg. Gross Capital Income"
			lab var capital_gains "Avg. Capital Gains"
			lab var entre_income "Avg. Entrepreneurial Income"
			lab var partnership "Avg. Partnership Income"
			lab var schedc "Avg. Schedule C and Farm Income"
			lab var scorp "Avg. S-Corporation Income"
			lab var interest_paid "Avg. Interest Paid"
			lab var agi "Annual Gross Income (NBER TAXSIM)"
		}
		qui save clean_data/taxsim_aggregate.dta,replace		
	}
		
}


* ------------------------------------------------------------------------------
* - Calculate Percentiles -
* ------------------------------------------------------------------------------

forvalues year = `start_year'/`end_year' {
	
	di "  "
	di "Current Tax Year: " `year'

	* - Load data and define variables -----------------------------------------
	
	* - Load data -
	use data1 data15 `marriage' `wages' `capital_gross_income' `capital_gains' `entre_income' `interest_paid' using `data'/x`year'.dta, clear
	
	* - Observation weight -
	rename data1 weight
	rename data15 agi
	
	* - Define variables -
	foreach var in wages capital_gross_income capital_gains entre_income partnership scorp schedc interest_paid {
		egen `var' = rowtotal(``var'')
		replace `var' = `var'/2 if inlist(data2,2)
	}

	drop data*
	* - Define capital income -
	gen capital_net_income = capital_gross_income - interest_paid
	
	* - Define total income -
	gen gross_income = wages + capital_gross_income + entre_income
	gen net_income = wages + capital_net_income + entre_income
	
	* - Keep only observations with positive income -
	*keep if total_income > 0 
	
	* - Store cleaned raw data temporarily -
	qui save clean_data/temp.dta,replace
	
	* - Generate percentiles ---------------------------------------------------
	di "Start calculating percentiles"
	*foreach var in gross_income net_income wages capital_net_income entre_income {
	foreach var in gross_income net_income  {
	
		use clean_data/temp.dta,clear 
	
		* - Define percentile with additional detail above 95th percentile (starting at 95.1) and above 99th percentile (starting at 99.01)-
		qui xtile `var'_percentile = `var' [aw = weight], n(100)
		qui xtile `var'_permille = `var' [aw = weight],n(1000)
		*qui xtile `var'_per105 = `var' [aw = weight],n(10000)
		
		qui replace `var'_percentile = `var'_permille/10 if `var'_percentile > 99 
		*qui replace `var'_percentile = `var'_per105/100 if `var'_percentile > 99 
	
		qui gcollapse(mean) agi gross_income net_income wages capital_gross_income capital_net_income capital_gains entre_income partnership scorp schedc interest_paid [aw=weight], by(`var'_percentile)
	
		* - Generate tax year -
		gen year = `year'
	
		order year `var'_percentile gross_income net_income wages capital_net_income entre_income capital_gains agi
		sort year `var'_percentile
		
		* - Store results -
		if (`year' == `start_year') {
			qui save clean_data/nbertaxsim_`var'_percentile.dta, replace
		}
		else {
			append using clean_data/nbertaxsim_`var'_percentile.dta
			sort year `var'_percentile
		
			* - Label variables in last year-
			if (`year' == `end_year') {
				lab var year "Tax Year"
				lab var `var'_percentile "Percentile"
				lab var gross_income "Avg. Gross Income"
				lab var net_income "Avg. Net Income"
				lab var wages "Avg. Wage Income"
				lab var capital_net_income "Avg. Capital Income"
				lab var capital_gross_income "Avg. Gross Capital Income"
				lab var capital_gains "Avg. Capital Gains"
				lab var entre_income "Avg. Entrepreneurial Income"
				lab var partnership "Avg. Partnership Income"
				lab var schedc "Avg. Schedule C and Farm Income"
				lab var scorp "Avg. S-Corporation Income"
				lab var interest_paid "Avg. Interest Paid"
				lab var agi "Annual Gross Income (NBER TAXSIM)"
			}
		
			qui save clean_data/nbertaxsim_`var'_percentile.dta, replace
		}	
	}
	
	* - Erase temporary files -
	erase clean_data/temp.dta
}

*/
********************************************************************************
* - LOCAL PART -
********************************************************************************

* ------------------------------------------------------------------------------
* - Calculate relevant measures from cleaned data -
* ------------------------------------------------------------------------------


* ------------------------------------------------------------------------------
* For every combination of income concept (stub), base year and end year:
*   1. load the percentile-level means for the two years,
*   2. deflate all income variables with the GDP deflator,
*   3. compute the annualized growth rate of total income per percentile,
*   4. split that growth into labor, capital and S-corporation contributions,
*   5. export one row per percentile (end-year rows only) as CSV.
*
* Inputs (relative to the working directory):
*   clean_data/nbertaxsim_<stub>_income_percentile.dta
*   clean_data/deflator.dta   (needs the variables year and gdp_deflator)
* Output:
*   clean_data/irs_clean_<start_year>_<end_year>.csv
*
* NOTE(review): the output file name does not contain the stub, so adding a
* second stub to the outer loop would overwrite the results of the first one.
* ------------------------------------------------------------------------------

foreach stub in gross {
foreach start_year in 1980 {
foreach end_year in 2007 2012 {

	* ------------------------------------------------------------------------------
	* - Join with NBER TAXSIM data and prepare data -
	* ------------------------------------------------------------------------------

	* - Load percentile data (only the two years that are compared) ---------------
	use clean_data/nbertaxsim_`stub'_income_percentile.dta if inlist(year,`start_year',`end_year') ,clear

	rename `stub'_income_percentile percentile

	* - Attach the deflator; many percentile rows share one deflator row per year -
	* keep(1 3) retains master rows even when no deflator is found for their year;
	* such rows end up with missing values after deflating.
	merge m:1 year using clean_data/deflator.dta, nogen keep(1 3)

	sort year percentile

	* - Apply deflator -
	* The range gross_income-interest_paid depends on the column order of the
	* percentile file: every variable stored between those two is rescaled.
	foreach var of varlist gross_income-interest_paid {
		replace `var' = `var'*(100/gdp_deflator)
	}

	* ------------------------------------------------------------------------------
	* - Generate changes -
	* ------------------------------------------------------------------------------

	* period numbers the distinct years consecutively (1, 2, ...), so that the lag
	* operator L. of the end year refers to the start year.
	egen period = group(year)

	xtset percentile period

	* - Annualized (compound) growth rate of total income in percent -
	* Missing for the first period (no lag) and whenever the income ratio is not
	* positive.
	gen `stub'_income_gr = ((`stub'_income/L.`stub'_income)^(1/(`end_year'-`start_year')) - 1)*100

	* - Construct different measures of capital and labor component ----------------
	* Partnership and Schedule C income are split 70/30 between labor and capital;
	* S-corporation income is split 25/75 between capital and a separate
	* S-corporation component.
	gen labor_income = wages + 0.7 * (partnership + schedc)
	gen capital_income = capital_`stub'_income + 0.3 * (partnership + schedc ) + 0.25 * scorp
	gen scorp_income = scorp*0.75

	* - Contributions for baseline -
	* Each component receives the share of total growth equal to its share in the
	* change of total income between the two periods.
	foreach var in labor_income capital_income scorp_income {
		gen `var'_gr = D.`var'/D.`stub'_income * `stub'_income_gr
	}

	* Growth variables are only defined in the end-year rows
	keep if year == `end_year'

	* Built-in keep + order instead of the community-contributed -keeporder-:
	* same variables, same column order, no SSC package required.
	keep percentile `stub'_income_gr labor_income_gr scorp_income_gr capital_income_gr
	order percentile `stub'_income_gr labor_income_gr scorp_income_gr capital_income_gr

	*save clean_data/irs_clean_`start_year'_`end_year'.dta,replace
	export delimited clean_data/irs_clean_`start_year'_`end_year'.csv, replace

}
}
}
