/*

	Project: Uneven Growth

	Purpose: Calculate income distribution and growth from files

	Data: Piketty, Saez, and Zucman (2018) files as well as DINAs

*/

* Start from a clean console and an empty session.
clear all
cls

* Every relative path below is resolved against the project folder.
cd ~/Dropbox/RA_Pascual/UNEVEN_GROWTH/IRS_SZ_Figure

* Folder holding the yearly DINA micro-files (usdinaYYYY.dta).
local data raw_data/PSZ2017Dinafiles

* ------------------------------------------------------------------------------
* - Get GDP and PCE deflators from FRED -
* ------------------------------------------------------------------------------

* Download both price-index series in a single request.
freduse GDPDEF DPCERD3Q086SBEA, clear

* Friendlier names that share the _deflator suffix used below.
rename (GDPDEF DPCERD3Q086SBEA) (gdp_deflator pce_deflator)

* The calendar year is the first four characters of the string date.
gen year = real(substr(date,1,4))

* Average the within-year observations into one value per year.
gcollapse (mean) *_deflator, by(year)

* - Normalize to 1 in 2012 -
* Each index is divided by its own 2012 value.
tempname base2012
foreach series of varlist pce_deflator gdp_deflator {
	quietly summarize `series' if year == 2012
	scalar `base2012' = r(mean)
	replace `series' = `series' / scalar(`base2012')
}

save clean_data/deflator.dta, replace


* ------------------------------------------------------------------------------
* - Corporate/S-corporate adjustment factors -
* ------------------------------------------------------------------------------

* - Dividend data --------------------------------------------------------------
* Reads sheet TSA6 of the PSZ macro appendix workbook and keeps the dividend
* series for C-corporations and S-corporations. The result starts the
* aggregate file that the following sections extend by merging on year.
import excel "raw_data/November2017/PSZ2017AppendixTablesI(Macro).xlsx", sheet("TSA6") cellrange(A7:D116) firstrow clear

* Column A is imported under its column letter; give all three columns
* descriptive names in one step.
rename (A Dividendspaidbycorporationso DividendspaidbyScorporations) (year ccorp_dividend scorp_dividend)

* Restrict to the 1966-2015 window used throughout this file.
keep if inrange(year, 1966, 2015)

keep year *_dividend

save clean_data/dina_aggregate_corp.dta, replace

* - Retained Earnings ----------------------------------------------------------
* Reads sheet TA6 of the same workbook, keeps the retained-earnings column,
* and adds it to the aggregate file built so far.
import excel "raw_data/November2017/PSZ2017AppendixTablesI(Macro).xlsx", sheet("TA6") cellrange(A7:Q116) firstrow clear

* Only the year column and retained earnings are needed from this sheet.
keep A Retainedearnings
rename (A Retainedearnings) (year ccorp_retained)

* Restrict to the 1966-2015 window used throughout this file.
keep if inrange(year, 1966, 2015)

* One row per year on both sides of the merge.
merge 1:1 year using clean_data/dina_aggregate_corp.dta, nogen

save clean_data/dina_aggregate_corp.dta, replace

* - National Income ------------------------------------------------------------
* Reads national income from sheet TA0, merges it into the aggregate file,
* and rescales retained earnings by it.
import excel "raw_data/November2017/PSZ2017AppendixTablesI(Macro).xlsx", sheet("TA0") cellrange(A7:B116) firstrow clear

keep A NationalincomeYt
rename (A NationalincomeYt) (year nat_inc)

* Restrict to the 1966-2015 window used throughout this file.
keep if inrange(year, 1966, 2015)

merge 1:1 year using clean_data/dina_aggregate_corp.dta, nogen

* Multiply retained earnings by national income.
* NOTE(review): this presumes the TA6 series is a share of national income,
* so the product is a level - confirm against the workbook.
replace ccorp_retained = nat_inc * ccorp_retained

save clean_data/dina_aggregate_corp.dta, replace

* - Wealth ---------------------------------------------------------------------
* Reads corporate wealth from sheet TB1, merges it into the aggregate file,
* and computes the income-to-wealth factor for each corporate form.
import excel "raw_data/November2017/PSZ2017AppendixTablesI(Macro).xlsx", sheet("TB1") cellrange(A8:J116) firstrow clear

rename (A OtherthanScorporations Scorporations) (year ccorp_wealth scorp_wealth)

* Restrict to the 1966-2015 window used throughout this file.
keep if inrange(year, 1966, 2015)

keep year *_wealth

merge 1:1 year using clean_data/dina_aggregate_corp.dta, nogen

* S-corporations: dividends per unit of wealth.
gen scorp_factor = scorp_dividend / scorp_wealth

* C-corporations: dividends plus retained earnings per unit of wealth.
gen ccorp_factor = (ccorp_dividend + ccorp_retained) / ccorp_wealth

save clean_data/dina_aggregate_corp.dta, replace


* ------------------------------------------------------------------------------
* - Calculate percentiles for relevant years -
* ------------------------------------------------------------------------------

* - Select variables -
* Variable groups read from each yearly DINA file. The groups only organise
* the list; every group is loaded and averaged in the same way below.
local base id 
local all fainc ptinc peinc govin npinc prisupen invpen
local labor plinc plcon plbel flinc flemp flmil flprl
local capital pkinc pkpen pkbek fkinc fkhou fkequ fkfix fkbus fkpen fkdeb schcinc scorinc partinc fibus
local stock hwhou hwequ hwfix hwbus hwpen hwdeb partw soleprop scorw

* - Loop specification -
* start_year is the base year; every entry of end_years is later compared
* against it. Both macros are reused by the growth section further down.
local start_year 1980
local end_years 2007 2012
*local years 1980 2007 2012

* For every weight / ranking-income pair, build one file with a row per
* percentile and year, then attach the deflators and corporate factors.
foreach weight in dweght {
	foreach income in peinc {
		di "Current variable: `income'"
		
		foreach year in  `start_year' `end_years' {

			di "  "
			di "Current Year: " `year'

			* - Load data -
			use `base' `weight' `all' `labor' `capital' `stock' using `data'/usdina`year'.dta, clear
			
			* - Collapse data by tax id -
			* Weights are summed within id; all other variables are averaged.
			gcollapse(sum) `weight' (mean) `all' `labor' `capital' `stock' , by(id)
			
			* - Transform values of dweght to more standard range -
			qui replace `weight' = `weight'/10^6
		
		
			* - Generate percentiles/ permilles - 
			* Weighted ranks on the chosen income concept. The top percentile
			* is split into tenths using the permille rank, so its groups are
			* labelled 99.1, 99.2, ..., 100.
			xtile percentile = `income' [aw = `weight'], n(100)
			xtile permille = `income' [aw = `weight'], n(1000)
			qui replace percentile = permille/10 if percentile > 99
		
			drop permille
		
			* - Total stock and income by asset class for return calculation-
			* These are plain column totals, identical on every row, so they
			* survive the collapse below unchanged.
			* NOTE(review): the totals do not use the sampling weight - confirm
			* this is intended before enabling the return calculation.
			foreach var in hou equ fix bus deb pen {
				egen tot_hw`var' = total(hw`var')
				egen tot_fk`var' = total(fk`var')
				*gen ret_`var' = tot_fk`var'/tot_hw`var'
				*drop tot_*
			}
		
		
			* - Collapse variables -
			* Weighted mean of every variable within each percentile group.
			gcollapse(mean) `all' `labor' `capital' `stock'  tot_* [aw = `weight'], by(percentile)
		
			* - Generate observation year -
			gen year = `year'
		
			* - Store data -
			* Compare the loop macros rather than the dataset variable: the
			* programming if command evaluates a variable at the first
			* observation only, which worked here solely because year is
			* constant in the data. The first year overwrites any stale
			* output file; later years are stacked onto it.
			if (`year' == `start_year') {
				qui save clean_data/dina_percentile_`income'_`weight'.dta,replace
			}
			else {
				qui append using clean_data/dina_percentile_`income'_`weight'.dta
			
				qui save clean_data/dina_percentile_`income'_`weight'.dta,replace
			}
		}
		
		* ----------------------------------------------------------------------
		* - Add deflators and adjustment factors -
		* ----------------------------------------------------------------------

		use clean_data/dina_percentile_`income'_`weight'.dta, clear

		* Many percentile rows map to one row per year in each lookup file.
		* The documented many-to-one match type is m:1. keep(1 3) retains
		* every percentile row and drops years found only in the lookup.
		merge m:1 year using clean_data/deflator.dta,keep(1 3) nogen
		merge m:1 year using clean_data/dina_aggregate_corp.dta, keep(1 3) nogen

		save clean_data/dina_percentile_`income'_`weight'.dta, replace

	}
}

********************************************************************************
* - PRE-TAX NATIONAL INCOME DISTRIBUTION -
********************************************************************************



* For each end year, compute the annualized growth of real pre-tax income by
* percentile between start_year and that end year, and split it into
* capital, S-corporation and labor contributions under two alternative
* measures of S-corporation income. One .dta and one .csv file is written
* per end year.
foreach end_year in `end_years' {

	* Keep only the base year and the current end year, so every percentile
	* has exactly two observations.
	use clean_data/dina_percentile_peinc_dweght.dta if inlist(year,`start_year',`end_year'), clear

	* - Define components -
	gen peinck = pkinc + govin + npinc + prisupen + invpen // Assume all pension and non-profit income is capital
	gen peincl = plinc

	* - Deflate appropriately -
	* Income flows are converted to real terms with the PCE deflator. The
	* wealth variables used below enter only through a ratio and are left
	* in nominal terms.
	foreach var in peinc peinck peincl scorinc fkequ {
		replace `var' = `var' / pce_deflator
	}

	* - Generate time variable -
	* period is 1 for the base year and 2 for the end year, so the L. and D.
	* operators below compare the end year with the base year.
	sort year 
	egen period = group(year)

	xtset percentile period

	* - Baseline S-corporation carve-out (0.75 share) -
	* 0.75 of reported S-corporation income is moved out of capital income
	* into its own component.
	gen scorin_base = 0.75 * scorinc
	gen peinck_base = peinck - 0.75 * scorinc

	* - Alternative S-corporation carve-out -
	* S-corporation income is imputed as a share of equity income, where the
	* share weights S-corporation and other equity wealth by their
	* respective corporate factors. The carve-out is taken from the
	* unadjusted capital income, so that capital, S-corporation and labor
	* income still sum to total income. Previously it was subtracted on top
	* of the baseline carve-out, which removed S-corporation income from
	* capital twice.
	local scor_share (scorw * scorp_factor)/(scorw * scorp_factor + (hwequ-scorw)* ccorp_factor)
	gen scorin_alt = 0.75 * fkequ * `scor_share'
	gen peinck_alt = peinck - 0.75 * fkequ * `scor_share'

	order year percentile peinc peinck peincl

	* - Annualized growth rate of total income, in percent -
	gen peinc_gr = ((peinc/L.peinc)^(1/(`end_year'-`start_year')) - 1) *100

	* - Contributions: each component's share of the change in total income,
	*   scaled by the total growth rate -
	gen capital_gr_base = (D.peinck_base/D.peinc) * peinc_gr
	gen scor_gr_base = (D.scorin_base/D.peinc) * peinc_gr
	gen labor_gr_base = (D.peincl/D.peinc) * peinc_gr

	gen capital_gr_alt = (D.peinck_alt/D.peinc) * peinc_gr
	gen scor_gr_alt = (D.scorin_alt/D.peinc) * peinc_gr

	* Lags and differences are missing in the base year; keep the end year.
	keep if (year == `end_year')

	keep percentile peinc_gr labor_gr_base capital_gr_base scor_gr_base capital_gr_alt scor_gr_alt

	save clean_data/psz_clean_`start_year'_`end_year'.dta,replace
	export delimited clean_data/psz_clean_`start_year'_`end_year'.csv, replace

}

