/*

	Project: Unequal Growth
	
	Purpose: Create Return on US Household Wealth
	
	Data: BEA NIPA Table 2.1 and IMA Table S.3
	
*/

* - Start from a clean session: clear the Results window and all data/programs -
cls
clear all

* - Every relative path below (raw_data/, clean_data/) is resolved from here -
cd "~/Dropbox/data_nils/replication_mrr_may_2020/s4b_measuring_returns"

********************************************************************************
********************************************************************************
* - DATA CONSTRUCTION -
********************************************************************************
********************************************************************************

/*
	Note: 
	This code uses some of the data constructed in the portfolio_shares_v01 file. 
	The data is stored in the raw_data folder (dina_imanipa_dfa_data_v01) so 
	that this code can run without impediment.
*/

* ------------------------------------------------------------------------------
* - Load and format NIPA data -
* ------------------------------------------------------------------------------
* Imports sheet T20100-A, keeps the selected table lines, transposes so that
* each kept line becomes a column named nipa_<line>, converts everything to
* numeric, and writes the result to clean_data/nipa_ima_clean.dta.

import excel raw_data/NIPA_Section2All_xls.xlsx, sheet("T20100-A") cellrange(A8:CO54) clear

* - Extract first observation year -
* NOTE(review): start_year is only displayed; nothing below in this section uses it.
local start_year = real(C[1])
di `start_year'

* - Select lines -
* The row labelled "Line" is kept because it supplies the column names after transposing.
keep if inlist(A,"Line","2","3","9","12") | inlist(A,"13","14","15","26","30")

drop B C

* sxpose transposes a string dataset: rows become columns (_var1, _var2, ...)
sxpose, clear

* - Get names from lines -
* The first observation of each transposed column holds the original line label.
foreach col of varlist _all {
	local line_id = `col'[1]
	di "`line_id'"
	rename `col' nipa_`line_id'
}

* The label row has served its purpose
drop in 1

* - Make numeric -
* "---" marks an unavailable cell; it is blanked, and the resulting missing
* values are then set to zero.
foreach col of varlist _all {
	quietly {
		replace `col' = "" if `col' == "---"
		destring `col', replace
	}
	replace `col' = 0 if missing(`col')
}


* Inactive code for a quarterly date variable, kept for reference:
/*
gen date = qofd(mdy(3*real(substr(_var1,6,1)), 1, real(substr(_var1,1,4))))
format date %tq
drop _var1
order date
*/

* The column built from the "Line" row holds the year
rename nipa_Line year

save clean_data/nipa_ima_clean.dta, replace

* ------------------------------------------------------------------------------
* - Corporate Capital Share from NIPA tables -
* ------------------------------------------------------------------------------
* Imports sheet T11300-A, keeps lines 3, 4 and 9, computes the capital share
* and its centered 5-year average, and merges both into the clean NIPA file.

import excel raw_data/NIPA_Section1All_xls.xlsx, sheet("T11300-A") cellrange(A8:BV97) clear

keep if inlist(A,"Line","3","4","9")

drop B C

sxpose, clear

* - Get names from lines -
* The first observation of each transposed column holds the original line label.
foreach col of varlist _all {
	local line_id = `col'[1]
	di "`line_id'"
	rename `col' nipa1_`line_id'
}

drop in 1

* - Make numeric -
destring _all, replace

rename nipa1_Line year

* Declare the annual time variable so that lag/lead operators can be used
tsset year

* Capital share = 1 - line 4 / (line 3 - line 9)
gen capital_share = 1 - nipa1_4/(nipa1_3 - nipa1_9)

* Centered 5-year average; missing for the first two and last two years
gen capital_share_avg5 = (1/5)*(L2.capital_share + L1.capital_share + capital_share + F1.capital_share + F2.capital_share)


label variable capital_share "Corporate Sector Capital Share (NIPA)"
label variable capital_share_avg5 "Corporate Sector Capital Share - 5-Year Average (NIPA)"

keep year capital_share*

* Keep only the years present in both files
merge 1:1 year using clean_data/nipa_ima_clean.dta, nogen keep(3)

save clean_data/nipa_ima_clean.dta, replace

* ------------------------------------------------------------------------------
* - Load and format IMA data -
* ------------------------------------------------------------------------------
* Imports sheet IMAtS3.a-A, keeps the selected table lines, transposes so that
* each kept line becomes a column named ima_<line>, converts everything to
* numeric, and merges the result with the clean NIPA file by year.

import excel raw_data/IMA_Section1All_xls.xlsx, sheet("IMAtS3.a-A") cellrange(A8:BJ157) clear

* - Select lines -
* The row labelled "Line" is kept because it supplies the column names after transposing.
keep if inlist(A,"Line","19","84","89","90","91","92","93","94") | inlist(A,"99","104","109","114","117","121","123")

drop B C

sxpose, clear

* - Get IMA line labels as variable names -
foreach col of varlist _all {
	local line_id = `col'[1]
	di "`line_id'"
	rename `col' ima_`line_id'
}

drop in 1

* - Make numeric -
* "---" and "....." mark unavailable cells; they are blanked, and the
* resulting missing values are then set to zero.
foreach col of varlist _all {
	quietly {
		replace `col' = "" if inlist(`col', "---", ".....")
		destring `col', replace
	}
	replace `col' = 0 if missing(`col')
}

rename ima_Line year

* Keep only the years present in both the IMA and the NIPA data
merge 1:1 year using clean_data/nipa_ima_clean.dta, nogen keep(3)

* ------------------------------------------------------------------------------
* - Variable definitions -
* ------------------------------------------------------------------------------
* Builds income flows (_inc), revaluations (_rev) and stocks (_stk) by asset
* class from the merged nipa_<line> and ima_<line> columns.
*
* Asset-class prefixes used below:
*   debt, eqpu, eqpr, eqpR, real     components
*   equi = eqpu + eqpr               equity aggregate using the eqpr component
*   equI = eqpu + eqpR               equity aggregate using the eqpR component
*   fina = debt + equi               financial aggregate
*   asst = fina + real               all assets
* eqpr and eqpR differ only in the income definition (fixed 0.283 share versus
* the NIPA capital_share); their revaluation and stock definitions coincide.

* - Income ---------------------------------------------------------------------
gen wage_inc = nipa_2
gen debt_inc = nipa_14
gen eqpu_inc = nipa_15
gen eqpr_inc = nipa_9 * 0.283 // Note from GRR(2011)
gen eqpR_inc = nipa_9 * capital_share // Note from NIPA tables
gen real_inc = nipa_12 + (ima_19 - nipa_30)

gen equi_inc = eqpu_inc + eqpr_inc
gen equI_inc = eqpu_inc + eqpR_inc
gen fina_inc = debt_inc + equi_inc
gen asst_inc = fina_inc + real_inc

* - Revaluations ---------------------------------------------------------------
gen debt_rev = ima_89 + ima_94
gen eqpu_rev = ima_90 + ima_91 + ima_93
gen eqpr_rev = ima_92
gen eqpR_rev = ima_92
gen real_rev = ima_84

gen equi_rev = eqpu_rev + eqpr_rev
gen equI_rev = eqpu_rev + eqpR_rev
gen fina_rev = debt_rev + equi_rev
gen asst_rev = fina_rev + real_rev

* - Stocks ---------------------------------------------------------------------
gen debt_stk = ima_104 + ima_109 + ima_114 + ima_123
* NOTE(review): ima_121 enters eqpu_stk here and is also the whole of eqpr_stk
* and eqpR_stk, so it is counted twice in equi_stk and equI_stk - confirm that
* this is intended given the IMA line definitions.
gen eqpu_stk = ima_117 + ima_121
gen eqpr_stk = ima_121
gen eqpR_stk = ima_121
gen real_stk = ima_99

gen equi_stk = eqpu_stk + eqpr_stk
* Uses the eqpR component, in line with equI_inc and equI_rev
gen equI_stk = eqpu_stk + eqpR_stk
gen fina_stk = debt_stk + equi_stk
gen asst_stk = fina_stk + real_stk

* - Average Tax Rate -----------------------------------------------------------
gen avg_tax = nipa_26/(nipa_3 + nipa_9 + nipa_12 + nipa_13)

* - Declare the annual time variable -
tsset year

* The raw table lines are no longer needed once the definitions are built
drop ima* nipa*

* - Keep only observations with all variables non-missing -
* This includes capital_share_avg5, which is missing at both ends of its sample.
foreach var of varlist * {
	qui drop if missing(`var')
}

* - Post - tax incomes ---------------------------------------------------------
* Every income flow is scaled by (1 - avg_tax) and stored as <name>_inc_post
foreach var of varlist *_inc {
	gen `var'_post = `var' * (1-avg_tax)
}

* - Adjust numeric format -
foreach var of varlist *_stk *_rev *_inc {
	format `var' %12.0fc
}

save clean_data/nipa_ima_clean.dta,replace


* ------------------------------------------------------------------------------
* - PCE Price Deflator -
* ------------------------------------------------------------------------------
* Downloads FRED series DPCERG3A086NBEA, merges it into the clean file by
* year, and rescales all stock, revaluation and income variables by
* 100/deflator.

freduse DPCERG3A086NBEA, clear

rename DPCERG3A086NBEA deflator
label variable deflator "PCE Index"

* - Generate important variables -
* The first four characters of the string date are the year
gen year = real(substr(date,1,4))

tsset year

keep year deflator

* - Merge with other variables -
* Keep only the years present in both the deflator and the clean file
merge 1:1 year using clean_data/nipa_ima_clean.dta, keep(3) nogen

* - Deflate -
* The pattern with the trailing wildcard on inc also picks up the _inc_post series
foreach series of varlist *stk *rev *inc* {
	replace `series' = `series' * 100/deflator
}

save clean_data/nipa_ima_clean.dta, replace

* ------------------------------------------------------------------------------
* - Calculate returns -
* ------------------------------------------------------------------------------
* For each asset class, three return measures are built relative to the
* previous year's stock, both pre-tax and post-tax:
*   _ret_flow    income / lagged stock
*   _ret_avgrev  income / lagged stock + sample-average revaluation rate
*   _ret_full    (income + revaluation) / lagged stock

foreach asset in debt eqpu eqpr eqpR real equi equI fina asst {

	* Revaluation rate, needed only for its sample mean
	tempvar reval_rate
	gen `reval_rate' = `asset'_rev/L.`asset'_stk

	* The mean is read from r(mean) below; generate does not overwrite it
	quietly summarize `reval_rate'

	* - Pre-tax returns -
	gen `asset'_ret_flow = (`asset'_inc / L.`asset'_stk )
	gen `asset'_ret_avgrev = (`asset'_inc / L.`asset'_stk + `r(mean)' )
	gen `asset'_ret_full = ((`asset'_inc + `asset'_rev)/L.`asset'_stk )

	* - Post-tax returns -
	* Only the income part is taxed; revaluations enter untaxed
	gen `asset'_ret_flow_post = (`asset'_inc_post/L.`asset'_stk)
	gen `asset'_ret_avgrev_post = (`asset'_inc_post/L.`asset'_stk + `r(mean)' )
	gen `asset'_ret_full_post = ((`asset'_inc_post + `asset'_rev)/L.`asset'_stk)

	drop `reval_rate'

}

save clean_data/nipa_ima_clean.dta, replace


* ------------------------------------------------------------------------------
* - Merge with Gomme et al (2011) -
* ------------------------------------------------------------------------------
* Imports the quarterly GRR return series, averages them by calendar year,
* converts them from percent to fractions, adds moving averages of the
* business-capital series, and merges everything into the clean file.

* - Import data, forcing all 26 columns to numeric -
import delimited raw_data/GRR_usdata.csv, numericcols(1/26) clear

* - Quarterly date variable: the first row is taken to be 1947q1 -
gen date=tq(1947q1)+_n-1
format date %tq
label variable date "Date"
order date // Makes date first variable in dataset

* - Shorten the imported column names -
* Suffixes: _post = after tax, _noc = the "no capital ..." variant of the series
* (the imported column names are truncated, so the full wording is not visible here)

* Returns: Business capital
rename (returntobusinesscapitalpretax    returntobusinesscapitalpretaxnoc ///
        returntobusinesscapitalaftertax  returntobusinesscapitalaftertaxn) ///
       (rk_bs rk_bs_noc rk_bs_post rk_bs_post_noc)

* Returns: All capital
rename (returntoallcapitalpretax    returntoallcapitalpretaxnocapita ///
        returntoallcapitalaftertax  returntoallcapitalaftertaxnocapi) ///
       (rk rk_noc rk_post rk_post_noc)

* Returns: Housing capital
rename (returntohousingcapitalpretax    returntohousingcapitalpretaxnoca ///
        returntohousingcapitalaftertax  returntohousingcapitalaftertaxno) ///
       (rk_hm rk_hm_noc rk_hm_post rk_hm_post_noc)

keep date r*
gen year = yofd(dofq(date))

* - Annual averages of the quarterly series -
gcollapse (mean) rk*, by(year)

* - Percent to fraction -
foreach series of varlist rk* {
	replace `series' = `series'/100
}

tsset year

* - Centered 5- and 11-year moving averages of the business-capital returns -
foreach series of varlist rk_bs* {
	tssmooth ma `series'_ma5 = `series', window(2 1 2)
	tssmooth ma `series'_ma11 = `series', window(5 1 5)
}


* Keep every year of the clean file, whether or not GRR data exist for it
merge 1:1 year using clean_data/nipa_ima_clean , nogen keep(2 3)

save clean_data/nipa_ima_clean.dta, replace


* ------------------------------------------------------------------------------
* - Merge with r* data provided by Lukasz -
* ------------------------------------------------------------------------------
* Adds the r* series to the clean file, builds the average-revaluation
* adjusted GRR return, rescales all return series to percent, and keeps the
* subset of variables used downstream.

import delimited raw_data/figure_grr.csv,clear

keep v1 v10 // Lukasz said that the last column was r*; the file is not labelled

* NOTE(review): round() sends date values with a fractional part of .5 or more
* to the following year - confirm this matches how v1 is coded in the file.
replace v1 = round(v1)

rename v1 year
rename v10 r_star

* One observation per year
gcollapse (mean) r_star, by(year)

* Keep every year of the clean file, whether or not r* exists for it.
* nogen matches the other merges in this file and avoids leaving _merge behind.
merge 1:1 year using clean_data/nipa_ima_clean.dta, keep(2 3) nogen

* - Adjust for average revaluation differences -
qui sum rk_bs_post_noc
local mean_noc = `r(mean)'

qui sum rk_bs_post
local mean_wtc = `r(mean)'

* The noc series shifted up by the gap between the two sample means
gen rk_bs_post_avgrev = rk_bs_post_noc + (`mean_wtc' - `mean_noc')
* What is left of rk_bs_post after removing the shifted series
gen rk_bs_post_adjrevl = rk_bs_post - rk_bs_post_avgrev
* Year-by-year gap between the two GRR series
gen rk_bus_post_reval = rk_bs_post - rk_bs_post_noc


* - Fractions to percent (r_star is not rescaled) -
foreach var of varlist rk_* *_ret_* {
	replace `var' = `var' * 100
}

* - Revaluation components of the post-tax returns -
foreach var in eqpu equI eqpR {
	gen `var'_reval = `var'_ret_full_post - `var'_ret_avgrev_post
	gen `var'_reval2 = `var'_ret_full_post - `var'_ret_flow_post
}

* keeporder drops everything that is not listed and orders what remains
keeporder year eqpu_ret_avgrev_post eqpR_ret_avgrev_post equi_ret_avgrev_post rk_bs_post_avgrev r_star eqpu_ret_avgrev_post eqpR_ret_avgrev_post rk_bs_post_avgrev eqpu_reval eqpR_reval rk_bs_post_adjrevl eqpr_ret_avgrev_post eqpu_ret_avgrev_post


save clean_data/nipa_ima_clean.dta, replace

* ------------------------------------------------------------------------------
* - Merge with growth rate data and portfolio shares-
* ------------------------------------------------------------------------------
* Combines the clean file with the trend growth series and the portfolio
* share data, then builds returns on wealth as share-weighted averages of
* the equity return (rk_measure) and r* (rb_measure).

* - Trend growth -
import excel raw_data/US_trend_growth.xlsx, sheet("CBO Data") cellrange(A13:C86) clear firstrow
* Rows are numbered from 1960 onward
* NOTE(review): assumes the first data row of the range is 1960 and rows are annual - confirm
gen year=1959+_n
rename GDPPOT_POPTHM grate
keep year grate
tempfile growth
save `growth', replace

* - Combine: clean file + portfolio shares + trend growth -
use clean_data/nipa_ima_clean , clear
merge 1:1 year using raw_data/dina_imanipa_dfa_data_v01.dta, nogenerate
merge 1:1 year using `growth', nogenerate
keep if year<=2016


* - Return measures entering the weighted averages -
gen rk_measure=equi_ret_avgrev_post
gen rb_measure=r_star

* - Smoothed growth: mean of grate over years t-5 to t+5, stored as grate_mean -
rangestat (mean) grate, interval(year -5 5)

* - Average returns -
* For each share variable: the unadjusted weighted return, then the same with
* grate_mean (adj1) and twice grate_mean (adj2) subtracted from both returns.
foreach source in dina imanipa dfa {
	local sh `source'_sh_equ_fass
	gen rw_`source'=(`sh'/100)*rk_measure+(1-`sh'/100)*rb_measure
	gen rw_`source'_adj1=(`sh'/100)*(rk_measure-grate_mean)+(1-`sh'/100)*(rb_measure-grate_mean)
	gen rw_`source'_adj2=(`sh'/100)*(rk_measure-2*grate_mean)+(1-`sh'/100)*(rb_measure-2*grate_mean)

	foreach stub in nopen copen {
		local sh `source'_sh_equ_fass_`stub'
		gen rw_`source'_`stub'=(`sh'/100)*rk_measure+(1-`sh'/100)*rb_measure
		gen rw_`source'_adj1_`stub'=(`sh'/100)*(rk_measure-grate_mean)+(1-`sh'/100)*(rb_measure-grate_mean)
		gen rw_`source'_adj2_`stub'=(`sh'/100)*(rk_measure-2*grate_mean)+(1-`sh'/100)*(rb_measure-2*grate_mean)
	}
}

* - Returns for top 1 -
* Same construction using the _cen share variables (dina and dfa only)
foreach source in dina dfa {
	local sh `source'_sh_equ_fass_cen
	gen rw_`source'_top1=(`sh'/100)*(rk_measure)+(1-`sh'/100)*(rb_measure)
	gen rw_`source'_adj1_top1=(`sh'/100)*(rk_measure-grate_mean)+(1-`sh'/100)*(rb_measure-grate_mean)
	gen rw_`source'_adj2_top1=(`sh'/100)*(rk_measure-2*grate_mean)+(1-`sh'/100)*(rb_measure-2*grate_mean)


	foreach stub in nopen copen {
		local sh `source'_sh_equ_fass_`stub'_cen
		gen rw_`source'_`stub'_top1=(`sh'/100)*(rk_measure)+(1-`sh'/100)*(rb_measure)
		gen rw_`source'_adj1_`stub'_top1=(`sh'/100)*(rk_measure-grate_mean)+(1-`sh'/100)*(rb_measure-grate_mean)
		gen rw_`source'_adj2_`stub'_top1=(`sh'/100)*(rk_measure-2*grate_mean)+(1-`sh'/100)*(rb_measure-2*grate_mean)
	}
}

* - Select variables -----------------------------------------------------------
* The three locals list what is kept for export: the return series, every
* rw_ wealth return, and the share variables ending in fass (optionally
* followed by _copen or _nopen, and _cen).
* NOTE(review): ima_returns repeats several names; the list is left exactly as
* is because keeporder uses it to set the column order of the exported file.
local ima_returns eqpu_ret_avgrev_post eqpR_ret_avgrev_post rk_bs_post_avgrev r_star eqpu_ret_avgrev_post eqpR_ret_avgrev_post rk_bs_post_avgrev eqpu_reval eqpR_reval rk_bs_post_adjrevl
local wealth_returns rw_*
local shares *fass *fass_copen *fass_nopen *fass_cen *fass_copen_cen *fass_nopen_cen

* Drop everything not listed and put the remaining variables in this order
keeporder year `ima_returns' `wealth_returns' `shares'
* Remove any kept variable whose name contains iwr or _pen_
* NOTE(review): drop stops with an error if a pattern matches no variable
drop *iwr* *_pen_*

* - Rename variables -----------------------------------------------------------
* Shortens names for export:
*   source   imanipa -> ima (dina and dfa keep their names)
*   wealth   nopen -> np, copen -> cp
*   shares   <source>_sh_equ_fass -> <source>_eqsh, with copen -> cp,
*            nopen -> np and cen -> top1
* The top-1 series exist for dina and dfa only.

local short_nopen np
local short_copen cp

* - Returns on wealth -
foreach src in dina imanipa dfa {
	local out `src'
	if "`src'" == "imanipa" local out ima

	foreach adj in "" _adj1 _adj2 {
		* The base series only needs renaming when the source name changes
		if "`src'" != "`out'" rename rw_`src'`adj' rw_`out'`adj'

		foreach pen in nopen copen {
			local short `short_`pen''
			rename rw_`src'`adj'_`pen' rw_`out'`adj'_`short'
			if "`src'" != "imanipa" rename rw_`src'`adj'_`pen'_top1 rw_`out'`adj'_`short'_top1
		}
	}
}

* - Return and revaluation series -
rename (eqpu_ret_avgrev_post eqpR_ret_avgrev_post rk_bs_post_avgrev) (ret_corp ret_noncorp ret_grr)
rename (eqpu_reval eqpR_reval rk_bs_post_adjrevl) (rev_corp rev_noncorp rev_grr)

* - Equity shares -
foreach src in dina imanipa dfa {
	local out `src'
	if "`src'" == "imanipa" local out ima

	rename `src'_sh_equ_fass `out'_eqsh
	rename `src'_sh_equ_fass_copen `out'_eqsh_cp
	rename `src'_sh_equ_fass_nopen `out'_eqsh_np

	if "`src'" != "imanipa" {
		rename `src'_sh_equ_fass_cen `out'_eqsh_top1
		rename `src'_sh_equ_fass_copen_cen `out'_eqsh_top1_cp
		rename `src'_sh_equ_fass_nopen_cen `out'_eqsh_top1_np
	}
}

* - Restrict the sample to 1963 onward -
drop if year < 1963

* - Write the final CSV and remove the intermediate Stata file -
export delimited using "clean_data/nipa_ima_clean.csv", replace
erase clean_data/nipa_ima_clean.dta
