/*

	Project: Unequal Growth
	
	Purpose: Create portfolio composition figure
	
	Data: IMA Table S.3, NIPA Table 2.1, DINAs, DFAs
	
	v06: include direct lending to HHs.
	
*/

* - Start from an empty session and a clean Results window -
clear all
cls

* - Working directory: every relative path used below resolves against it -
cd "/Users/lukaszrachel/Dropbox/Lukasz 2017 MacBook Pro/Documents/Research/Data"

* - Sub-folders (relative to the working directory) holding each raw data source -
global data_ima  "IMA_NIPA_US"
global data_dina "DINA_US/PSZ2020Dinafiles"
global data_dfa  "DFA_US"

********************************************************************************
* ------------------------------------------------------------------------------
* - NIPA/IMA data -
* ------------------------------------------------------------------------------
********************************************************************************

* ------------------------------------------------------------------------------
* - Load and format NIPA data -
* ------------------------------------------------------------------------------

* Read sheet T20100-A of the NIPA Section 2 workbook, keep a handful of table
* lines, transpose so that each kept line becomes a variable named
* nipa_<line number>, convert everything to numeric and store the result in
* clean_data/imanipa_temp.dta.
import excel "${data_ima}/NIPA_Section2All_xls.xlsx", ///
	sheet("T20100-A") cellrange(A8:CQ54) clear

* - Extract first observation year (read from column C of the first imported row) -
local start_year = real(C[1])
display `start_year'

* - Select lines: the header row ("Line") plus the required line numbers -
keep if inlist(A, "Line", "3", "9", "12") | ///
	inlist(A, "13", "14", "15", "26", "30")

* Columns B and C are not needed once the rows are selected
drop B C

* Transpose: one variable per kept row (sxpose is a user-written command)
sxpose, clear

* - Name each variable after the value held in its first observation -
foreach v of varlist _all {
	local line_id = `v'[1]
	display "`line_id'"
	rename `v' nipa_`line_id'
}

* The first observation only carried the names used above
drop in 1

* - Make numeric: "---" is blanked first, and any missing value becomes 0 -
foreach v of varlist _all {
	quietly replace `v' = "" if `v' == "---"
	quietly destring `v', replace
	replace `v' = 0 if missing(`v')
}

* The column that came from the "Line" row is the merge key used later
rename nipa_Line year

save clean_data/imanipa_temp.dta, replace

* ------------------------------------------------------------------------------
* - Corporate Capital Share from NIPA tables -
* ------------------------------------------------------------------------------

* Read sheet T11300-A of the NIPA Section 1 workbook, build a capital share
* from table lines 3, 4 and 9, and add it to clean_data/imanipa_temp.dta.
import excel "${data_ima}/NIPA_Section1All_xls.xlsx", ///
	sheet("T11300-A") cellrange(A8:BX93) clear

* - Select lines: the header row ("Line") plus lines 3, 4 and 9 -
keep if inlist(A, "Line", "3", "4", "9")

drop B C

* Transpose: one variable per kept row
sxpose, clear

* - Name each variable after the value held in its first observation -
foreach v of varlist _all {
	local line_id = `v'[1]
	display "`line_id'"
	rename `v' nipa1_`line_id'
}

* The first observation only carried the names used above
drop in 1

* - Make numeric -
foreach v of varlist _all {
	destring `v', replace
}

rename nipa1_Line year

* Declare the annual time index; the L./F. operators below need it
tsset year

* Capital share = 1 - line 4 / (line 3 - line 9)
generate capital_share = 1 - nipa1_4/(nipa1_3 - nipa1_9)

* Centred five-year average; missing wherever any of the two leads/lags is missing
generate capital_share_avg5 = (1/5)*(L2.capital_share + L.capital_share + capital_share + F.capital_share + F2.capital_share)

label variable capital_share "Corporate Sector Capital Share (NIPA)"

keep year capital_share*

* Keep only years present in both this table and the file saved so far
merge 1:1 year using clean_data/imanipa_temp.dta, nogenerate keep(3)

save clean_data/imanipa_temp.dta, replace

* ------------------------------------------------------------------------------
* - Load and format IMA data -
* ------------------------------------------------------------------------------

* - Household sector data - 

* Read sheet IMAtS3.a-A of the IMA workbook, keep selected table lines as
* variables named ima_<line number>, and merge them into
* clean_data/imanipa_temp.dta by year.
import excel "${data_ima}/IMA_Section1All_xls.xlsx", ///
	sheet("IMAtS3.a-A") cellrange(A8:BL157) clear

* - Select lines (two inlist() calls combined with | to cover all values) -
keep if inlist(A, "Line", "19", "99", "103", "104") | ///
	inlist(A, "109", "114", "117", "121", "123", "131")

drop B C

* Transpose: one variable per kept row
sxpose, clear

* - Use the IMA line numbers (first observation) as variable names -
foreach v of varlist _all {
	local line_id = `v'[1]
	display "`line_id'"
	rename `v' ima_`line_id'
}

* The first observation only carried the names used above
drop in 1

* - Make numeric: "---" and "....." are blanked, then missing values become 0 -
foreach v of varlist _all {
	quietly replace `v' = "" if `v' == "---" | `v' == "....."
	quietly destring `v', replace
	replace `v' = 0 if missing(`v')
}

rename ima_Line year

* Keep only years present in both this sheet and the file saved so far
merge 1:1 year using clean_data/imanipa_temp.dta, nogenerate keep(3)

save clean_data/imanipa_temp.dta, replace

* - Corporate sector data - 


* Read sheet IMAtS5.a-A of the IMA workbook, keep selected table lines as
* variables named imac_<line number>, and merge them into
* clean_data/imanipa_temp.dta by year.
import excel "${data_ima}/IMA_Section1All_xls.xlsx", ///
	sheet("IMAtS5.a-A") cellrange(A8:BL182) clear

* - Select lines: the header row ("Line") plus the required line numbers -
keep if inlist(A, "Line", "145", "148", "149", "150", "157")

drop B C

* Transpose: one variable per kept row
sxpose, clear

* - Use the IMA line numbers (first observation) as variable names -
foreach v of varlist _all {
	local line_id = `v'[1]
	display "`line_id'"
	rename `v' imac_`line_id'
}

* The first observation only carried the names used above
drop in 1

* - Make numeric: "---" and "....." are blanked, then missing values become 0 -
foreach v of varlist _all {
	quietly replace `v' = "" if `v' == "---" | `v' == "....."
	quietly destring `v', replace
	replace `v' = 0 if missing(`v')
}

rename imac_Line year

* Keep only years present in both this sheet and the file saved so far
merge 1:1 year using clean_data/imanipa_temp.dta, nogenerate keep(3)

save clean_data/imanipa_temp.dta, replace



* - Non-corporate sector data - 


* Read sheet IMAtS4.a-A of the IMA workbook, keep lines 115 and 123 as
* variables named imanc_<line number>, and merge them into
* clean_data/imanipa_temp.dta by year.
import excel "${data_ima}/IMA_Section1All_xls.xlsx", ///
	sheet("IMAtS4.a-A") cellrange(A8:BL145) clear

* - Select lines: the header row ("Line") plus lines 115 and 123 -
keep if inlist(A, "Line", "115", "123")

drop B C

* Transpose: one variable per kept row
sxpose, clear

* - Use the IMA line numbers (first observation) as variable names -
foreach v of varlist _all {
	local line_id = `v'[1]
	display "`line_id'"
	rename `v' imanc_`line_id'
}

* The first observation only carried the names used above
drop in 1

* - Make numeric: "---" and "....." are blanked, then missing values become 0 -
foreach v of varlist _all {
	quietly replace `v' = "" if `v' == "---" | `v' == "....."
	quietly destring `v', replace
	replace `v' = 0 if missing(`v')
}

rename imanc_Line year

* Keep only years present in both this sheet and the file saved so far
merge 1:1 year using clean_data/imanipa_temp.dta, nogenerate keep(3)

save clean_data/imanipa_temp.dta, replace


* - PCE Price Deflator ---------------------------------------------------------

* Download the FRED series DPCERG3A086NBEA, merge it with the IMA/NIPA file by
* year, and use it to re-time every variable whose name starts with "ima"
* (this matches the ima_, imac_ and imanc_ prefixes created above).
freduse DPCERG3A086NBEA,clear

rename DPCERG3A086NBEA deflator

* - Generate important variables -
* The year is taken from the first four characters of the string date
gen year = real(substr(date,1,4))

keep year deflator

lab var deflator "PCE Index"

* - Merge with other variables -
* Only years present in both the deflator and the IMA/NIPA file are kept
merge 1:1 year using clean_data/imanipa_temp.dta, nogen keep(3)

* - Declare the time variable -
* The L. operator used below requires the data to be tsset; nothing earlier
* in this block sets it, so do it explicitly here.
tsset year

* - Adjust date and value of IMA stock variables with deflator
* For each series: divide by the same-year deflator, take the previous year's
* deflated value, then multiply by the current-year deflator. The first year
* therefore becomes missing.
foreach var of varlist ima* {
	replace `var' = `var' * (100/deflator)
	gen L`var' = L.`var'
	replace `var' = L`var' * (deflator/100)
	drop L`var'
}

drop deflator

save clean_data/imanipa_temp.dta,replace



* ------------------------------------------------------------------------------
* - Generate variables -
* ------------------------------------------------------------------------------

* Build the aggregate debt-to-equity ratio and its components from the IMA
* corporate (imac_*) and non-corporate (imanc_*) lines, then keep only the
* year and the imanipa_sh_* results.
use clean_data/imanipa_temp.dta, clear

* - Corporate sector: IMA lines 148, 157 and 150 -
generate cbon = imac_148
generate cequ = imac_157
generate clon = imac_150

* - Non-corporate sector: IMA lines 115 and 123 -
generate nclon = imanc_115
generate ncequ = imanc_123

* - Totals across both sectors -
* Ratio of (cbon + clon + nclon) to (cequ + ncequ)
generate imanipa_sh_deb2equ = (cbon+clon+nclon)/(cequ+ncequ)
* Sum of the two equity lines
generate imanipa_sh_cequ = cequ+ncequ
* Sum of the two loan lines
generate imanipa_sh_clon = clon+nclon

* Everything except year and the three imanipa_sh_* variables is discarded
keeporder year imanipa_sh_*

save clean_data/imanipa_temp.dta, replace


********************************************************************************
* ------------------------------------------------------------------------------
* - DINA data -  
* ------------------------------------------------------------------------------
********************************************************************************

/* THIS PIECE OF THE CODE EXTRACTS DATA FROM THE DINAS, IT TAKES A WHILE TO RUN, SO RUN IT ONCE AND SAVE THE RESULTS INTO TEMP.DTA, THEN JUST USE THAT FILE. 

* - Select variables -
local base id peinc hw* fk* pa* so* sc* no* muni* curr* mmbo* nonm*

* - Loop specification -
local start_year 1966
local end_year 2018

qui{
foreach weight in dweght {
	foreach income in   peinc {
		di "Current variable: `income'"
		
		forvalues year = `start_year'/`end_year' {

			di "  "
			di "Current Year: " `year'

			* - Load data -
			use `weight' `base'  using $data_dina/usdina`year'.dta, clear
			
			* - Collapse data by tax id -
			gcollapse (sum) `weight' (mean) peinc  hw* fk* muni* curr* mmbo* nonm*, by(id)
			
			* - Transform values of dweght to more standard range -
			qui replace `weight' = `weight'/10^5
		
			* - Generate percentiles/ permilles - 
			xtile decile = `income' [aw = `weight'], n(10)
			xtile percentile = `income' [aw = `weight'], n(100)
			xtile permille = `income' [aw = `weight'], n(1000)
			
			foreach var of varlist hw*  fk* muni* curr* mmbo* nonm* `weight' {
				gen `var'_dec = `var' if decile == 10
				gen `var'_cen = `var' if percentile == 100
				gen `var'_mil = `var' if permille == 1000
			}
		
			* - Collapse variables -
			gcollapse(rawsum)  `weight' (sum) hw* fk* muni* curr* mmbo* nonm* [iw = `weight']
		
			* - Generate observation year -
			gen year = `year'
		
			* - Store data -
			if (year == `start_year') {
				qui save clean_data/temp.dta,replace
			}
			else {
				qui append using clean_data/temp.dta			
				qui save clean_data/temp.dta,replace
			}
		}
	}
}
}

*/


* use the dataset that is generated by the commented out bit above (which takes a while to run)
* Load the cached DINA aggregates (clean_data/temp.dta, written by the
* commented-out extraction code above), compute portfolio ratios for the
* _dec / _cen / _mil groups and save them to clean_data/dina_temp.dta.
use clean_data/temp.dta, clear

* All years from either file are kept; the _merge variable this creates is
* dropped by the keeporder call at the end of the block
merge 1:1 year using clean_data/imanipa_temp.dta

* Rescale every matched aggregate by 10^12
foreach v of varlist hw* fk* muni* curr* mmbo* non* {
	replace `v' = `v'/10^12
}

* - Generate variables ---------------------------------------------------------

* Aggregate ratio: hwfix net of (muni + currency + mmbondfund), over hwequ + hwbus
generate dina_sh_deb2equ = (hwfix-(muni+currency+mmbondfund))/(hwequ+hwbus)

* - Disaggregated measures, one set per group suffix -
foreach grp in dec cen mil {

	* share of total hwequ + hwbus held by the group
	generate dina_equity_share_`grp' = (hwequ_`grp' + hwbus_`grp')/(hwequ+hwbus)

	* (muni + currency + mmbondfund) relative to the group's hwequ + hwbus
	generate safe_`grp' = (muni_`grp'+currency_`grp'+mmbondfund_`grp')/(hwequ_`grp' + hwbus_`grp')

	* hwfix net of the three items above, relative to the group's hwequ + hwbus
	generate debt_`grp' = (hwfix_`grp' - (muni_`grp'+currency_`grp'+mmbondfund_`grp'))/(hwequ_`grp' + hwbus_`grp')

	* nonmort relative to the group's hwequ + hwbus
	* (the original author notes this liability already carries a minus sign)
	generate lend_`grp' = nonmort_`grp' / (hwequ_`grp' + hwbus_`grp')

	* - kappa_I: full version, then without safe_, then without safe_ and lend_ -
	generate dina_sh_KI_`grp' = (1+dina_sh_deb2equ)/(1+safe_`grp'+debt_`grp'+lend_`grp')
	generate dina_sh_KI_no_safe_`grp' = (1+dina_sh_deb2equ)/(1+debt_`grp'+lend_`grp')
	generate dina_sh_KI_no_lend_`grp' = (1+dina_sh_deb2equ)/(1+debt_`grp')
}

drop hw* dweght* fk*

* Only year and the dina_sh_* / dina_equity* results are stored
keeporder year dina_sh_* dina_equity*

save clean_data/dina_temp.dta, replace


********************************************************************************
* ------------------------------------------------------------------------------
* - DFA data -
* ------------------------------------------------------------------------------
********************************************************************************

* ------------------------------------------------------------------------------
* - Statistics by income percentiles -
* ------------------------------------------------------------------------------

* - Aggregate levels -----------------------------------------------------------

* Sum the DFA income-levels file over all categories by date and derive the
* aggregate hw* series; stored in clean_data/dfa_temp.dta.
import delimited "${data_dfa}/dfa-income-levels-detail.csv", clear

* One row per date: every variable from assets through networth is summed
gcollapse (sum) assets-networth, by(date)

generate hwequ = corporateequitiesandmutualfundsh
generate hwbus = equityinnoncorpoatebusiness
generate hwpen = pensionentitlements + lifeinsurancereserves
generate hwbon = corporateandforeignbonds
generate hwfin = financialassets
* Financial assets left after removing hwequ, hwbus and hwpen
generate hwfix = financialassets - hwequ - hwbus - hwpen
generate hwhou = realestate
* Liabilities other than home mortgages
generate hwdeb = liabilities - homemortgages
generate hwlon = timedepositsandshortterminvestme

keep date hw*

save clean_data/dfa_temp.dta, replace

* - Top 1% --------------------------------------------------------------------

* Same source file, restricted to category "pct99to100": derive the *_cen
* series and attach them to the aggregate series saved just before.
import delimited "${data_dfa}/dfa-income-levels-detail.csv", clear

* Alternative selection left by the original author (disabled):
*keep if inlist(category,"pct99to100","pct80to99")
keep if category == "pct99to100"

* One row per date: every variable from assets through networth is summed
gcollapse (sum) assets-networth, by(date)

generate hwequ_cen = corporateequitiesandmutualfundsh
generate hwbus_cen = equityinnoncorpoatebusiness
generate hwpen_cen = pensionentitlements + lifeinsurancereserves
generate hwbon_cen = corporateandforeignbonds
generate hwfin_cen = financialassets
generate hwhou_cen = realestate
* Loan liabilities net of home mortgages and consumer credit
generate hwdeb_cen = loansliabilities - homemortgages - consumercredit
generate hwlon_cen = timedepositsandshortterminvestme
generate hwsaf_cen = usgovernmentandmunicipalsecuriti + moneymarketfundshares

keep date hw*

* Keep only dates present in both the group file and the aggregate file
merge 1:1 date using clean_data/dfa_temp.dta, keep(3) nogenerate

save clean_data/dfa_temp.dta, replace

* - Collect all data -----------------------------------------------------------

* - Annualize -
* Average the DFA income series within each year, merge with the IMA/NIPA
* file and compute the dfa_sh_* / dfa_equity* measures; stored in
* clean_data/dfa_temp_income.dta.

* The year is taken from the first four characters of the string date
generate year = real(substr(date,1,4))

* Annual value = mean over the dates falling in that year
gcollapse (mean) hw*, by(year)

* All years from either file are kept; the _merge variable this creates is
* dropped by the keeporder call at the end of the block
merge 1:1 year using clean_data/imanipa_temp.dta

* - Debt to equity ratio in the DFA -
generate dfa_sh_deb2equ = (hwbon+hwlon)/(hwequ+hwbus)

* - By income group -
foreach grp in cen {

	* share of total hwequ + hwbus held by the group
	generate dfa_equity_share_`grp' = (hwequ_`grp'+hwbus_`grp')/(hwequ+hwbus)

	* components, each relative to the group's hwequ + hwbus
	generate safe_`grp' = hwsaf_`grp'/(hwequ_`grp'+hwbus_`grp')
	generate bond_`grp' = (hwbon_`grp' + hwlon_`grp')/(hwequ_`grp'+hwbus_`grp')
	generate debt_`grp' = hwdeb_`grp'/(hwequ_`grp'+hwbus_`grp')

	* - kappa_I: full version, then without the safe_ term -
	generate dfa_sh_KI_`grp' = (1+dfa_sh_deb2equ)/(1+safe_`grp'+bond_`grp'-debt_`grp')
	generate dfa_sh_KI_no_safe_`grp' = (1+dfa_sh_deb2equ)/(1+bond_`grp'-debt_`grp')
}

* Only year and the dfa_sh_* / dfa_equity* results are stored
keeporder year dfa_sh_* dfa_equity*

save clean_data/dfa_temp_income.dta, replace


* ------------------------------------------------------------------------------
* - Statistics by net worth percentiles -
* ------------------------------------------------------------------------------

* - Aggregate levels -----------------------------------------------------------

* Sum the DFA net-worth-levels file over all categories by date and derive the
* aggregate hw* series; this overwrites clean_data/dfa_temp.dta.
import delimited "${data_dfa}/dfa-networth-levels-detail.csv", clear

* One row per date: every variable from assets through networth is summed
gcollapse (sum) assets-networth, by(date)

generate hwequ = corporateequitiesandmutualfundsh
generate hwbus = equityinnoncorpoatebusiness
generate hwpen = pensionentitlements + lifeinsurancereserves
generate hwbon = corporateandforeignbonds
generate hwfin = financialassets
* Financial assets left after removing hwequ, hwbus and hwpen
generate hwfix = financialassets - hwequ - hwbus - hwpen
generate hwhou = realestate
* Liabilities other than home mortgages
generate hwdeb = liabilities - homemortgages
generate hwlon = timedepositsandshortterminvestme

keep date hw*

save clean_data/dfa_temp.dta, replace

* - Top 1% --------------------------------------------------------------------

* Same source file, restricted to category "Top1": derive the *_cen series
* and attach them to the aggregate series saved just before.
import delimited "${data_dfa}/dfa-networth-levels-detail.csv", clear

keep if category == "Top1"

* One row per date: every variable from assets through networth is summed
gcollapse (sum) assets-networth, by(date)

generate hwequ_cen = corporateequitiesandmutualfundsh
generate hwbus_cen = equityinnoncorpoatebusiness
generate hwpen_cen = pensionentitlements + lifeinsurancereserves
generate hwbon_cen = corporateandforeignbonds
generate hwfin_cen = financialassets
generate hwhou_cen = realestate
* Loan liabilities net of home mortgages and consumer credit
generate hwdeb_cen = loansliabilities - homemortgages - consumercredit
generate hwlon_cen = timedepositsandshortterminvestme
generate hwsaf_cen = usgovernmentandmunicipalsecuriti + moneymarketfundshares

keep date hw*

* Keep only dates present in both the group file and the aggregate file
merge 1:1 date using clean_data/dfa_temp.dta, keep(3) nogenerate

save clean_data/dfa_temp.dta, replace

* - Collect all data -----------------------------------------------------------

* - Annualize -
* Average the DFA net-worth series within each year, merge with the IMA/NIPA
* file and compute the dfa_nw_* / dfa_sh_nw_* measures; stored in
* clean_data/dfa_temp_networth.dta.

* The year is taken from the first four characters of the string date
generate year = real(substr(date,1,4))

* Annual value = mean over the dates falling in that year
gcollapse (mean) hw*, by(year)

* All years from either file are kept; the _merge variable this creates is
* dropped by the keeporder call at the end of the block
merge 1:1 year using clean_data/imanipa_temp.dta

* - Debt to equity ratio in the DFA -
generate dfa_nw_sh_deb2equ = (hwbon+hwlon)/(hwequ+hwbus)

* - By net worth group -
foreach grp in cen {

	* share of total hwequ + hwbus held by the group
	generate dfa_nw_equity_share_`grp' = (hwequ_`grp'+hwbus_`grp')/(hwequ+hwbus)

	* components, each relative to the group's hwequ + hwbus
	generate safe_`grp' = hwsaf_`grp'/(hwequ_`grp'+hwbus_`grp')
	generate bond_`grp' = (hwbon_`grp' + hwlon_`grp')/(hwequ_`grp'+hwbus_`grp')
	* NOTE(review): loan_ is built from hwlon_, which is also part of bond_, so
	* bond_ - loan_ below reduces to hwbon_ over equity. The income-percentile
	* section subtracts a ratio built from hwdeb_ instead, and hwdeb_cen is
	* computed for this section but never used. Confirm which is intended.
	generate loan_`grp' = hwlon_`grp'/(hwequ_`grp'+hwbus_`grp')

	* - kappa_I: full version, then without the safe_ term -
	generate dfa_sh_nw_KI_`grp' = (1+dfa_nw_sh_deb2equ)/(1+safe_`grp'+bond_`grp'-loan_`grp')
	generate dfa_sh_nw_KI_no_safe_`grp' = (1+dfa_nw_sh_deb2equ)/(1+bond_`grp'-loan_`grp')
}

* Only year and the dfa_sh_* / dfa_nw_* results are stored
keeporder year dfa_sh_* dfa_nw_*

save clean_data/dfa_temp_networth.dta, replace

********************************************************************************
* ------------------------------------------------------------------------------
* - Merge data -
* ------------------------------------------------------------------------------
*********************************************************************************

* Combine the DINA, IMA/NIPA and DFA results into one annual dataset, save it
* in the data folder and in the replication folder, and export it as CSV.
use clean_data/dina_temp.dta,clear

* dina_temp.dta is already in memory, so it is not merged with itself again;
* that 1:1 self-merge matched every row and added nothing.
* All years from any source are kept (no keep() restriction).
merge 1:1 year using clean_data/imanipa_temp.dta, nogen
merge 1:1 year using clean_data/dfa_temp_income.dta, nogen
merge 1:1 year using clean_data/dfa_temp_networth.dta, nogen

* Put year first, followed by every *_sh_* variable
order year *_sh_* 

tsset year


* The same dataset is written under two file names
save clean_data/imanipa.dta, replace
save clean_data/dina_imanipa_dfa_data_v01.dta, replace

* Remove intermediate files. clean_data/temp.dta (the cached DINA extraction),
* dfa_temp_income.dta and dfa_temp_networth.dta are left on disk.
erase clean_data/imanipa_temp.dta
erase clean_data/dfa_temp.dta
erase clean_data/dina_temp.dta

* Copy of the dataset for the replication package
cd "/Users/lukaszrachel/Dropbox/AutomationInequality/replication_october_2021/measuring_returns/raw_data"
save dina_imanipa_dfa_data_v01.dta, replace

* The CSV goes to the clean_data folder next to raw_data
cd ..
export delimited clean_data/kappa_shares.csv, replace

