/*********************Stata code generating Stata file gperc_income_1900_2014.dta (long-run national income series generated from fiscal income gpercentile series)*********/


* Start from an empty dataset and switch off output paging so the script runs without pauses
clear
set more off

* Project root folder: every cd further down is built from the gperc global.
* Exactly one definition must be active; the other ones stay commented out.
* John
*global gperc "C:\Users\John\Dropbox\WIDFrance\Papers\GGP2017DINA"
* Thomas
*global gperc "D:\Dropbox\WIDFrance\Papers\GGP2017DINA"
* Bertrand
global gperc "C:\Users\G839276\Dropbox\WIDFrance\Papers\GGP2017DINA"


/**********************importing equalsplit national income vs fiscal income correction factors 1900-2014****************/
* Builds one correction factor per year and percentile p and saves them in long format:
*   corr   = (thr/yav in the pre-tax series)  divided by (thr/yav in the fiscal series)
*   cortop = (ytop/yav in the pre-tax series) divided by (ytop/yav in the fiscal series)
* 1970-2014 come from the two input files, 1900/1910/1915 are set to 1 and
* 1916-1969 are interpolated linearly between 1915 and 1970.
	
* Correction factors 1970-2012
	* Pre-tax series Table C13
		cd "$gperc/GGP2017DINAAppendixC/"
		use DataFiles/Results/gperc/gpercr_pretaxCN_equal_19702014.dta, clear
			gen coef_pretax=thr/yav
			gen coef_pretaxtop=ytop/yav
			* Percentile: gperc is multiplied by 10 once for every cutoff it falls below
			* (x1000 below 990, x100 below 9990, x10 below 99990, unchanged otherwise)
				gen p =gperc
				foreach cutoff in 99990 9990 990 {
					replace p=p*10 if gperc<`cutoff'
				}
			keep year p coef*
			* Park the pre-tax coefficients in a temporary file until the merge below
			tempfile temp
			save "`temp'",replace
	* Fiscal income series excl cap Table C4
		use DataFiles/Results/gperc/gpercr_irpp_equal_19702014.dta, clear
		gen coef_fisc=thr/yav
		gen coef_fisctop=ytop/yav
		* Percentile (same rescaling as for the pre-tax series)
				gen p =gperc
				foreach cutoff in 99990 9990 990 {
					replace p=p*10 if gperc<`cutoff'
				}
		keep year p coef*
	* Reconciliation
		merge 1:1 year p using "`temp'"
		gen corr=coef_pretax/coef_fisc
		gen cortop=coef_pretaxtop/coef_fisctop
		keep if year>=1970
		keep year p corr cortop
		reshape wide corr cortop,i(p) j(year)
	* Adjustment for bottom 10%
			order p corr????
			* The bracketed numbers are row positions in the wide data.
			* Assumes rows are in ascending p order with steps of 1000 at the bottom,
			* so that row 11 is p==10000 and row 12 the next percentile - TODO confirm
			replace corr1970=corr1970[12]*corr1971/corr1971[12] if p==10000
			* Every percentile below p==10000 receives the factor found in row 11
			forval i=1970/2014 {
				replace corr`i'=corr`i'[11] if p<10000
			}
			* 1979 at the very top is replaced by the mean of its two neighbouring years
			replace corr1979=(corr1978+corr1980)/2 if p>=99998	
			replace cortop1979=(cortop1978+cortop1980)/2 if p>=99998
	* For years 2013 and 2014
		* Both years take the 2010-2012 average of corr for every p (cortop is left untouched)
		forval i=2013/2014 {
			replace corr`i'=(corr2012+ corr2011+corr2010)/3
			*if (p>=22000 & p<=29000) | 	(p>=33000 & p<=38000)
		}
* Correction factors 1900-1970
	* Forward slash instead of a trailing backslash: same separator as the cd
	* above, and it also resolves outside Windows
	cd "$gperc/GGP2017DINAAppendixB/"	
	foreach num of numlist 1900 1910 1915 { 
		gen corr`num'=1
		gen cortop`num'=.
	}
	* Linear interpolation between corr1915 (equal to 1) and corr1970 over the 55 years
	forvalues year = 1916/1969{
		gen corr`year'=corr1915+(`year'-1915)*(corr1970-corr1915)/55
		gen cortop`year'=.
	}
reshape long corr cortop,i(p) j(year)
sort year p
save StataFiles/temp/corrfactorsequalsplit.dta, replace



/**********************importing taxunit national income vs fiscal income correction factors 1900-2014****************/
* Tax-unit counterpart of the equal-split correction factors:
*   corrtu   = (thr/yav in the pre-tax series)  divided by (thr/yav in the fiscal series)
*   cortutop = (ytop/yav in the pre-tax series) divided by (ytop/yav in the fiscal series)
* Saved in long format, one row per year and percentile p.
	
	* Correction factors 1970-2014
	********************************
		* Pre-tax series Table C15
			cd "$gperc/GGP2017DINAAppendixC/"
			use DataFiles/Results/gperc/gpercr_pretaxCN_foy_19702014.dta, clear
				gen coef_pretax=thr/yav
				gen coef_pretaxtop=ytop/yav
				* Percentile: gperc is multiplied by 10 once for every cutoff it falls below
				* (x1000 below 990, x100 below 9990, x10 below 99990, unchanged otherwise)
					gen p =gperc
					foreach cutoff in 99990 9990 990 {
						replace p=p*10 if gperc<`cutoff'
					}
				keep year p coef*
				* Park the pre-tax coefficients in a temporary file until the merge below
				tempfile temp
				save "`temp'",replace
		* Fiscal income series excl cap Table C6
			use DataFiles/Results/gperc/gpercr_irpp_foy_19702014.dta, clear
			gen coef_fisc=thr/yav
			gen coef_fisctop=ytop/yav
			* Percentile (same rescaling as for the pre-tax series)
					gen p =gperc
					foreach cutoff in 99990 9990 990 {
						replace p=p*10 if gperc<`cutoff'
					}
			keep year p coef*
		* Reconciliation
			merge 1:1 year p using "`temp'"
			gen corrtu=coef_pretax/coef_fisc
			gen cortutop=coef_pretaxtop/coef_fisctop
			keep if year>=1970
			keep year p corrtu cortutop
			reshape wide corrtu cortutop,i(p) j(year)
			* Adjustment for bottom 10%
				order p corrtu????
				* The bracketed number is a row position in the wide data.
				* Assumes rows are in ascending p order so that row 11 is p==10000 - TODO confirm
				forval i=1970/2014 {
					replace corrtu`i'=corrtu`i'[11] if p<10000
				}			
				* 1979 at the very top is replaced by the mean of its two neighbouring years
				* NOTE(review): the cutoff here is 99997 whereas the equal-split section uses 99998 - confirm
				replace corrtu1979=(corrtu1978+corrtu1980)/2 if p>=99997
				replace cortutop1979=(cortutop1978+cortutop1980)/2 if p>=99997
				* NOTE(review): unlike the equal-split section, 2013 and 2014 are not
				* replaced by the 2010-2012 average here - confirm this is intended
		
	* Correction factors 1900-1969
	*******************************
		* Forward slash instead of a trailing backslash: same separator as the cd
		* above, and it also resolves outside Windows
		cd "$gperc/GGP2017DINAAppendixB/"	
		foreach num of numlist 1900 1910 1915 { 
			gen corrtu`num'=1
			gen cortutop`num'=.
		}
		* Linear interpolation between corrtu1915 (equal to 1) and corrtu1970 over the 55 years
		forvalues year = 1916/1969{
			gen corrtu`year'=corrtu1915+(`year'-1915)*(corrtu1970-corrtu1915)/55
			* The top factor is left missing before 1970, as for 1900/1910/1915 just
			* above and as in the equal-split section. It used to be filled with the
			* interpolated corrtu, which is a different quantity.
			gen cortutop`year'=.
		}
		reshape long corrtu cortutop,i(p) j(year)
		sort year p
		save StataFiles/temp/corrfactorstaxunit.dta, replace


/****************applying correction factors to fiscal income series in order to obtain pretax national income series****/
* Variable stubs handled below: ythr, yint, ytop and b, each for the two
* suffixes _t and _j. The _fninc_ versions are blended with the _fiinc_
* versions before 1970 and then renamed to _ptinc_.
	
	* Load data
		* Yearly averages first, then the fiscal income g-percentiles, then both sets of correction factors
		cd "$gperc/GGP2017DINAAppendixD\"
		use year ayntu aynad using StataFiles/temp/averages.dta, clear
		merge 1:m year using StataFiles/gperc_fiscalincome_1900_2014.dta, nogen
		cd "$gperc/GGP2017DINAAppendixB\"
		merge 1:1 year p using StataFiles/temp/corrfactorsequalsplit.dta, nogen
		merge 1:1 year p using StataFiles/temp/corrfactorstaxunit.dta, nogen

	* Adjustments for years 1900, 1910 and 1915
		* In these three years the fninc series is simply the fiinc series
		foreach unit in t j {
			foreach stat in ythr yint ytop b {
				replace `stat'_fninc_`unit'=`stat'_fiinc_`unit' if inlist(year,1900,1910,1915)
			}
		}
	
	* Adjustments for years 1916-1969
		* Weighted mean of the two series: the fninc weight grows from 1/55 in 1916
		* to 54/55 in 1969, the fiinc weight shrinks accordingly
		forvalues yr = 1916/1969 {
			local w_fn=`yr'-1915
			local w_fi=1970-`yr'
			foreach unit in t j {
				foreach stat in ythr yint ytop b {
					replace `stat'_fninc_`unit'=(`w_fn'*`stat'_fninc_`unit'+`w_fi'*`stat'_fiinc_`unit')/55 if year==`yr'
				}
			}
		}
	* Name of variables
		* From here on the blended series carry the ptinc label
		foreach unit in t j {
			foreach stat in ythr yint ytop b {
				rename `stat'_fninc_`unit' `stat'_ptinc_`unit'
			}
		}
	
	* Apply correction factors
		sort year p
		* Adjustment
			* Smooths the correction factors wherever applying them would make the
			* corrected thresholds decrease from one percentile to the next within a year.
			* Working variables, per suffix:
			*   temp  = corrected threshold (factor x ythr)
			*   prob  = 1 when temp is below temp of the previous row of the same year
			*   temp2 = set only on the last row of a flagged run: 2 by default, or i+1
			*           when temp i rows back is below the current temp while temp i-1
			*           rows back is above it (i from 2 to 30, later matches overwrite)
			*   temp3 = for a row lying j rows before such an end row, the value read
			*           off the straight line in ythr drawn between the row temp2 rows
			*           before the end row and the end row itself
			* corr is then redefined as temp3/ythr wherever temp3 was filled.
			* The factors are renamed so that both share the suffix of their series.
			rename corr corr_j
			rename corrtu corr_t
			* _t and _j are plain suffixes, not new variables, hence foreach ... in
			foreach var in _t _j {
				gen temp`var'=corr`var'*ythr_ptinc`var'
				gen prob`var'=1 if temp`var'<temp`var'[_n-1] & year==year[_n-1]
				gen temp2`var'=2 if prob`var'==1  & prob`var'[_n+1]==.
				forval i=2/30 {
					replace temp2`var'=`i'+1 if (temp`var'[_n-`i']<temp`var')*(temp`var'[_n-`i'+1]>temp`var')  & prob`var'==1 & prob`var'[_n+1]==.
				}
				sum temp2`var'
				* With no flagged row temp2 is entirely missing and r(max) is missing too,
				* which is not a usable upper bound for the loop below: fall back to 0 so
				* that the loop is skipped and the factors are left as they are
				local max=cond(r(N)>0,r(max),0)
				gen temp3`var'=0
				forval i=1/`max' {
					local supp=`i'-1
					forval j=1/`supp' {
						replace temp3`var'= temp`var'[_n-`i'+`j']+(ythr_ptinc`var'-ythr_ptinc`var'[_n-`i'+`j'])/(ythr_ptinc`var'[_n+`j']-ythr_ptinc`var'[_n-`i'+`j'])*(temp`var'[_n+`j']-temp`var'[_n-`i'+`j']) if temp2`var'[_n+`j']==`i'
					}
				}
				* temp3 still equal to 0 means the row was not part of any smoothed run
				replace corr`var'=temp3`var'/ythr_ptinc`var' if temp3`var'~=0
				drop temp* prob*
			}
			rename corr_t  corrtu
			rename  corr_j corr
		
		* threshold
			* Factor used for each suffix: corrtu/cortutop for _t, corr/cortop for _j
			local fac_t corrtu
			local fac_j corr
			local topfac_t cortutop
			local topfac_j cortop
			foreach s in t j {
				replace ythr_ptinc_`s'=`fac_`s''*ythr_ptinc_`s'
			}
			* check
				* Tabulates the rows whose corrected threshold is below the previous row of the same year
				foreach s in t j {
					gen prob2_`s'=1 if ythr_ptinc_`s'[_n-1]>ythr_ptinc_`s' & year[_n-1]==year
					tab prob2_`s'
					drop prob2*
				}
		* Intermediate average
			* Below the last percentile the bracket average gets the mean of the factors
			* at its two ends; the last percentile gets its own factor before 1970 and
			* the top factor from 1970 onwards
			foreach s in t j {
				local fac `fac_`s''
				replace yint_ptinc_`s'=((`fac'+`fac'[_n+1])/2)*yint_ptinc_`s' if p<99999 
				replace yint_ptinc_`s'=`fac'*yint_ptinc_`s' if p==99999 & year<1970
				replace yint_ptinc_`s'=`topfac_`s''*yint_ptinc_`s' if p==99999 & year>=1970
			}
			* Adjustment
				* A bracket average falling outside its two thresholds is moved to their midpoint
				foreach s in t j {
					replace yint_ptinc_`s'=(ythr_ptinc_`s'+ythr_ptinc_`s'[_n+1])/2 if (yint_ptinc_`s'>ythr_ptinc_`s'[_n+1] | yint_ptinc_`s'<ythr_ptinc_`s') & year==year[_n+1]
				}
		* Check
			* Lists, for the corrected and the fiscal series, the years with at least one
			* ordering violation between consecutive percentiles of the same year
			sort year p
			foreach series in ptinc_t ptinc_j fiinc_t fiinc_j {
				gen prob=(ythr_`series'>ythr_`series'[_n+1] | yint_`series'>ythr_`series'[_n+1] | yint_`series'<ythr_`series' | yint_`series'>yint_`series'[_n+1]) & year==year[_n+1]
				tab year if prob==1
				drop prob
			}
		* Ratio of average
		* Apply ratio
			* For each population (tu goes with suffix _t, ad with suffix _j): yearly
			* average implied by the bracket averages (sum of f x yint over 100000),
			* its ratio to the target average ayntu or aynad, and rescaling of the
			* thresholds and bracket averages by that ratio
			foreach pop in tu ad {
				local s=cond("`pop'"=="tu","t","j")
				by year: egen ay`pop'=total(f*yint_ptinc_`s')
				replace ay`pop'=ay`pop'/100000
				gen ratio`pop'=ayn`pop'/ay`pop'
				replace ythr_ptinc_`s'=ratio`pop'*ythr_ptinc_`s'
				replace yint_ptinc_`s'=ratio`pop'*yint_ptinc_`s'
			}
		* ytop and b
			* Walk down from the top percentile: the average above p cumulates the
			* bracket average of p with the average above the row just processed.
			* The second replace relies on the descending p order and on replace
			* working through the rows one after the other.
			gsort year -p
			foreach s in t j {
				replace ytop_ptinc_`s'=yint_ptinc_`s' if p==99999
				replace ytop_ptinc_`s'=(f*yint_ptinc_`s'+(100000-p[_n-1])*ytop_ptinc_`s'[_n-1])/(100000-p) if p<99999
				* b is the ratio of the average above p to the threshold at p
				replace b_ptinc_`s'=ytop_ptinc_`s'/ythr_ptinc_`s'
			}
	* Working variables (correction factors, averages, ratios) are dropped before saving
	drop cor* a* ratio*
	sort year p
	save StataFiles/gperc_income_1900_2014.dta, replace
	
	/*********************check income ordering of corrected g-percentiles***********/
	/*
	use StataFiles/gperc_income_1900_2014.dta, clear
	sort year p
	foreach var of newlist _t _j {
		gen x`var'=0
		replace x`var'=1 if p>0 & ythr_ptinc`var'<ythr_ptinc`var'[_n-1]
		ta year if x`var'==1
		gen y`var'=0
		replace y`var'=1 if p<99999 & (yint_ptinc`var'<ythr_ptinc`var' | yint_ptinc`var'>ythr_ptinc`var'[_n+1])
		ta year if y`var'==1
	}
	*/
	/****check 04-27-17: all x and y flags are equal to zero, except one (it could be corrected by putting yint in the middle, but the point is that smoothing the correction factors solves 99.9% of the problem)****/




/***************extracting table B1: summary statistics for distribution of pretax national income among equal-split individuals*/
* One row per year with the mean, income shares, thresholds relative to the mean,
* ratios of average-above-threshold to threshold, and p0.
use StataFiles/gperc_income_1900_2014.dta, clear
keep year p f ytop_ptinc_j ythr_ptinc_j yint_ptinc_j
rename ytop_ptinc_j ytop
rename ythr_ptinc_j ythr
rename yint_ptinc_j yint
sort year p
* p0: point of the distribution where thresholds switch from zero to positive.
* It is computed on the row where the switch happens and then carried to every
* row of the year. Previously the value was kept only when that row was p==0,
* because all other rows are dropped further down. Both terms are now on the
* same scale (fraction of the population); results are unchanged when the
* switch happens on the p==0 row.
* Assumes f is the width of the bracket in units of 1/100000 - TODO confirm
gen p0row=p/100000+(1-yint/(ythr[_n+1]-ythr))*f/100000 if ythr==0 & ythr[_n+1]>0 & year[_n+1]==year
by year: egen p0=max(p0row)
replace p0=0 if p0==.
drop p0row
keep if p==0 | p==10000 | p==50000 | p==90000 | p==99000 | p==99900 | p==99990 | p==99999
sort year p 
* From here on every statistic is built on the p==0 row of each year. Rows _n+1
* to _n+7 are read as p = 10000, 50000, 90000, 99000, 99900, 99990 and 99999 of
* the same year, which requires all eight percentiles to be present in every year.
* ytop on the p==0 row is used as the overall mean
gen ymean=ytop if p==0
* Income shares
gen bottom10=1-0.9*ytop[_n+1]/ytop if p==0
gen bottom50=1-0.5*ytop[_n+2]/ytop if p==0
gen top10=0.1*ytop[_n+3]/ytop if p==0
gen middle40=1-top10-bottom50 if p==0
gen top1=0.01*ytop[_n+4]/ytop if p==0
gen top01=0.001*ytop[_n+5]/ytop if p==0
gen top001=0.0001*ytop[_n+6]/ytop if p==0
gen top0001=0.00001*ytop[_n+7]/ytop if p==0
* Thresholds as multiples of the mean
gen p10=ythr[_n+1]/ymean if p==0
gen p50=ythr[_n+2]/ymean if p==0
gen p90=ythr[_n+3]/ymean if p==0
gen p99=ythr[_n+4]/ymean if p==0
gen p999=ythr[_n+5]/ymean if p==0
gen p9999=ythr[_n+6]/ymean if p==0
gen p99999=ythr[_n+7]/ymean if p==0
* Ratio of the average above each threshold to the threshold itself
gen b10=ytop[_n+1]/ythr[_n+1] if p==0
gen b50=ytop[_n+2]/ythr[_n+2] if p==0
gen b90=ytop[_n+3]/ythr[_n+3] if p==0
gen b99=ytop[_n+4]/ythr[_n+4] if p==0
gen b999=ytop[_n+5]/ythr[_n+5] if p==0
gen b9999=ytop[_n+6]/ythr[_n+6] if p==0
gen b99999=ytop[_n+7]/ythr[_n+7] if p==0
keep if p==0
keep year ymean bottom10 bottom50 middle40 top10 top1 top01 top001 top0001 p10 p50 p90 p99 p999 p9999 p99999 b10 b50 b90 b99 b999 b9999 b99999 p0 
order year ymean bottom10 bottom50 middle40 top10 top1 top01 top001 top0001 p10 p50 p90 p99 p999 p9999 p99999 b10 b50 b90 b99 b999 b9999 b99999 p0 
save StataFiles/tables/TableB1.dta, replace
export excel using StataFiles/tables/exportresults_DINA_ApB.xlsx, sheet("TB1") firstrow(variables) sheetmodify

/*******************extracting table B2: detailed statistics for distribution of pretax national income among equal-split individuals*/
* Wide table (one row per percentile p, one column per year and statistic)
* completed with period averages of b.

	use StataFiles/gperc_income_1900_2014.dta, clear
	keep year p ythr_ptinc_j ytop_ptinc_j b_ptinc_j
	rename (ytop_ptinc_j ythr_ptinc_j b_ptinc_j) (ytop ythr b)
	order year p ythr ytop b
	sort year p
	reshape wide ythr ytop b, i(p) j(year)

* Period averages of b: the two available years before 1915, 1915-1919,
* the eight full decades from 1920 to 2009, and 2010-2014
gen b1900_10=(b1900+b1910)/2
gen b1915_19=(b1915+b1916+b1917+b1918+b1919)/5
forvalues first=1920(10)2000 {
	local last=`first'+9
	* Build the sum of the ten yearly columns of the decade
	local terms b`first'
	forvalues y=`=`first'+1'/`last' {
		local terms `terms'+b`y'
	}
	* The variable name ends with the last two digits of the final year
	gen b`first'_`=substr("`last'",3,2)'=(`terms')/10
}
gen b2010_14=(b2010+b2011+b2012+b2013+b2014)/5
save StataFiles/tables/TableB2.dta, replace
export excel using StataFiles/tables/exportresults_DINA_ApB.xlsx, sheet("TB2") firstrow(variables) sheetmodify

/***************extracting table B3: summary statistics for distribution of pretax national income among tax units*/
* One row per year with the mean, income shares, thresholds relative to the mean,
* ratios of average-above-threshold to threshold, and p0.
use StataFiles/gperc_income_1900_2014.dta, clear
keep year p f ytop_ptinc_t ythr_ptinc_t yint_ptinc_t
rename ytop_ptinc_t ytop
rename ythr_ptinc_t ythr
rename yint_ptinc_t yint
sort year p
* p0: point of the distribution where thresholds switch from zero to positive.
* It is computed on the row where the switch happens and then carried to every
* row of the year. Previously the value was kept only when that row was p==0,
* because all other rows are dropped further down. Both terms are now on the
* same scale (fraction of the population); results are unchanged when the
* switch happens on the p==0 row.
* Assumes f is the width of the bracket in units of 1/100000 - TODO confirm
gen p0row=p/100000+(1-yint/(ythr[_n+1]-ythr))*f/100000 if ythr==0 & ythr[_n+1]>0 & year[_n+1]==year
by year: egen p0=max(p0row)
replace p0=0 if p0==.
drop p0row
keep if p==0 | p==10000 | p==50000 | p==90000 | p==99000 | p==99900 | p==99990 | p==99999
sort year p 
* From here on every statistic is built on the p==0 row of each year. Rows _n+1
* to _n+7 are read as p = 10000, 50000, 90000, 99000, 99900, 99990 and 99999 of
* the same year, which requires all eight percentiles to be present in every year.
* ytop on the p==0 row is used as the overall mean
gen ymean=ytop if p==0
* Income shares
gen bottom10=1-0.9*ytop[_n+1]/ytop if p==0
gen bottom50=1-0.5*ytop[_n+2]/ytop if p==0
gen top10=0.1*ytop[_n+3]/ytop if p==0
gen middle40=1-top10-bottom50 if p==0
gen top1=0.01*ytop[_n+4]/ytop if p==0
gen top01=0.001*ytop[_n+5]/ytop if p==0
gen top001=0.0001*ytop[_n+6]/ytop if p==0
gen top0001=0.00001*ytop[_n+7]/ytop if p==0
* Thresholds as multiples of the mean
gen p10=ythr[_n+1]/ymean if p==0
gen p50=ythr[_n+2]/ymean if p==0
gen p90=ythr[_n+3]/ymean if p==0
gen p99=ythr[_n+4]/ymean if p==0
gen p999=ythr[_n+5]/ymean if p==0
gen p9999=ythr[_n+6]/ymean if p==0
gen p99999=ythr[_n+7]/ymean if p==0
* Ratio of the average above each threshold to the threshold itself
gen b10=ytop[_n+1]/ythr[_n+1] if p==0
gen b50=ytop[_n+2]/ythr[_n+2] if p==0
gen b90=ytop[_n+3]/ythr[_n+3] if p==0
gen b99=ytop[_n+4]/ythr[_n+4] if p==0
gen b999=ytop[_n+5]/ythr[_n+5] if p==0
gen b9999=ytop[_n+6]/ythr[_n+6] if p==0
gen b99999=ytop[_n+7]/ythr[_n+7] if p==0
keep if p==0
keep year ymean bottom10 bottom50 middle40 top10 top1 top01 top001 top0001 p10 p50 p90 p99 p999 p9999 p99999 b10 b50 b90 b99 b999 b9999 b99999 p0 
order year ymean bottom10 bottom50 middle40 top10 top1 top01 top001 top0001 p10 p50 p90 p99 p999 p9999 p99999 b10 b50 b90 b99 b999 b9999 b99999 p0 
save StataFiles/tables/TableB3.dta, replace
export excel using StataFiles/tables/exportresults_DINA_ApB.xlsx, sheet("TB3") firstrow(variables) sheetmodify

/*******************extracting table B4: detailed statistics for distribution of pretax national income among tax units*/
* Wide table (one row per percentile p, one column per year and statistic)
* completed with period averages of b.

	use StataFiles/gperc_income_1900_2014.dta, clear
	keep year p ythr_ptinc_t ytop_ptinc_t b_ptinc_t
	rename (ytop_ptinc_t ythr_ptinc_t b_ptinc_t) (ytop ythr b)
	order year p ythr ytop b
	sort year p
	reshape wide ythr ytop b, i(p) j(year)
	
	* Period averages of b: the two available years before 1915, 1915-1919,
	* the eight full decades from 1920 to 2009, and 2010-2014
	gen b1900_10=(b1900+b1910)/2
	gen b1915_19=(b1915+b1916+b1917+b1918+b1919)/5
	forvalues first=1920(10)2000 {
		local last=`first'+9
		* Build the sum of the ten yearly columns of the decade
		local terms b`first'
		forvalues y=`=`first'+1'/`last' {
			local terms `terms'+b`y'
		}
		* The variable name ends with the last two digits of the final year
		gen b`first'_`=substr("`last'",3,2)'=(`terms')/10
	}
	gen b2010_14=(b2010+b2011+b2012+b2013+b2014)/5
save StataFiles/tables/TableB4.dta, replace
export excel using StataFiles/tables/exportresults_DINA_ApB.xlsx, sheet("TB4") firstrow(variables) sheetmodify

/***************extracting table B5: summary statistics for distribution of pretax fiscal income among equal-split individuals*/
* One row per year with the mean, income shares, thresholds relative to the mean,
* ratios of average-above-threshold to threshold, and p0.
use StataFiles/gperc_income_1900_2014.dta, clear
keep year p f ytop_fiinc_j ythr_fiinc_j yint_fiinc_j
rename ytop_fiinc_j ytop
rename ythr_fiinc_j ythr
rename yint_fiinc_j yint
sort year p
* p0: point of the distribution where thresholds switch from zero to positive.
* It is computed on the row where the switch happens and then carried to every
* row of the year. Previously the value was kept only when that row was p==0,
* because all other rows are dropped further down. Both terms are now on the
* same scale (fraction of the population); results are unchanged when the
* switch happens on the p==0 row.
* Assumes f is the width of the bracket in units of 1/100000 - TODO confirm
gen p0row=p/100000+(1-yint/(ythr[_n+1]-ythr))*f/100000 if ythr==0 & ythr[_n+1]>0 & year[_n+1]==year
by year: egen p0=max(p0row)
replace p0=0 if p0==.
drop p0row
keep if p==0 | p==10000 | p==50000 | p==90000 | p==99000 | p==99900 | p==99990 | p==99999
sort year p 
* From here on every statistic is built on the p==0 row of each year. Rows _n+1
* to _n+7 are read as p = 10000, 50000, 90000, 99000, 99900, 99990 and 99999 of
* the same year, which requires all eight percentiles to be present in every year.
* ytop on the p==0 row is used as the overall mean
gen ymean=ytop if p==0
* Income shares
gen bottom10=1-0.9*ytop[_n+1]/ytop if p==0
gen bottom50=1-0.5*ytop[_n+2]/ytop if p==0
gen top10=0.1*ytop[_n+3]/ytop if p==0
gen middle40=1-top10-bottom50 if p==0
gen top1=0.01*ytop[_n+4]/ytop if p==0
gen top01=0.001*ytop[_n+5]/ytop if p==0
gen top001=0.0001*ytop[_n+6]/ytop if p==0
gen top0001=0.00001*ytop[_n+7]/ytop if p==0
* Thresholds as multiples of the mean
gen p10=ythr[_n+1]/ymean if p==0
gen p50=ythr[_n+2]/ymean if p==0
gen p90=ythr[_n+3]/ymean if p==0
gen p99=ythr[_n+4]/ymean if p==0
gen p999=ythr[_n+5]/ymean if p==0
gen p9999=ythr[_n+6]/ymean if p==0
gen p99999=ythr[_n+7]/ymean if p==0
* Ratio of the average above each threshold to the threshold itself
gen b10=ytop[_n+1]/ythr[_n+1] if p==0
gen b50=ytop[_n+2]/ythr[_n+2] if p==0
gen b90=ytop[_n+3]/ythr[_n+3] if p==0
gen b99=ytop[_n+4]/ythr[_n+4] if p==0
gen b999=ytop[_n+5]/ythr[_n+5] if p==0
gen b9999=ytop[_n+6]/ythr[_n+6] if p==0
gen b99999=ytop[_n+7]/ythr[_n+7] if p==0
keep if p==0
keep year ymean bottom10 bottom50 middle40 top10 top1 top01 top001 top0001 p10 p50 p90 p99 p999 p9999 p99999 b10 b50 b90 b99 b999 b9999 b99999 p0 
order year ymean bottom10 bottom50 middle40 top10 top1 top01 top001 top0001 p10 p50 p90 p99 p999 p9999 p99999 b10 b50 b90 b99 b999 b9999 b99999 p0 
save StataFiles/tables/TableB5.dta, replace
export excel using StataFiles/tables/exportresults_DINA_ApB.xlsx, sheet("TB5") firstrow(variables) sheetmodify

/*******************extracting table B6: detailed statistics for distribution of pretax fiscal income among equal-split individuals*/
* Wide table (one row per percentile p, one column per year and statistic)
* completed with period averages of b.
	use StataFiles/gperc_income_1900_2014.dta, clear
	keep year p ythr_fiinc_j ytop_fiinc_j b_fiinc_j
	rename (ytop_fiinc_j ythr_fiinc_j b_fiinc_j) (ytop ythr b)
	order year p ythr ytop b
	sort year p
	reshape wide ythr ytop b, i(p) j(year)

* Period averages of b: the two available years before 1915, 1915-1919,
* the eight full decades from 1920 to 2009, and 2010-2014
gen b1900_10=(b1900+b1910)/2
gen b1915_19=(b1915+b1916+b1917+b1918+b1919)/5
forvalues first=1920(10)2000 {
	local last=`first'+9
	* Build the sum of the ten yearly columns of the decade
	local terms b`first'
	forvalues y=`=`first'+1'/`last' {
		local terms `terms'+b`y'
	}
	* The variable name ends with the last two digits of the final year
	gen b`first'_`=substr("`last'",3,2)'=(`terms')/10
}
gen b2010_14=(b2010+b2011+b2012+b2013+b2014)/5
save StataFiles/tables/TableB6.dta, replace
export excel using StataFiles/tables/exportresults_DINA_ApB.xlsx, sheet("TB6") firstrow(variables) sheetmodify

/***************extracting table B7: summary statistics for distribution of pretax fiscal income among tax units*/
* One row per year with the mean, income shares, thresholds relative to the mean,
* ratios of average-above-threshold to threshold, and p0.
use StataFiles/gperc_income_1900_2014.dta, clear
keep year p f ytop_fiinc_t ythr_fiinc_t yint_fiinc_t
rename ytop_fiinc_t ytop
rename ythr_fiinc_t ythr
rename yint_fiinc_t yint
sort year p
* p0: point of the distribution where thresholds switch from zero to positive.
* It is computed on the row where the switch happens and then carried to every
* row of the year. Previously the value was kept only when that row was p==0,
* because all other rows are dropped further down. Both terms are now on the
* same scale (fraction of the population); results are unchanged when the
* switch happens on the p==0 row.
* Assumes f is the width of the bracket in units of 1/100000 - TODO confirm
gen p0row=p/100000+(1-yint/(ythr[_n+1]-ythr))*f/100000 if ythr==0 & ythr[_n+1]>0 & year[_n+1]==year
by year: egen p0=max(p0row)
replace p0=0 if p0==.
drop p0row
keep if p==0 | p==10000 | p==50000 | p==90000 | p==99000 | p==99900 | p==99990 | p==99999
sort year p 
* From here on every statistic is built on the p==0 row of each year. Rows _n+1
* to _n+7 are read as p = 10000, 50000, 90000, 99000, 99900, 99990 and 99999 of
* the same year, which requires all eight percentiles to be present in every year.
* ytop on the p==0 row is used as the overall mean
gen ymean=ytop if p==0
* Income shares
gen bottom10=1-0.9*ytop[_n+1]/ytop if p==0
gen bottom50=1-0.5*ytop[_n+2]/ytop if p==0
gen top10=0.1*ytop[_n+3]/ytop if p==0
gen middle40=1-top10-bottom50 if p==0
gen top1=0.01*ytop[_n+4]/ytop if p==0
gen top01=0.001*ytop[_n+5]/ytop if p==0
gen top001=0.0001*ytop[_n+6]/ytop if p==0
gen top0001=0.00001*ytop[_n+7]/ytop if p==0
* Thresholds as multiples of the mean
gen p10=ythr[_n+1]/ymean if p==0
gen p50=ythr[_n+2]/ymean if p==0
gen p90=ythr[_n+3]/ymean if p==0
gen p99=ythr[_n+4]/ymean if p==0
gen p999=ythr[_n+5]/ymean if p==0
gen p9999=ythr[_n+6]/ymean if p==0
gen p99999=ythr[_n+7]/ymean if p==0
* Ratio of the average above each threshold to the threshold itself
gen b10=ytop[_n+1]/ythr[_n+1] if p==0
gen b50=ytop[_n+2]/ythr[_n+2] if p==0
gen b90=ytop[_n+3]/ythr[_n+3] if p==0
gen b99=ytop[_n+4]/ythr[_n+4] if p==0
gen b999=ytop[_n+5]/ythr[_n+5] if p==0
gen b9999=ytop[_n+6]/ythr[_n+6] if p==0
gen b99999=ytop[_n+7]/ythr[_n+7] if p==0
keep if p==0
keep year ymean bottom10 bottom50 middle40 top10 top1 top01 top001 top0001 p10 p50 p90 p99 p999 p9999 p99999 b10 b50 b90 b99 b999 b9999 b99999 p0 
order year ymean bottom10 bottom50 middle40 top10 top1 top01 top001 top0001 p10 p50 p90 p99 p999 p9999 p99999 b10 b50 b90 b99 b999 b9999 b99999 p0 
save StataFiles/tables/TableB7.dta, replace
export excel using StataFiles/tables/exportresults_DINA_ApB.xlsx, sheet("TB7") firstrow(variables) sheetmodify

/*******************extracting table B8: detailed statistics for distribution of pretax fiscal income among tax units*/
* Wide table (one row per percentile p, one column per year and statistic)
* completed with period averages of b.
	use StataFiles/gperc_income_1900_2014.dta, clear
	keep year p ythr_fiinc_t ytop_fiinc_t b_fiinc_t
	rename (ytop_fiinc_t ythr_fiinc_t b_fiinc_t) (ytop ythr b)
	order year p ythr ytop b
	sort year p
	reshape wide ythr ytop b, i(p) j(year)

* Period averages of b: the two available years before 1915, 1915-1919,
* the eight full decades from 1920 to 2009, and 2010-2014
gen b1900_10=(b1900+b1910)/2
gen b1915_19=(b1915+b1916+b1917+b1918+b1919)/5
forvalues first=1920(10)2000 {
	local last=`first'+9
	* Build the sum of the ten yearly columns of the decade
	local terms b`first'
	forvalues y=`=`first'+1'/`last' {
		local terms `terms'+b`y'
	}
	* The variable name ends with the last two digits of the final year
	gen b`first'_`=substr("`last'",3,2)'=(`terms')/10
}
gen b2010_14=(b2010+b2011+b2012+b2013+b2014)/5
save StataFiles/tables/TableB8.dta, replace
export excel using StataFiles/tables/exportresults_DINA_ApB.xlsx, sheet("TB8") firstrow(variables) sheetmodify

