
//pause 
// Careful: use the right folder to get the latest tax files!


use "$inputdata/revisedtaxdatafinal.dta", clear

/*
break here OFF 
use "/Users/ilucas/Downloads/revisedtaxdatafinal_2205.dta", clear
*/

keep thr p bracketavg average year
replace p=round(p*100000)
save "$inputdata/revisedtaxdatafinal_1.dta", replace

****************************************************
* Interpolate missing and erroneous years linearly *
****************************************************

rename bracketavg bracketavg_tax
rename average average_tax
rename thr thr_tax

reshape wide average_tax thr_tax bracketavg_tax, i(p) j(year)


// 1997 absurd result, we interpolate linearly between 1996 and 1998
replace thr_tax1997=(thr_tax1996+thr_tax1998)/2
replace bracketavg_tax1997=(bracketavg_tax1996+bracketavg_tax1998)/2
// new average for 1997
gen pop=.01
replace pop=0.001 if p>=99000 & p<99900
replace pop=0.0001 if p>=99900 & p<99990
replace pop=0.00001 if p>=99990
gen bracketavg_tax1997tot=bracketavg_tax1997*pop
egen new2017avg=total(bracketavg_tax1997tot)
replace average_tax1997=new2017avg
drop new2017avg bracketavg_tax1997tot pop


// Interpolate missing years linearly

foreach var in thr_tax bracketavg_tax average_tax {


//Evolution line with survey growth. 
gen `var'1951=`var'1953/0.83601747
gen `var'1952=(`var'1951+`var'1953)/2

/*
// linear between 1950 1952
gen `var'1951=`var'1950+((`var'1953-`var'1950)/3)
gen `var'1952=`var'1951+((`var'1953-`var'1950)/3)

// forward from 1950
gen `var'1951=`var'1950*1.0536506
gen `var'1952=`var'1951*0.98317467
*/
/*
// backwards from 1953
gen `var'1952=`var'1953/1.0916
gen `var'1951=`var'1952/0.98317
*/


gen `var'1942=(`var'1941+`var'1943)/2
gen `var'1946=(`var'1945+`var'1947)/2
gen `var'1963=(`var'1962+`var'1964)/2
replace `var'1966=(`var'1965+`var'1967)/2
gen `var'1969=(`var'1970+`var'1968)/2
gen `var'1972=(`var'1973+`var'1971)/2

}


// YEAR 2005 interpolated on the basis of NSS growth for different percentiles
// between 2005 and 1998, since we don't have any other information

joinby p using "$inputdata/9805interpolation.dta", unmatched(both)
drop _merge
foreach var in thr_tax bracketavg_tax average_tax{
gen `var'2005=`var'1998*ratio9805
replace `var'2005=`var'1998*1.8 if p==99999
}
drop ratio9805



// Interpolate years between 1998 and 2011


// constant growth rate interpolation
foreach var in thr_tax bracketavg_tax average_tax{
gen `var'1999=`var'1998*((`var'2005/`var'1998)^(1/7))
gen `var'2000=`var'1999*((`var'2005/`var'1998)^(1/7))
gen `var'2001=`var'2000*((`var'2005/`var'1998)^(1/7))
gen `var'2002=`var'2001*((`var'2005/`var'1998)^(1/7))
gen `var'2003=`var'2002*((`var'2005/`var'1998)^(1/7))
gen `var'2004=`var'2003*((`var'2005/`var'1998)^(1/7))
}

foreach var in thr_tax bracketavg_tax average_tax{
gen `var'2006=`var'2005*((`var'2011/`var'2005)^(1/6))
gen `var'2007=`var'2006*((`var'2011/`var'2005)^(1/6))
gen `var'2008=`var'2007*((`var'2011/`var'2005)^(1/6))
gen `var'2009=`var'2008*((`var'2011/`var'2005)^(1/6))
gen `var'2010=`var'2009*((`var'2011/`var'2005)^(1/6))
}



reshape long average_tax thr_tax bracketavg_tax, i(p) j(year)

save "$inputdata/revisedtaxdatafinal_2.dta", replace

//graph twoway (line bracketavg_tax year if p==99000)


// Check the curve profile. If this block is re-enabled, the variables it
// creates must be deleted at the end.
/*
forval x=1951(1)2013 {
use "$user/India_2016/ICPSR/taxintepall.dta", clear
keep if year==`x'

// inequality stats 
// pop stats
gen pop=.01
replace pop=0.001 if p>=99000 & p<99900
replace pop=0.0001 if p>=99900 & p<99990
replace pop=0.00001 if p>=99990
egen poptot=sum(pop)

//shares

sum bracketavg_tax [aw=pop]
gen meaninc_tax=r(mean)
gen groupshare_tax=bracketavg_tax*pop/(meaninc_tax)
//topshares
gsort - p
gen topshare_tax=sum(groupshare_tax)
//topaverage
gen topavg_tax=(topshare_tax*meaninc_tax)/((100-(p/1000))/100)
// pareto betas
gen beta_tax=(topavg_tax/thr_tax)

save "$user/India_2016/ICPSR/taxinte`x'.dta", replace
}

local firstiter=0
forval x=1951(1)2013 {
use "$user/India_2016/ICPSR/taxinte`x'.dta", clear
if `firstiter'!=0{
append using "$user/India_2016/ICPSR/taxinteallshares.dta"
save "$user/India_2016/ICPSR/taxinteallshares.dta", replace
}
save "$user/India_2016/ICPSR/taxinteallshares.dta", replace
local firstiter=1
}

//

// check linearity
*graph twoway (line topshare_tax year if p==90000)


// OK 
save "$user/India_2016/ICPSR/revisedtaxdatafinal_2.dta", replace
*/
********

// Prepare tax data

forval year=1922(1)2014{
use "$inputdata/revisedtaxdatafinal_2.dta", clear
keep if year==`year'
save "$results_interp_1/IndiaInterpTax`year'.dta", replace
}


/*
forval year=1922(1)2013{
use "$pathway/taxintepall.dta", clear
keep if year==`year'
save "$pathway/InterpFromTax/IndiaInterpTax`year'.dta", replace
}
*/
