

// Add interpolations from tabulated years

// Build the all-years NSS income file: the tabulated-years file without its
// 2010 rows, followed by the post-1983 file.
// NOTE(review): presumably 2010 is supplied by the post-1983 file instead - confirm.
use "$inputdata/NSS_tab_final_income.dta", clear
keep if year != 2010
append using "$inputdata/NSS_post1983_final_income.dta"
save "$inputdata/NSS_allyears_final_income.dta", replace

// The saved file keeps the name "average"; the data in memory carries it
// as average_cons_survey from here on.
rename average average_cons_survey



///////////////////////////////
// STRATEGY A selected here //
//////////////////////////////
// WE CHOSE INTERMEDIARY STRATEGY (3) HERE


// Pick which pair of estimated series (estimated_inc#, estimated_thr#) becomes
// the survey bracket average / threshold, according to the global $strategyA:
//   A0 -> suffix 3,  A1 -> suffix 1,  A2 -> suffix 2
local strategyA="$strategyA"
local suffixA ""
if "`strategyA'"=="A0" {
    local suffixA 3
}
else if "`strategyA'"=="A1" {
    local suffixA 1
}
else if "`strategyA'"=="A2" {
    local suffixA 2
}

// Stop right away on an unknown strategy: without the renames below the
// script would only fail later with an unhelpful "variable not found".
if "`suffixA'"=="" {
    display as error "strategyA must be A0, A1 or A2 (got: `strategyA')"
    exit 198
}

rename estimated_inc`suffixA' bracketavg_survey
rename estimated_thr`suffixA' thr_survey
rename perc p




// Keep the identifiers (year, p) and the three survey series, then reshape so
// that each row is one p and each series has one column per year.
local series thr_survey bracketavg_survey average_cons_survey
keep year p `series'
reshape wide `series', i(p) j(year)

// Write the wide file to disk and continue from the saved copy.
local widefile "$inputdata/IndiaInterpSurveyFinal_all.dta"
save "`widefile'", replace
use "`widefile'", clear


// For years 2005, 2010 and 2011, we use ICPSR averages and NSS implied distribution
// Hence we rescale ICPSR and NSS

// Weight attached to each row, keyed on p: 0.01 below p=99000, then 0.001,
// 0.0001 and 0.00001 for p in [99000,99900), [99900,99990) and 99990 and above.
gen pop=.01
replace pop=0.001 if p>=99000 & p<99900
replace pop=0.0001 if p>=99900 & p<99990
replace pop=0.00001 if p>=99990

// Weighted mean of the 2005 bracket averages, kept in a scratch variable.
sum bracketavg_survey2005 [aw=pop]
gen mean2005=r(mean)

///////////////////////////////
// STRATEGY B selected here //
//////////////////////////////

local strategyB="$strategyB"
if "`strategyB'"=="B1"{

    // 17298 is the mean from ICPSR survey for 2005
    gen correction2005=17298/mean2005
    foreach var in bracketavg_survey thr_survey average_cons_survey {
        replace `var'2005=`var'2005*correction2005
    }
    drop correction2005

    // 39209 is the mean from ICPSR survey for 2011-12, we correct it for growth
    // in 2010 and 2011 (and a part of 2012 growth) to obtain 2010 average implied
    // by ICPSR mean - we however keep distribution implied by NSS
    sum bracketavg_survey2010 [aw=pop]
    gen mean2010=r(mean)
    gen correction2010=(39209/(111/100)/(114/100))/mean2010
    foreach var in bracketavg_survey thr_survey average_cons_survey {
        replace `var'2010=`var'2010*correction2010
    }

    // Display the rescaled 2010 mean in the log.
    sum bracketavg_survey2010 [aw=pop]
    drop mean2010 correction2010
}

// mean2005 is only a scratch variable. Drop it whatever strategy B is, so it
// does not survive into the reshaped and saved output when B1 is not selected.
drop mean2005




// Estimate missing survey data years
// Available years 1951 1952 1953 1954 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1973 1974 1977 1983 1986 1987 1988 1989 1990 1991 1992 1994 2000 2005 2010

// Interpolate survey variables for all years 
// Fill in the years between survey rounds, one survey series at a time.
// Inside a span every new year is built from the year just before it, so the
// years of a span have to be generated in chronological order.
// Older interpolation lines for 1955, 1959, 1966, 1967 and 1971 were already
// commented out in this section and are not reproduced here.
foreach var in thr_survey bracketavg_survey average_cons_survey {

    // 1964 is the midpoint of 1963 and 1965.
    gen `var'1964 = (`var'1963 + `var'1965)/2

    // 1972 is the 1971 value plus one third of the 1970-1973 change.
    // NOTE(review): nothing in this section creates the 1971 column, so it
    // has to be present in the input data already - confirm.
    gen `var'1972 = `var'1971 + (`var'1973 - `var'1970)/3

    // Each entry is "start end rule". The years strictly between start and
    // end are generated, with the span length as the number of steps.
    //   geo: previous year times (end/start)^(1/steps), a constant growth rate
    //   lin: previous year plus (end-start)/steps, a constant increment
    // A disabled alternative ("strategy 2") for 1979-1982 used the growth of
    // average_cons_survey between 1978 and 1983 for every series instead of
    // each series' own growth.
    foreach span in "1973 1978 geo" "1978 1983 geo" "1983 1988 geo" "1988 1994 geo" "1994 2000 lin" "2000 2005 geo" "2005 2010 geo" {
        local y0 : word 1 of `span'
        local y1 : word 2 of `span'
        local rule : word 3 of `span'
        local steps = `y1' - `y0'
        local first = `y0' + 1
        local last = `y1' - 1

        forvalues yr = `first'/`last' {
            local prev = `yr' - 1
            if "`rule'"=="geo" {
                gen `var'`yr' = `var'`prev'*(`var'`y1'/`var'`y0')^(1/`steps')
            }
            else {
                gen `var'`yr' = `var'`prev' + (`var'`y1' - `var'`y0')/`steps'
            }
        }
    }
}


// We use relatively optimistic (i.e. conservative) growth assumptions for the evolution of bottom incomes over recent years,
// for which we do not have data at this stage. These will be refined later.
// Over 2000-2010, we know that the bottom 90% grew at about 50% of the full-population per-adult growth rate.
// In the benchmark, we assume that the bottom 90% grows at about 90% of the full-population per-adult growth rate.

// Extend every survey series beyond 2010 by chaining fixed yearly growth
// factors: 2011 = 2010*1.13, 2012 = 2011*1.09, 2013 = 2012*1.10, 2014 = 2013*1.07.
foreach var in thr_survey bracketavg_survey average_cons_survey {
    local prev 2010
    foreach factor in 1.13 1.09 1.10 1.07 {
        local yr = `prev' + 1
        gen `var'`yr' = `var'`prev'*`factor'
        local prev `yr'
    }
}

/*
// Portion of code below  shows evolution of bottom incomes if they grow 100% at the same rate as the average (=very conservative)
// Only v. moderate impacts on final results.
// (top 1% peaks at 21.3 in recent period vs. 21.6)

foreach var in thr_survey bracketavg_survey average_cons_survey{
gen `var'2011=`var'2010*1.146
gen `var'2012=`var'2011*1.104
gen `var'2013=`var'2012*1.113
gen `var'2014=`var'2013*1.087
}
*/


// If we use NSS tabs
/*

// Interpolate survey variables for all years 
foreach var in thr_survey bracketavg_survey average_cons_survey{
gen `var'1955=(`var'1954+`var'1956)/2
//replace `var'1959=(`var'1958+`var'1960)/2
replace `var'1964=(`var'1963+`var'1965)/2
//replace `var'1966=`var'1965+(`var'1968-`var'1965)/3
//replace `var'1967=`var'1966+(`var'1968-`var'1965)/3
gen `var'1971=`var'1970+(`var'1973-`var'1970)/3
gen `var'1972=`var'1971+(`var'1973-`var'1970)/3

//gen `var'1974=`var'1973*(`var'1978/`var'1973)^(1/5)
gen `var'1975=`var'1974*(`var'1977/`var'1974)^(1/3)
gen `var'1976=`var'1975*(`var'1977/`var'1974)^(1/3)


//gen `var'1975=`var'1974+(`var'1977-`var'1974)/3
//gen `var'1976=`var'1975+(`var'1977-`var'1974)/3

// interpolate between 1978 and 1983
gen `var'1978=`var'1977*(`var'1983/`var'1977)^(1/6)
gen `var'1979=`var'1978*(`var'1983/`var'1977)^(1/6)
gen `var'1980=`var'1979*(`var'1983/`var'1977)^(1/6)
gen `var'1981=`var'1980*(`var'1983/`var'1977)^(1/6)
gen `var'1982=`var'1981*(`var'1983/`var'1977)^(1/6)


// interpolate between 1983 and 1988
gen `var'1984=`var'1983*(`var'1988/`var'1983)^(1/5)
gen `var'1985=`var'1984*(`var'1988/`var'1983)^(1/5)
gen `var'1986=`var'1985*(`var'1988/`var'1983)^(1/5)
gen `var'1987=`var'1986*(`var'1988/`var'1983)^(1/5)


// interpolate 1988 and 1994

gen `var'1989=`var'1988*(`var'1994/`var'1988)^(1/6)
gen `var'1990=`var'1989*(`var'1994/`var'1988)^(1/6)
gen `var'1991=`var'1990*(`var'1994/`var'1988)^(1/6)
gen `var'1992=`var'1991*(`var'1994/`var'1988)^(1/6)
gen `var'1993=`var'1992*(`var'1994/`var'1988)^(1/6)

// interpolatie between 1994 and 2000
gen `var'1995=`var'1994+(`var'2000-`var'1994)/6
gen `var'1996=`var'1995+(`var'2000-`var'1994)/6
gen `var'1997=`var'1996+(`var'2000-`var'1994)/6
gen `var'1998=`var'1997+(`var'2000-`var'1994)/6
gen `var'1999=`var'1998+(`var'2000-`var'1994)/6


// interpolatie between 2000 and 2005

gen `var'2001=`var'2000*(`var'2005/`var'2000)^(1/5)
gen `var'2002=`var'2001*(`var'2005/`var'2000)^(1/5)
gen `var'2003=`var'2002*(`var'2005/`var'2000)^(1/5)
gen `var'2004=`var'2003*(`var'2005/`var'2000)^(1/5)


//  interpolate between 2005 and  2010

gen `var'2006=`var'2005*(`var'2010/`var'2005)^(1/5)
gen `var'2007=`var'2006*(`var'2010/`var'2005)^(1/5)
gen `var'2008=`var'2007*(`var'2010/`var'2005)^(1/5)
gen `var'2009=`var'2008*(`var'2010/`var'2005)^(1/5)


}

foreach var in thr_survey bracketavg_survey average_cons_survey{
gen `var'2011=`var'2010*1.11
gen `var'2012=`var'2011*1.09
gen `var'2013=`var'2012*1.1174231
}

*/




// Compute the 1998 and 2005 survey means to infer the evolution of tax thresholds
// Store the pop-weighted mean of the bracket averages for 1998 and 2005 in
// the globals mean1998 and mean2005.
foreach yr in 1998 2005 {
    summarize bracketavg_survey`yr' [aweight=pop]
    global mean`yr' = r(mean)
}

// ratio : 1.64


* Alternatively, we use the growth rates of our income control - this is conservative (the higher the rates, the lower the resulting top shares)
//Year	growth_adult_control
//2011		1.11
//2012		1.10
//2013		1.11



/*
forval x=1951(1)2012{
local y=`x'+1
foreach var in thr_survey bracketavg_survey average_cons_survey{
replace `var'`x'=(`var'`y'+`var'`x')/2
}
}
*/

// The pop weights are not part of the output; remove them, then return to
// one row per (p, year).
drop pop
reshape long thr_survey bracketavg_survey average_cons_survey, i(p) j(year)

//graph twoway (connected bracketavg_survey average_cons_survey year if p==50000, mcolor(red ) )

// Save the combined ("meta") survey file holding every year.
save "$inputdata/SurveyInterpResults/IndiaInterpSurveyFinal_all_interp.dta", replace





// generate year by year files
forvalues x = 1950/2014 {
    // Load only the rows of year `x' from the combined file and write them
    // to that year's own file. A year with no rows yields a file with zero
    // observations.
    use if year==`x' using "$inputdata/SurveyInterpResults/IndiaInterpSurveyFinal_all_interp.dta", clear
    save "$inputdata/SurveyInterpResults/IndiaInterpSurveyFinal`x'.dta", replace
}
