// Project root. Every other location is derived from $user so that the
// script can be relocated by editing this single line.
global user "/Users/iLucas/Dropbox/Indian_Income_Distribution"
// Folder holding the combined NSS consumer-expenditure data set.
global survey "$user/India_2016/Data/NSS/Consumer_Expenditure_Data"
// Name of the timestamped folder with survey interpolation results
// (only referenced from the commented-out "Import Full NSS interpolation" section).
global SurveyInterpResults "tables-2017-04-26-15-29-11"
// Working folder for CPI inputs, WIID tabulations and all intermediate/output files.
// Built from $user rather than repeated literally: the previous literal used a
// different letter case ("ilucas"), which fails on a case-sensitive file system.
global pathway2 "$user/MetaSurveyData"

// Adult composition of each per-capita-consumption quintile, survey year 1983.
// Output of this section: globals shareAdultsQ1 ... shareAdultsQ5.
use "$survey/Combined_NSS_data_3.dta", clear
keep if year == 1983

// ineqdeco only displays inequality statistics; its results are not reused below.
ineqdeco adult_cons [aw=adults_weight]
generate percapita_cons = total_cons/HH_Size
ineqdeco percapita_cons [aw=weight]

// Weighted quintiles of per-capita consumption
xtile quintile = percapita_cons [aw=weight], nq(5)

// Weighted mean number of adults per observation, by quintile
generate NumAdultsWeight = Num_Adults*weight
bysort quintile: egen totalNumAdultsWeightquintile = total(NumAdultsWeight)
bysort quintile: egen totalWeightquintile = total(weight)
generate meanadult = totalNumAdultsWeightquintile/totalWeightquintile

// Weighted mean of the adult share (Num_Adults/HH_Size), by quintile
// NOTE(review): this is a mean of ratios weighted by `weight'; whether `weight'
// is a household or a person weight is not visible here - confirm.
generate shareAdultsWeight = (Num_Adults/HH_Size)*weight
bysort quintile: egen totalshareAdultsWeightquintile = total(shareAdultsWeight)
generate shareAdults = totalshareAdultsWeightquintile/totalWeightquintile

// shareAdults is constant within a quintile, so r(mean) is that quintile's value
forvalues q = 1/5 {
    summarize shareAdults if quintile == `q'
    global shareAdultsQ`q' = r(mean)
}


//


// CPI DATA
// Read the World Bank CPI sheet, keep the India row and turn it into a
// long (Year, CPI) file.

insheet using "$pathway2/WB_CPI.csv", delim(,) clear

// Keep India plus the first three raw rows, then discard the first two of
// those: the row left on top is the one whose cells name the columns below.
keep if v1 == "India" | _n <= 3
drop v2 v3 v4 v62
drop in 1/2

// Rename each data column to y<value found in its first row>
// (presumably calendar years, given that reshape's j() is called year).
forvalues col = 5/61 {
    local suffix = v`col'[1]
    rename v`col' y`suffix'
}

reshape long y, i(v1) j(year)

// The former header row is no longer needed once the columns are named
drop if v1 == "Country Name"

rename (y year) (CPI Year)

save "$pathway2/WB_CPI.dta", replace

// Combine the BP_CPI.xlsx series with the World Bank CPI: years missing from
// the World Bank file take the value from column D of the spreadsheet.
import excel using "$pathway2/BP_CPI.xlsx", clear
drop B C
rename A Year
// unmatched(both) keeps years that appear in only one of the two sources
joinby Year using "$pathway2/WB_CPI.dta", unmatched(both)
replace CPI = D if CPI == .
keep Year CPI
save "$pathway2/WB_BP_CPI.dta", replace

// Attach the WIID tabulations for India to the CPI series (data in memory).
// The file lives in $pathway2 like every other intermediate file, so the
// global is used instead of repeating the absolute path.
joinby Year using "$pathway2/WIID_India_sub.dta", unmatched(both)

// drop year with 2 surveys: remove the two rounds named below
drop if inlist(Source_Comments, "National Sample Survey Round 6", "National Sample Survey Round 10")

// Base of the deflator: average of two CPI values.
// NOTE(review): presumably the CPI levels for 1973 and 1974 - confirm against WB_BP_CPI.dta.
gen CPI7374=(6.097669+7.841705)/2
// Rescale Mean by each year's CPI relative to that base
gen Mean_current=(CPI/CPI7374)*Mean


// Build what is needed to move from the distribution of individuals to the
// distribution of adults, using the adult shares stored in the
// shareAdultsQ1-shareAdultsQ5 globals.

// Each quintile holds 20% of the population (Population scaled by 1e6)
forvalues q = 1/5 {
    generate PopulationQ`q' = Population*0.2*1e6
}

// Adults in each quintile
forvalues q = 1/5 {
    generate AdultPopulationQ`q' = PopulationQ`q'*${shareAdultsQ`q'}
}

// Mean per individual in each quintile, from the quintile share Q (in percent)
forvalues q = 1/5 {
    generate meanQ`q' = (Q`q'/100)*Mean_current*Population*1e6/PopulationQ`q'
}

// Same quintile total spread over adults only
forvalues q = 1/5 {
    generate adultmeanQ`q' = meanQ`q'*PopulationQ`q'/AdultPopulationQ`q'
}

// Share of all adults that falls in each quintile
local adult_total "AdultPopulationQ1+AdultPopulationQ2+AdultPopulationQ3+AdultPopulationQ4+AdultPopulationQ5"
forvalues q = 1/5 {
    generate bktpopshareQ`q' = AdultPopulationQ`q'/(`adult_total')
}

// Overall mean per adult = adult-share-weighted sum of the quintile adult means
local weighted_sum "bktpopshareQ1*adultmeanQ1"
forvalues q = 2/5 {
    local weighted_sum "`weighted_sum'+bktpopshareQ`q'*adultmeanQ`q'"
}
generate Mean_adult_current = `weighted_sum'

// Quintile shares: percent -> fraction
forvalues q = 1/5 {
    replace Q`q' = Q`q'/100
}

keep Year Mean_current bktpopshareQ* adultmeanQ* Mean_adult_current Q* meanQ*
drop if Mean_current == . | adultmeanQ1 == .

save "$pathway2/NSSineq_all.dta", replace

// Generate, for every year in NSSineq_all.dta, a one-sheet Excel table of the
// distribution of individuals (quintile shares), with the adult mean as average.

use "$pathway2/NSSineq_all.dta", clear
levelsof Year, local(NSShistlist) clean
foreach yr of local NSShistlist {
    use "$pathway2/NSSineq_all.dta", clear
    keep if Year == `yr'

    // One row per quintile
    reshape long meanQ Q, i(Year) j(group)
    rename Q bracketsh

    // Lower bound of each quintile: 0, 0.2, 0.4, 0.6, 0.8
    generate p = (group - 1)/5

    // country, average and year are filled on the first row only
    generate country = "India" if _n == 1
    rename Mean_adult_current average
    rename Year year
    replace average = . if _n != 1
    replace year = . if _n != 1

    keep year country average p bracketsh
    order year country average p bracketsh

    export excel using "$pathway2/NSSineq`yr'_allindiv.xlsx", firstrow(var) replace
}





// Generate distributions of adults (not individuals): for every year in
// NSSineq_all.dta, export a table with one row per quintile holding the lower
// bound p of the bracket (cumulative adult population share below it) and the
// bracket's mean per adult.

use "$pathway2/NSSineq_all.dta",clear

levelsof Year, local(NSShistlist) clean

foreach var in `NSShistlist'{
    use "$pathway2/NSSineq_all.dta", clear
    keep if Year==`var'
    reshape long adultmeanQ bktpopshareQ, i(Year) j(group)

    // Brackets must be cumulated in quintile order. Ordering on the share
    // itself (as was done before) only coincides with quintile order when the
    // adult shares happen to be strictly increasing; otherwise p would be
    // attached to the wrong bracket average.
    sort group

    // p = cumulative adult share of all lower brackets (0 for the first one)
    tempvar cumshare
    generate double `cumshare' = sum(bktpopshareQ)
    generate p = cond(_n==1, 0, `cumshare'[_n-1])

    // country, average and year are filled on the first row only
    gen country="India" if _n==1
    rename Mean_adult_current average
    replace average=. if _n!=1

    rename adultmeanQ bracketavg
    rename Year year
    replace year=. if _n!=1

    order year country average p bracketavg
    keep year country average p bracketavg
    export excel using "$pathway2/NSSineq`var'_adults.xlsx", firstrow(var) replace
}

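// MANUAL STEP: gpinter must be run on the Excel tables exported above before continuing;
// the sections below read its per-year CSV output.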



// Integrate interpolated results for individuals: read one CSV per year,
// tag the result columns with _nsstab_indiv and save it as a .dta file.
use "$pathway2/NSSineq_all.dta", clear
levelsof Year, local(NSShistlist) clean
foreach yr of local NSShistlist {
    insheet using "$pathway2/allindivs-2017-06-01-23-20-37/India, `yr'.csv", delim(,) clear
    replace year = `yr'

    foreach stat in average thr topsh bracketavg b {
        rename `stat' `stat'_nsstab_indiv
    }
    drop country topavg

    // Rebuild p as 1000 * (p in percent, rounded to 0.001), stored as float
    replace p = p*100
    generate float p2 = 1000*round(p, 0.001)
    drop p
    rename p2 p

    save "$pathway2/NSSineq_indiv_`yr'.dta", replace
}


/*
// Integrate interpolated results for adults
use "$pathway2/NSSineq_all.dta",clear
levelsof Year, local(NSShistlist) clean
foreach var in `NSShistlist'{
insheet using "$pathway2/adults-2017-05-14-13-37-52/India, `var'.csv", delim(,) clear
replace year=`var'
rename average average_nsstab_adults
rename thr thr_nsstab_adults
rename topsh topsh_nsstab_adults
rename bracketavg bracketavg_nsstab_adults
rename b b_nsstab_adults
drop country topavg
replace p=p*100
gen float p2=1000*round(p,0.001)
drop p
rename p2 p
save "$pathway2/NSSineq_adults_`var'.dta",replace
}
*/

// Stack the per-year interpolation files into NSSineq_gpinter_all.dta.
// Each pass loads one year, appends everything accumulated so far and
// overwrites the accumulated file.

use "$pathway2/NSSineq_all.dta", clear
levelsof Year, local(NSShistlist) clean
local firstiter = 0
foreach yr of local NSShistlist {
    //use "$pathway2/NSSineq_adults_`yr'.dta", clear
    use "$pathway2/NSSineq_indiv_`yr'.dta", clear
    // Nothing has been accumulated yet on the first pass
    if `firstiter' {
        append using "$pathway2/NSSineq_gpinter_all.dta", force
    }
    save "$pathway2/NSSineq_gpinter_all.dta", replace
    local firstiter = 1
}


/*
// Import Full NSS interpolation (recent years only - adult distributions)

// YEAR 1983
insheet using "/Users/iLucas/Dropbox/Indian_Income_Distribution/India_2016/ICPSR/InterpFromSurvey/$SurveyInterpResults/India, 1983.csv", delim(,) clear
replace p=p*100
gen float p2=1000*round(p,0.001)
drop p
rename p2 p
replace year=1983
drop country
save "$pathway2/NSS_full_1983.dta", replace

//YEAR 1988
insheet using "/Users/iLucas/Dropbox/Indian_Income_Distribution/India_2016/ICPSR/InterpFromSurvey/$SurveyInterpResults/India, 1988.csv", delim(,) clear
replace p=p*100
gen float p2=1000*round(p,0.001)
drop p
rename p2 p
replace year=1988
drop country
save "$pathway2/NSS_full_1988.dta", replace

//YEAR 2010
insheet using "/Users/iLucas/Dropbox/Indian_Income_Distribution/India_2016/ICPSR/InterpFromSurvey/$SurveyInterpResults/India, 2010.csv", delim(,) clear
replace p=p*100
gen float p2=1000*round(p,0.001)
drop p
rename p2 p
replace year=2010
drop country
save "$pathway2/NSS_full_2010.dta", replace

// append 3 files
use "$pathway2/NSS_full_1983.dta", clear
append using "$pathway2/NSS_full_1988.dta"
append using "$pathway2/NSS_full_2010.dta"
save "$pathway2/NSS_full_all.dta", replace


// Join bracket interpolation with interpolated files for which we have full micro surveys
use "$pathway2/NSSineq_gpinter_all.dta", clear
joinby year p using "$pathway2/NSS_full_all.dta", unmatched(both)
drop _merge
*/



// Reload the stacked interpolation results, keeping only years before 1983
use "$pathway2/NSSineq_gpinter_all.dta", clear
keep if year < 1983
// Compare results
sort year p

/*
graph twoway (line topsh topsh_nsstab_adults topsh_nsstab_indiv p if year==1983)
graph twoway (line bracketavg bracketavg_nsstab_adults bracketavg_nsstab_indiv p if p<90000 & year==1983)
graph twoway (line b b_nsstab_adults b_nsstab_indiv p if p>5000 & p<99000 & year==1983)

graph twoway (line topsh topsh_nsstab_adults topsh_nsstab_indiv p if year==1988)
graph twoway (line bracketavg bracketavg_nsstab_adults bracketavg_nsstab_indiv p if p<90000 & year==1988)
graph twoway (line b b_nsstab_adults b_nsstab_indiv p if p>5000 & p<99000 & year==1988)

graph twoway (line topsh topsh_nsstab_adults topsh_nsstab_indiv p if year==2010)
graph twoway (line bracketavg bracketavg_nsstab_adults bracketavg_nsstab_indiv p if p<90000 & year==2010)
graph twoway (line b b_nsstab_adults b_nsstab_indiv p if p>5000 & p<99000 & year==2010)
*/

// Not fully satisfactory but we will go ahead

// Strategy 1: keep the individual distribution and strip the _nsstab_indiv suffix
keep average_nsstab_indiv thr_nsstab_indiv topsh_nsstab_indiv bracketavg_nsstab_indiv b_nsstab_indiv p year
foreach stat in average thr topsh bracketavg b {
    rename `stat'_nsstab_indiv `stat'
}

// Strategy2 keep _nsstab_adults
// keep year average_nsstab_adults thr_nsstab_adults topsh_nsstab_adults bracketavg_nsstab_adults b_nsstab_adults p
/*
rename average_nsstab_adults average 
rename thr_nsstab_adults thr
rename topsh_nsstab_adults topsh
rename bracketavg_nsstab_adults bracketavg
rename b_nsstab_adults b
*/



// Replace average by its within-year mean, so every row of a year carries the
// same value, then save the final file.
tempvar year_avg
bysort year: egen `year_avg' = mean(average)
replace average = `year_avg'
drop `year_avg'

save "$pathway2/NSS_tab_final.dta", replace


use "$pathway2/NSS_tab_final.dta", clear

/*

// Evolution of top 20% share as per NSS tabulated surveys
replace topsh=topsh*100
graph twoway (line topsh year if p==80000), title("Evolution of top 20% consumption share, NSS data") ytitle(% total consumption)

