
////////////////////////
// Threshold range 3 //
////////////////////////

// combine tax and survey files - 1951-1956
global lower_thr 80000
global upper_thr 99800
global range3 80998
forval x=1951(1)1956{

use "$inputdata/TaxInterpResults/IndiaInterpTax`x'.dta", clear
joinby p using "$inputdata/SurveyInterpResults/IndiaInterpSurveyFinal`x'.dta", unmatched(both)
drop _merge

// Profile 1 - linear rise

rename thr_tax tax
rename thr_survey survey 
rename bracketavg_tax bavg_tax 
rename bracketavg_survey bavg_survey

replace bavg_survey=bavg_survey
replace survey=survey


gen weights1=0
replace weights1=1 if p>=$lower_thr & p<$upper_thr
gen a1=tax/survey if p==$upper_thr
egen upfactor_top=mean(a1)
drop a1
gen upfactor1=1
egen weights1sum=total(weights1)

replace upfactor1=upfactor1[_n-1]+(weights1/(weights1sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor1=tax/survey if p>=$upper_thr

gen upfactoravg1=1
replace upfactoravg1=(upfactor1[_n+1]+upfactor1)/2 if weights1!=0
replace upfactoravg1=0 if p>=$upper_thr



// Profile 2 - convex

gen weights2a=0
replace weights2a=1 if p>=$lower_thr & p<$upper_thr
sum weights2a
global length=r(sum)
gen weights2b=sum(weights2a)
replace weights2b=0 if weights2a==0


gen weights2=0
replace weights2=0.05 if weights2b!=0 & weights2b<($length/6)
replace weights2=0.2 if weights2b>=($length/6) & weights2b<(2*$length/6)
replace weights2=0.3 if weights2b>=(2*$length/6) & weights2b<(3*$length/6)
replace weights2=0.4 if weights2b>=(3*$length/6) & weights2b<(4*$length/6)
replace weights2=0.6 if weights2b>=(4*$length/6) & weights2b<(5*$length/6)
replace weights2=0.8 if weights2b>=(5*$length/6) & weights2b<=(6*$length/6)

drop weights2a weights2b
egen weights2sum=total(weights2)

gen upfactor2=1

replace upfactor2=upfactor2[_n-1]+(weights2/(weights2sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor2=tax/survey if p>=$upper_thr

gen upfactoravg2=1
replace upfactoravg2=(upfactor2[_n+1]+upfactor2)/2 if weights2!=0

// Profile 3 - concave

gen weights3a=0
replace weights3a=1 if p>=$lower_thr & p<$upper_thr
sum weights3a
global length=r(sum)
gen weights3b=sum(weights3a)
replace weights3b=0 if weights3a==0


gen weights3=0
replace weights3=6 if weights3b!=0 & weights3b<($length/6)
replace weights3=5 if weights3b>=($length/6) & weights3b<(2*$length/6)
replace weights3=4 if weights3b>=(2*$length/6) & weights3b<(3*$length/6)
replace weights3=3 if weights3b>=(3*$length/6) & weights3b<(4*$length/6)
replace weights3=2 if weights3b>=(4*$length/6) & weights3b<(5*$length/6)
replace weights3=1 if weights3b>=(5*$length/6) & weights3b<=(6*$length/6)



drop weights3a weights3b
egen weights3sum=total(weights3)

gen upfactor3=1

replace upfactor3=upfactor3[_n-1]+(weights3/(weights3sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor3=tax/survey if p>=$upper_thr

gen upfactoravg3=1
replace upfactoravg3=(upfactor3[_n+1]+upfactor3)/2 if weights3!=0


*twoway line (upfactor3 upfactor2 upfactor1 p) if p>=$lower_thr-1

drop weights1sum weights2sum weights3sum


// merge results
gen pop=.01
replace pop=0.001 if p>=99000 & p<99900
replace pop=0.0001 if p>=99900 & p<99990
replace pop=0.00001 if p>=99990
egen poptot=sum(pop)


gsort - p

foreach n in 1 2 3 {
// thresholds
gen thr_final_`n'=survey
replace thr_final_`n'=survey*upfactor`n' if p>=$lower_thr & p<$upper_thr
replace thr_final_`n'=tax if p>=$upper_thr
// bracketavg
gen bavg_final_`n'=bavg_survey
replace bavg_final_`n'=bavg_survey*upfactoravg`n' if p>=$lower_thr & p<$upper_thr
replace bavg_final_`n'=bavg_tax if p>=$upper_thr
//shares
sum bavg_final_`n' [aw=pop]
gen meaninc_`n'=r(mean)
gen groupshare_`n'=bavg_final_`n'*pop/(meaninc_`n')
//topshares
gen topshare_`n'=sum(groupshare_`n')
//topaverage
gen topavg_`n'=(topshare_`n'*meaninc_`n')/((100-(p/1000))/100)
// pareto betas
gen beta_`n'=(topavg_`n'/thr_final_`n')
}


// Initial series

gsort - p
//shares
sum bavg_survey [aw=pop]
gen meaninc_survey=r(mean)
gen groupshare_survey=bavg_survey*pop/(meaninc_survey)
//topshares
gen topshare_survey=sum(groupshare_survey)
//topaverage
gen topavg_survey=(topshare_survey*meaninc_survey)/((100-(p/1000))/100)
// pareto betas
gen beta_survey=(topavg_survey/survey)

//shares
sum bavg_tax [aw=pop]
gen meaninc_tax=r(mean)
gen groupshare_tax=bavg_tax*pop/(meaninc_tax)
//topshares
gen topshare_tax=sum(groupshare_tax)
//topaverage
gen topavg_tax=(topshare_tax*meaninc_tax)/((100-(p/1000))/100)
// pareto betas
gen beta_tax=(topavg_tax/tax)

save "$inputdata/CombinedResults/IndiaInterpTaxSurvey`x'_$strategyA$strategyB$strategyC.dta", replace
}





// combine tax and survey files - 1957-1959
global lower_thr 80000
global upper_thr 99700
global range3 80997
forval x=1957(1)1959{
use "$inputdata/TaxInterpResults/IndiaInterpTax`x'.dta", clear
joinby p using "$inputdata/SurveyInterpResults/IndiaInterpSurveyFinal`x'.dta", unmatched(both)
drop _merge

// Profile 1 - linear rise

rename thr_tax tax
rename thr_survey survey 
rename bracketavg_tax bavg_tax 
rename bracketavg_survey bavg_survey

replace bavg_survey=bavg_survey
replace survey=survey


gen weights1=0
replace weights1=1 if p>=$lower_thr & p<$upper_thr
gen a1=tax/survey if p==$upper_thr
egen upfactor_top=mean(a1)
drop a1
gen upfactor1=1
egen weights1sum=total(weights1)

replace upfactor1=upfactor1[_n-1]+(weights1/(weights1sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor1=tax/survey if p>=$upper_thr

gen upfactoravg1=1
replace upfactoravg1=(upfactor1[_n+1]+upfactor1)/2 if weights1!=0
replace upfactoravg1=0 if p>=$upper_thr



// Profile 2 - convex

gen weights2a=0
replace weights2a=1 if p>=$lower_thr & p<$upper_thr
sum weights2a
global length=r(sum)
gen weights2b=sum(weights2a)
replace weights2b=0 if weights2a==0


gen weights2=0
replace weights2=0.05 if weights2b!=0 & weights2b<($length/6)
replace weights2=0.2 if weights2b>=($length/6) & weights2b<(2*$length/6)
replace weights2=0.3 if weights2b>=(2*$length/6) & weights2b<(3*$length/6)
replace weights2=0.4 if weights2b>=(3*$length/6) & weights2b<(4*$length/6)
replace weights2=0.6 if weights2b>=(4*$length/6) & weights2b<(5*$length/6)
replace weights2=0.8 if weights2b>=(5*$length/6) & weights2b<=(6*$length/6)

drop weights2a weights2b
egen weights2sum=total(weights2)

gen upfactor2=1

replace upfactor2=upfactor2[_n-1]+(weights2/(weights2sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor2=tax/survey if p>=$upper_thr

gen upfactoravg2=1
replace upfactoravg2=(upfactor2[_n+1]+upfactor2)/2 if weights2!=0

// Profile 3 - concave

gen weights3a=0
replace weights3a=1 if p>=$lower_thr & p<$upper_thr
sum weights3a
global length=r(sum)
gen weights3b=sum(weights3a)
replace weights3b=0 if weights3a==0


gen weights3=0
replace weights3=6 if weights3b!=0 & weights3b<($length/6)
replace weights3=5 if weights3b>=($length/6) & weights3b<(2*$length/6)
replace weights3=4 if weights3b>=(2*$length/6) & weights3b<(3*$length/6)
replace weights3=3 if weights3b>=(3*$length/6) & weights3b<(4*$length/6)
replace weights3=2 if weights3b>=(4*$length/6) & weights3b<(5*$length/6)
replace weights3=1 if weights3b>=(5*$length/6) & weights3b<=(6*$length/6)



drop weights3a weights3b
egen weights3sum=total(weights3)

gen upfactor3=1

replace upfactor3=upfactor3[_n-1]+(weights3/(weights3sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor3=tax/survey if p>=$upper_thr

gen upfactoravg3=1
replace upfactoravg3=(upfactor3[_n+1]+upfactor3)/2 if weights3!=0


*twoway line (upfactor3 upfactor2 upfactor1 p) if p>=$lower_thr-1

drop weights1sum weights2sum weights3sum


// merge results
gen pop=.01
replace pop=0.001 if p>=99000 & p<99900
replace pop=0.0001 if p>=99900 & p<99990
replace pop=0.00001 if p>=99990
egen poptot=sum(pop)


gsort - p

foreach n in 1 2 3 {
// thresholds
gen thr_final_`n'=survey
replace thr_final_`n'=survey*upfactor`n' if p>=$lower_thr & p<$upper_thr
replace thr_final_`n'=tax if p>=$upper_thr
// bracketavg
gen bavg_final_`n'=bavg_survey
replace bavg_final_`n'=bavg_survey*upfactoravg`n' if p>=$lower_thr & p<$upper_thr
replace bavg_final_`n'=bavg_tax if p>=$upper_thr
//shares
sum bavg_final_`n' [aw=pop]
gen meaninc_`n'=r(mean)
gen groupshare_`n'=bavg_final_`n'*pop/(meaninc_`n')
//topshares
gen topshare_`n'=sum(groupshare_`n')
//topaverage
gen topavg_`n'=(topshare_`n'*meaninc_`n')/((100-(p/1000))/100)
// pareto betas
gen beta_`n'=(topavg_`n'/thr_final_`n')
}


// Initial series

gsort - p
//shares
sum bavg_survey [aw=pop]
gen meaninc_survey=r(mean)
gen groupshare_survey=bavg_survey*pop/(meaninc_survey)
//topshares
gen topshare_survey=sum(groupshare_survey)
//topaverage
gen topavg_survey=(topshare_survey*meaninc_survey)/((100-(p/1000))/100)
// pareto betas
gen beta_survey=(topavg_survey/survey)

//shares
sum bavg_tax [aw=pop]
gen meaninc_tax=r(mean)
gen groupshare_tax=bavg_tax*pop/(meaninc_tax)
//topshares
gen topshare_tax=sum(groupshare_tax)
//topaverage
gen topavg_tax=(topshare_tax*meaninc_tax)/((100-(p/1000))/100)
// pareto betas
gen beta_tax=(topavg_tax/tax)

save "$inputdata/CombinedResults/IndiaInterpTaxSurvey`x'_$strategyA$strategyB$strategyC.dta", replace


}









// combine tax and survey files - 1960-1962
global lower_thr 80000
global upper_thr 99600
global range3 80996
forval x=1960(1)1962{
use "$inputdata/TaxInterpResults/IndiaInterpTax`x'.dta", clear
joinby p using "$inputdata/SurveyInterpResults/IndiaInterpSurveyFinal`x'.dta", unmatched(both)
drop _merge

// Profile 1 - linear rise

rename thr_tax tax
rename thr_survey survey 
rename bracketavg_tax bavg_tax 
rename bracketavg_survey bavg_survey

replace bavg_survey=bavg_survey
replace survey=survey


gen weights1=0
replace weights1=1 if p>=$lower_thr & p<$upper_thr
gen a1=tax/survey if p==$upper_thr
egen upfactor_top=mean(a1)
drop a1
gen upfactor1=1
egen weights1sum=total(weights1)

replace upfactor1=upfactor1[_n-1]+(weights1/(weights1sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor1=tax/survey if p>=$upper_thr

gen upfactoravg1=1
replace upfactoravg1=(upfactor1[_n+1]+upfactor1)/2 if weights1!=0
replace upfactoravg1=0 if p>=$upper_thr



// Profile 2 - convex

gen weights2a=0
replace weights2a=1 if p>=$lower_thr & p<$upper_thr
sum weights2a
global length=r(sum)
gen weights2b=sum(weights2a)
replace weights2b=0 if weights2a==0


gen weights2=0
replace weights2=0.05 if weights2b!=0 & weights2b<($length/6)
replace weights2=0.2 if weights2b>=($length/6) & weights2b<(2*$length/6)
replace weights2=0.3 if weights2b>=(2*$length/6) & weights2b<(3*$length/6)
replace weights2=0.4 if weights2b>=(3*$length/6) & weights2b<(4*$length/6)
replace weights2=0.6 if weights2b>=(4*$length/6) & weights2b<(5*$length/6)
replace weights2=0.8 if weights2b>=(5*$length/6) & weights2b<=(6*$length/6)

drop weights2a weights2b
egen weights2sum=total(weights2)

gen upfactor2=1

replace upfactor2=upfactor2[_n-1]+(weights2/(weights2sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor2=tax/survey if p>=$upper_thr

gen upfactoravg2=1
replace upfactoravg2=(upfactor2[_n+1]+upfactor2)/2 if weights2!=0

// Profile 3 - concave

gen weights3a=0
replace weights3a=1 if p>=$lower_thr & p<$upper_thr
sum weights3a
global length=r(sum)
gen weights3b=sum(weights3a)
replace weights3b=0 if weights3a==0


gen weights3=0
replace weights3=6 if weights3b!=0 & weights3b<($length/6)
replace weights3=5 if weights3b>=($length/6) & weights3b<(2*$length/6)
replace weights3=4 if weights3b>=(2*$length/6) & weights3b<(3*$length/6)
replace weights3=3 if weights3b>=(3*$length/6) & weights3b<(4*$length/6)
replace weights3=2 if weights3b>=(4*$length/6) & weights3b<(5*$length/6)
replace weights3=1 if weights3b>=(5*$length/6) & weights3b<=(6*$length/6)



drop weights3a weights3b
egen weights3sum=total(weights3)

gen upfactor3=1

replace upfactor3=upfactor3[_n-1]+(weights3/(weights3sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor3=tax/survey if p>=$upper_thr

gen upfactoravg3=1
replace upfactoravg3=(upfactor3[_n+1]+upfactor3)/2 if weights3!=0


*twoway line (upfactor3 upfactor2 upfactor1 p) if p>=$lower_thr-1

drop weights1sum weights2sum weights3sum


// merge results
gen pop=.01
replace pop=0.001 if p>=99000 & p<99900
replace pop=0.0001 if p>=99900 & p<99990
replace pop=0.00001 if p>=99990
egen poptot=sum(pop)


gsort - p

foreach n in 1 2 3 {
// thresholds
gen thr_final_`n'=survey
replace thr_final_`n'=survey*upfactor`n' if p>=$lower_thr & p<$upper_thr
replace thr_final_`n'=tax if p>=$upper_thr
// bracketavg
gen bavg_final_`n'=bavg_survey
replace bavg_final_`n'=bavg_survey*upfactoravg`n' if p>=$lower_thr & p<$upper_thr
replace bavg_final_`n'=bavg_tax if p>=$upper_thr
//shares
sum bavg_final_`n' [aw=pop]
gen meaninc_`n'=r(mean)
gen groupshare_`n'=bavg_final_`n'*pop/(meaninc_`n')
//topshares
gen topshare_`n'=sum(groupshare_`n')
//topaverage
gen topavg_`n'=(topshare_`n'*meaninc_`n')/((100-(p/1000))/100)
// pareto betas
gen beta_`n'=(topavg_`n'/thr_final_`n')
}


// Initial series

gsort - p
//shares
sum bavg_survey [aw=pop]
gen meaninc_survey=r(mean)
gen groupshare_survey=bavg_survey*pop/(meaninc_survey)
//topshares
gen topshare_survey=sum(groupshare_survey)
//topaverage
gen topavg_survey=(topshare_survey*meaninc_survey)/((100-(p/1000))/100)
// pareto betas
gen beta_survey=(topavg_survey/survey)

//shares
sum bavg_tax [aw=pop]
gen meaninc_tax=r(mean)
gen groupshare_tax=bavg_tax*pop/(meaninc_tax)
//topshares
gen topshare_tax=sum(groupshare_tax)
//topaverage
gen topavg_tax=(topshare_tax*meaninc_tax)/((100-(p/1000))/100)
// pareto betas
gen beta_tax=(topavg_tax/tax)

save "$inputdata/CombinedResults/IndiaInterpTaxSurvey`x'_$strategyA$strategyB$strategyC.dta", replace

}










// combine tax and survey files - 1963-1965
global lower_thr 80000
global upper_thr 99400
global range3 80994
forval x=1963(1)1965{
use "$inputdata/TaxInterpResults/IndiaInterpTax`x'.dta", clear
joinby p using "$inputdata/SurveyInterpResults/IndiaInterpSurveyFinal`x'.dta", unmatched(both)
drop _merge

// Profile 1 - linear rise

rename thr_tax tax
rename thr_survey survey 
rename bracketavg_tax bavg_tax 
rename bracketavg_survey bavg_survey

replace bavg_survey=bavg_survey
replace survey=survey


gen weights1=0
replace weights1=1 if p>=$lower_thr & p<$upper_thr
gen a1=tax/survey if p==$upper_thr
egen upfactor_top=mean(a1)
drop a1
gen upfactor1=1
egen weights1sum=total(weights1)

replace upfactor1=upfactor1[_n-1]+(weights1/(weights1sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor1=tax/survey if p>=$upper_thr

gen upfactoravg1=1
replace upfactoravg1=(upfactor1[_n+1]+upfactor1)/2 if weights1!=0
replace upfactoravg1=0 if p>=$upper_thr



// Profile 2 - convex

gen weights2a=0
replace weights2a=1 if p>=$lower_thr & p<$upper_thr
sum weights2a
global length=r(sum)
gen weights2b=sum(weights2a)
replace weights2b=0 if weights2a==0


gen weights2=0
replace weights2=0.05 if weights2b!=0 & weights2b<($length/6)
replace weights2=0.2 if weights2b>=($length/6) & weights2b<(2*$length/6)
replace weights2=0.3 if weights2b>=(2*$length/6) & weights2b<(3*$length/6)
replace weights2=0.4 if weights2b>=(3*$length/6) & weights2b<(4*$length/6)
replace weights2=0.6 if weights2b>=(4*$length/6) & weights2b<(5*$length/6)
replace weights2=0.8 if weights2b>=(5*$length/6) & weights2b<=(6*$length/6)

drop weights2a weights2b
egen weights2sum=total(weights2)

gen upfactor2=1

replace upfactor2=upfactor2[_n-1]+(weights2/(weights2sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor2=tax/survey if p>=$upper_thr

gen upfactoravg2=1
replace upfactoravg2=(upfactor2[_n+1]+upfactor2)/2 if weights2!=0

// Profile 3 - concave

gen weights3a=0
replace weights3a=1 if p>=$lower_thr & p<$upper_thr
sum weights3a
global length=r(sum)
gen weights3b=sum(weights3a)
replace weights3b=0 if weights3a==0


gen weights3=0
replace weights3=6 if weights3b!=0 & weights3b<($length/6)
replace weights3=5 if weights3b>=($length/6) & weights3b<(2*$length/6)
replace weights3=4 if weights3b>=(2*$length/6) & weights3b<(3*$length/6)
replace weights3=3 if weights3b>=(3*$length/6) & weights3b<(4*$length/6)
replace weights3=2 if weights3b>=(4*$length/6) & weights3b<(5*$length/6)
replace weights3=1 if weights3b>=(5*$length/6) & weights3b<=(6*$length/6)



drop weights3a weights3b
egen weights3sum=total(weights3)

gen upfactor3=1

replace upfactor3=upfactor3[_n-1]+(weights3/(weights3sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor3=tax/survey if p>=$upper_thr

gen upfactoravg3=1
replace upfactoravg3=(upfactor3[_n+1]+upfactor3)/2 if weights3!=0


*twoway line (upfactor3 upfactor2 upfactor1 p) if p>=$lower_thr-1

drop weights1sum weights2sum weights3sum


// merge results
gen pop=.01
replace pop=0.001 if p>=99000 & p<99900
replace pop=0.0001 if p>=99900 & p<99990
replace pop=0.00001 if p>=99990
egen poptot=sum(pop)


gsort - p

foreach n in 1 2 3 {
// thresholds
gen thr_final_`n'=survey
replace thr_final_`n'=survey*upfactor`n' if p>=$lower_thr & p<$upper_thr
replace thr_final_`n'=tax if p>=$upper_thr
// bracketavg
gen bavg_final_`n'=bavg_survey
replace bavg_final_`n'=bavg_survey*upfactoravg`n' if p>=$lower_thr & p<$upper_thr
replace bavg_final_`n'=bavg_tax if p>=$upper_thr
//shares
sum bavg_final_`n' [aw=pop]
gen meaninc_`n'=r(mean)
gen groupshare_`n'=bavg_final_`n'*pop/(meaninc_`n')
//topshares
gen topshare_`n'=sum(groupshare_`n')
//topaverage
gen topavg_`n'=(topshare_`n'*meaninc_`n')/((100-(p/1000))/100)
// pareto betas
gen beta_`n'=(topavg_`n'/thr_final_`n')
}


// Initial series

gsort - p
//shares
sum bavg_survey [aw=pop]
gen meaninc_survey=r(mean)
gen groupshare_survey=bavg_survey*pop/(meaninc_survey)
//topshares
gen topshare_survey=sum(groupshare_survey)
//topaverage
gen topavg_survey=(topshare_survey*meaninc_survey)/((100-(p/1000))/100)
// pareto betas
gen beta_survey=(topavg_survey/survey)

//shares
sum bavg_tax [aw=pop]
gen meaninc_tax=r(mean)
gen groupshare_tax=bavg_tax*pop/(meaninc_tax)
//topshares
gen topshare_tax=sum(groupshare_tax)
//topaverage
gen topavg_tax=(topshare_tax*meaninc_tax)/((100-(p/1000))/100)
// pareto betas
gen beta_tax=(topavg_tax/tax)

save "$inputdata/CombinedResults/IndiaInterpTaxSurvey`x'_$strategyA$strategyB$strategyC.dta", replace

}
















// combine tax and survey files - 1966
global lower_thr 80000
global upper_thr 99500
global range3 80995
forval x=1966(1)1966{
use "$inputdata/TaxInterpResults/IndiaInterpTax`x'.dta", clear
joinby p using "$inputdata/SurveyInterpResults/IndiaInterpSurveyFinal`x'.dta", unmatched(both)
drop _merge

// Profile 1 - linear rise

rename thr_tax tax
rename thr_survey survey 
rename bracketavg_tax bavg_tax 
rename bracketavg_survey bavg_survey

replace bavg_survey=bavg_survey
replace survey=survey


gen weights1=0
replace weights1=1 if p>=$lower_thr & p<$upper_thr
gen a1=tax/survey if p==$upper_thr
egen upfactor_top=mean(a1)
drop a1
gen upfactor1=1
egen weights1sum=total(weights1)

replace upfactor1=upfactor1[_n-1]+(weights1/(weights1sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor1=tax/survey if p>=$upper_thr

gen upfactoravg1=1
replace upfactoravg1=(upfactor1[_n+1]+upfactor1)/2 if weights1!=0
replace upfactoravg1=0 if p>=$upper_thr



// Profile 2 - convex

gen weights2a=0
replace weights2a=1 if p>=$lower_thr & p<$upper_thr
sum weights2a
global length=r(sum)
gen weights2b=sum(weights2a)
replace weights2b=0 if weights2a==0


gen weights2=0
replace weights2=0.05 if weights2b!=0 & weights2b<($length/6)
replace weights2=0.2 if weights2b>=($length/6) & weights2b<(2*$length/6)
replace weights2=0.3 if weights2b>=(2*$length/6) & weights2b<(3*$length/6)
replace weights2=0.4 if weights2b>=(3*$length/6) & weights2b<(4*$length/6)
replace weights2=0.6 if weights2b>=(4*$length/6) & weights2b<(5*$length/6)
replace weights2=0.8 if weights2b>=(5*$length/6) & weights2b<=(6*$length/6)

drop weights2a weights2b
egen weights2sum=total(weights2)

gen upfactor2=1

replace upfactor2=upfactor2[_n-1]+(weights2/(weights2sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor2=tax/survey if p>=$upper_thr

gen upfactoravg2=1
replace upfactoravg2=(upfactor2[_n+1]+upfactor2)/2 if weights2!=0

// Profile 3 - concave

gen weights3a=0
replace weights3a=1 if p>=$lower_thr & p<$upper_thr
sum weights3a
global length=r(sum)
gen weights3b=sum(weights3a)
replace weights3b=0 if weights3a==0


gen weights3=0
replace weights3=6 if weights3b!=0 & weights3b<($length/6)
replace weights3=5 if weights3b>=($length/6) & weights3b<(2*$length/6)
replace weights3=4 if weights3b>=(2*$length/6) & weights3b<(3*$length/6)
replace weights3=3 if weights3b>=(3*$length/6) & weights3b<(4*$length/6)
replace weights3=2 if weights3b>=(4*$length/6) & weights3b<(5*$length/6)
replace weights3=1 if weights3b>=(5*$length/6) & weights3b<=(6*$length/6)



drop weights3a weights3b
egen weights3sum=total(weights3)

gen upfactor3=1

replace upfactor3=upfactor3[_n-1]+(weights3/(weights3sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor3=tax/survey if p>=$upper_thr

gen upfactoravg3=1
replace upfactoravg3=(upfactor3[_n+1]+upfactor3)/2 if weights3!=0


*twoway line (upfactor3 upfactor2 upfactor1 p) if p>=$lower_thr-1

drop weights1sum weights2sum weights3sum


// merge results
gen pop=.01
replace pop=0.001 if p>=99000 & p<99900
replace pop=0.0001 if p>=99900 & p<99990
replace pop=0.00001 if p>=99990
egen poptot=sum(pop)


gsort - p

foreach n in 1 2 3 {
// thresholds
gen thr_final_`n'=survey
replace thr_final_`n'=survey*upfactor`n' if p>=$lower_thr & p<$upper_thr
replace thr_final_`n'=tax if p>=$upper_thr
// bracketavg
gen bavg_final_`n'=bavg_survey
replace bavg_final_`n'=bavg_survey*upfactoravg`n' if p>=$lower_thr & p<$upper_thr
replace bavg_final_`n'=bavg_tax if p>=$upper_thr
//shares
sum bavg_final_`n' [aw=pop]
gen meaninc_`n'=r(mean)
gen groupshare_`n'=bavg_final_`n'*pop/(meaninc_`n')
//topshares
gen topshare_`n'=sum(groupshare_`n')
//topaverage
gen topavg_`n'=(topshare_`n'*meaninc_`n')/((100-(p/1000))/100)
// pareto betas
gen beta_`n'=(topavg_`n'/thr_final_`n')
}


// Initial series

gsort - p
//shares
sum bavg_survey [aw=pop]
gen meaninc_survey=r(mean)
gen groupshare_survey=bavg_survey*pop/(meaninc_survey)
//topshares
gen topshare_survey=sum(groupshare_survey)
//topaverage
gen topavg_survey=(topshare_survey*meaninc_survey)/((100-(p/1000))/100)
// pareto betas
gen beta_survey=(topavg_survey/survey)

//shares
sum bavg_tax [aw=pop]
gen meaninc_tax=r(mean)
gen groupshare_tax=bavg_tax*pop/(meaninc_tax)
//topshares
gen topshare_tax=sum(groupshare_tax)
//topaverage
gen topavg_tax=(topshare_tax*meaninc_tax)/((100-(p/1000))/100)
// pareto betas
gen beta_tax=(topavg_tax/tax)

save "$inputdata/CombinedResults/IndiaInterpTaxSurvey`x'_$strategyA$strategyB$strategyC.dta", replace

}




// combine tax and survey files - 1967-1976
global lower_thr 80000
global upper_thr 99400
global range3 80994
forval x=1967(1)1976{
use "$inputdata/TaxInterpResults/IndiaInterpTax`x'.dta", clear
joinby p using "$inputdata/SurveyInterpResults/IndiaInterpSurveyFinal`x'.dta", unmatched(both)
drop _merge

// Profile 1 - linear rise


rename thr_tax tax
rename thr_survey survey 
rename bracketavg_tax bavg_tax 
rename bracketavg_survey bavg_survey

// Surveys are interpolated from 1978 to 1982, this yields very odd profiles in which 
// Survey top earners are richer than in tax data, which cannot be the case
// We thus set survey=tax data for the top of the distribution for these specific years
replace bavg_survey=bavg_tax if bavg_survey>=bavg_tax & year>1977 & year<1983
replace survey=tax if survey>=tax & year>1977 & year<1983




gen weights1=0
replace weights1=1 if p>=$lower_thr & p<$upper_thr
gen a1=tax/survey if p==$upper_thr
egen upfactor_top=mean(a1)
drop a1
gen upfactor1=1
egen weights1sum=total(weights1)

replace upfactor1=upfactor1[_n-1]+(weights1/(weights1sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor1=tax/survey if p>=$upper_thr

gen upfactoravg1=1
replace upfactoravg1=(upfactor1[_n+1]+upfactor1)/2 if weights1!=0
replace upfactoravg1=0 if p>=$upper_thr



// Profile 2 - convex

gen weights2a=0
replace weights2a=1 if p>=$lower_thr & p<$upper_thr
sum weights2a
global length=r(sum)
gen weights2b=sum(weights2a)
replace weights2b=0 if weights2a==0


gen weights2=0
replace weights2=0.05 if weights2b!=0 & weights2b<($length/6)
replace weights2=0.2 if weights2b>=($length/6) & weights2b<(2*$length/6)
replace weights2=0.3 if weights2b>=(2*$length/6) & weights2b<(3*$length/6)
replace weights2=0.4 if weights2b>=(3*$length/6) & weights2b<(4*$length/6)
replace weights2=0.6 if weights2b>=(4*$length/6) & weights2b<(5*$length/6)
replace weights2=0.8 if weights2b>=(5*$length/6) & weights2b<=(6*$length/6)

drop weights2a weights2b
egen weights2sum=total(weights2)

gen upfactor2=1

replace upfactor2=upfactor2[_n-1]+(weights2/(weights2sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor2=tax/survey if p>=$upper_thr

gen upfactoravg2=1
replace upfactoravg2=(upfactor2[_n+1]+upfactor2)/2 if weights2!=0

// Profile 3 - concave

gen weights3a=0
replace weights3a=1 if p>=$lower_thr & p<$upper_thr
sum weights3a
global length=r(sum)
gen weights3b=sum(weights3a)
replace weights3b=0 if weights3a==0


gen weights3=0
replace weights3=6 if weights3b!=0 & weights3b<($length/6)
replace weights3=5 if weights3b>=($length/6) & weights3b<(2*$length/6)
replace weights3=4 if weights3b>=(2*$length/6) & weights3b<(3*$length/6)
replace weights3=3 if weights3b>=(3*$length/6) & weights3b<(4*$length/6)
replace weights3=2 if weights3b>=(4*$length/6) & weights3b<(5*$length/6)
replace weights3=1 if weights3b>=(5*$length/6) & weights3b<=(6*$length/6)



drop weights3a weights3b
egen weights3sum=total(weights3)

gen upfactor3=1

replace upfactor3=upfactor3[_n-1]+(weights3/(weights3sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor3=tax/survey if p>=$upper_thr

gen upfactoravg3=1
replace upfactoravg3=(upfactor3[_n+1]+upfactor3)/2 if weights3!=0


*twoway line (upfactor3 upfactor2 upfactor1 p) if p>=$lower_thr-1

drop weights1sum weights2sum weights3sum


// merge results
gen pop=.01
replace pop=0.001 if p>=99000 & p<99900
replace pop=0.0001 if p>=99900 & p<99990
replace pop=0.00001 if p>=99990
egen poptot=sum(pop)


gsort - p

foreach n in 1 2 3 {
// thresholds
gen thr_final_`n'=survey
replace thr_final_`n'=survey*upfactor`n' if p>=$lower_thr & p<$upper_thr
replace thr_final_`n'=tax if p>=$upper_thr
// bracketavg
gen bavg_final_`n'=bavg_survey
replace bavg_final_`n'=bavg_survey*upfactoravg`n' if p>=$lower_thr & p<$upper_thr
replace bavg_final_`n'=bavg_tax if p>=$upper_thr
//shares
sum bavg_final_`n' [aw=pop]
gen meaninc_`n'=r(mean)
gen groupshare_`n'=bavg_final_`n'*pop/(meaninc_`n')
//topshares
gen topshare_`n'=sum(groupshare_`n')
//topaverage
gen topavg_`n'=(topshare_`n'*meaninc_`n')/((100-(p/1000))/100)
// pareto betas
gen beta_`n'=(topavg_`n'/thr_final_`n')
}


// Initial series

gsort - p
//shares
sum bavg_survey [aw=pop]
gen meaninc_survey=r(mean)
gen groupshare_survey=bavg_survey*pop/(meaninc_survey)
//topshares
gen topshare_survey=sum(groupshare_survey)
//topaverage
gen topavg_survey=(topshare_survey*meaninc_survey)/((100-(p/1000))/100)
// pareto betas
gen beta_survey=(topavg_survey/survey)

//shares
sum bavg_tax [aw=pop]
gen meaninc_tax=r(mean)
gen groupshare_tax=bavg_tax*pop/(meaninc_tax)
//topshares
gen topshare_tax=sum(groupshare_tax)
//topaverage
gen topavg_tax=(topshare_tax*meaninc_tax)/((100-(p/1000))/100)
// pareto betas
gen beta_tax=(topavg_tax/tax)

save "$inputdata/CombinedResults/IndiaInterpTaxSurvey`x'_$strategyA$strategyB$strategyC.dta", replace

}






// combine tax and survey files - 1977-1984
global lower_thr 80000
global upper_thr 99700
global range3 80997
forval x=1977(1)1984{
use "$inputdata/TaxInterpResults/IndiaInterpTax`x'.dta", clear
joinby p using "$inputdata/SurveyInterpResults/IndiaInterpSurveyFinal`x'.dta", unmatched(both)
drop _merge

// Profile 1 - linear rise

rename thr_tax tax
rename thr_survey survey 
rename bracketavg_tax bavg_tax 
rename bracketavg_survey bavg_survey

replace bavg_survey=bavg_survey
replace survey=survey


gen weights1=0
replace weights1=1 if p>=$lower_thr & p<$upper_thr
gen a1=tax/survey if p==$upper_thr
egen upfactor_top=mean(a1)
drop a1
gen upfactor1=1
egen weights1sum=total(weights1)

replace upfactor1=upfactor1[_n-1]+(weights1/(weights1sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor1=tax/survey if p>=$upper_thr

gen upfactoravg1=1
replace upfactoravg1=(upfactor1[_n+1]+upfactor1)/2 if weights1!=0
replace upfactoravg1=0 if p>=$upper_thr



// Profile 2 - convex

gen weights2a=0
replace weights2a=1 if p>=$lower_thr & p<$upper_thr
sum weights2a
global length=r(sum)
gen weights2b=sum(weights2a)
replace weights2b=0 if weights2a==0


gen weights2=0
replace weights2=0.05 if weights2b!=0 & weights2b<($length/6)
replace weights2=0.2 if weights2b>=($length/6) & weights2b<(2*$length/6)
replace weights2=0.3 if weights2b>=(2*$length/6) & weights2b<(3*$length/6)
replace weights2=0.4 if weights2b>=(3*$length/6) & weights2b<(4*$length/6)
replace weights2=0.6 if weights2b>=(4*$length/6) & weights2b<(5*$length/6)
replace weights2=0.8 if weights2b>=(5*$length/6) & weights2b<=(6*$length/6)

drop weights2a weights2b
egen weights2sum=total(weights2)

gen upfactor2=1

replace upfactor2=upfactor2[_n-1]+(weights2/(weights2sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor2=tax/survey if p>=$upper_thr

gen upfactoravg2=1
replace upfactoravg2=(upfactor2[_n+1]+upfactor2)/2 if weights2!=0

// Profile 3 - concave

gen weights3a=0
replace weights3a=1 if p>=$lower_thr & p<$upper_thr
sum weights3a
global length=r(sum)
gen weights3b=sum(weights3a)
replace weights3b=0 if weights3a==0


gen weights3=0
replace weights3=6 if weights3b!=0 & weights3b<($length/6)
replace weights3=5 if weights3b>=($length/6) & weights3b<(2*$length/6)
replace weights3=4 if weights3b>=(2*$length/6) & weights3b<(3*$length/6)
replace weights3=3 if weights3b>=(3*$length/6) & weights3b<(4*$length/6)
replace weights3=2 if weights3b>=(4*$length/6) & weights3b<(5*$length/6)
replace weights3=1 if weights3b>=(5*$length/6) & weights3b<=(6*$length/6)



drop weights3a weights3b
egen weights3sum=total(weights3)

gen upfactor3=1

replace upfactor3=upfactor3[_n-1]+(weights3/(weights3sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor3=tax/survey if p>=$upper_thr

gen upfactoravg3=1
replace upfactoravg3=(upfactor3[_n+1]+upfactor3)/2 if weights3!=0


*twoway line (upfactor3 upfactor2 upfactor1 p) if p>=$lower_thr-1

drop weights1sum weights2sum weights3sum


// merge results
gen pop=.01
replace pop=0.001 if p>=99000 & p<99900
replace pop=0.0001 if p>=99900 & p<99990
replace pop=0.00001 if p>=99990
egen poptot=sum(pop)


gsort - p

foreach n in 1 2 3 {
// thresholds
gen thr_final_`n'=survey
replace thr_final_`n'=survey*upfactor`n' if p>=$lower_thr & p<$upper_thr
replace thr_final_`n'=tax if p>=$upper_thr
// bracketavg
gen bavg_final_`n'=bavg_survey
replace bavg_final_`n'=bavg_survey*upfactoravg`n' if p>=$lower_thr & p<$upper_thr
replace bavg_final_`n'=bavg_tax if p>=$upper_thr
//shares
sum bavg_final_`n' [aw=pop]
gen meaninc_`n'=r(mean)
gen groupshare_`n'=bavg_final_`n'*pop/(meaninc_`n')
//topshares
gen topshare_`n'=sum(groupshare_`n')
//topaverage
gen topavg_`n'=(topshare_`n'*meaninc_`n')/((100-(p/1000))/100)
// pareto betas
gen beta_`n'=(topavg_`n'/thr_final_`n')
}


// Initial series

gsort - p
//shares
sum bavg_survey [aw=pop]
gen meaninc_survey=r(mean)
gen groupshare_survey=bavg_survey*pop/(meaninc_survey)
//topshares
gen topshare_survey=sum(groupshare_survey)
//topaverage
gen topavg_survey=(topshare_survey*meaninc_survey)/((100-(p/1000))/100)
// pareto betas
gen beta_survey=(topavg_survey/survey)

//shares
sum bavg_tax [aw=pop]
gen meaninc_tax=r(mean)
gen groupshare_tax=bavg_tax*pop/(meaninc_tax)
//topshares
gen topshare_tax=sum(groupshare_tax)
//topaverage
gen topavg_tax=(topshare_tax*meaninc_tax)/((100-(p/1000))/100)
// pareto betas
gen beta_tax=(topavg_tax/tax)

save "$inputdata/CombinedResults/IndiaInterpTaxSurvey`x'_$strategyA$strategyB$strategyC.dta", replace

}






// combine tax and survey files - 1985-1987
// For each year: join the interpolated tax and survey series on p, build three
// survey-to-tax upscaling profiles for rows with lower_thr <= p < upper_thr, then
// derive final thresholds, bracket averages, shares, top averages and betas,
// and save one combined dataset per year.
global lower_thr 80000
global upper_thr 99500
// range3 is not referenced anywhere inside this loop.
global range3 80995
forval x=1985(1)1987{
    use "$inputdata/TaxInterpResults/IndiaInterpTax`x'.dta", clear
    joinby p using "$inputdata/SurveyInterpResults/IndiaInterpSurveyFinal`x'.dta", unmatched(both)
    drop _merge

    // The lag and lead references and the running sums used for the profiles walk
    // the rows in ascending p. Fix that order explicitly instead of relying on
    // the order left behind by joinby; stable keeps tied rows in their order.
    sort p, stable

    rename thr_tax tax
    rename thr_survey survey
    rename bracketavg_tax bavg_tax
    rename bracketavg_survey bavg_survey

    // Self-assignments: as written these change no values.
    replace bavg_survey=bavg_survey
    replace survey=survey

    // Profile 1 - linear rise
    // weights1 flags the rows inside the adjustment range
    gen weights1=0
    replace weights1=1 if p>=$lower_thr & p<$upper_thr
    // upfactor_top: the tax/survey ratio observed at p==upper_thr, copied to all rows
    gen a1=tax/survey if p==$upper_thr
    egen upfactor_top=mean(a1)
    drop a1
    gen upfactor1=1
    egen weights1sum=total(weights1)

    // Each in-range row adds its weight share of (upfactor_top-1) to the factor
    // of the row just below it; rows at or above upper_thr take tax/survey directly.
    replace upfactor1=upfactor1[_n-1]+(weights1/(weights1sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
    replace upfactor1=tax/survey if p>=$upper_thr

    // Factor for bracket averages: mean of this row's factor and the next row's
    gen upfactoravg1=1
    replace upfactoravg1=(upfactor1[_n+1]+upfactor1)/2 if weights1!=0
    // Only profile 1 is zeroed here (profiles 2 and 3 keep 1); bavg_final_* uses
    // bavg_tax for these rows, so this value does not enter the results below.
    replace upfactoravg1=0 if p>=$upper_thr

    // Profile 2 - convex, Profile 3 - concave
    // The in-range rows are cut into six equal-length segments by position and
    // each segment gets its own step weight, listed from the lowest segment up.
    local steps2 0.05 0.2 0.3 0.4 0.6 0.8
    local steps3 6 5 4 3 2 1
    foreach k in 2 3 {
        gen weights`k'a=0
        replace weights`k'a=1 if p>=$lower_thr & p<$upper_thr
        sum weights`k'a
        global length=r(sum)
        // running count of in-range rows; reset to 0 outside the range
        gen weights`k'b=sum(weights`k'a)
        replace weights`k'b=0 if weights`k'a==0

        gen weights`k'=0
        forval j=1/6 {
            local w : word `j' of `steps`k''
            if `j'==1 {
                replace weights`k'=`w' if weights`k'b!=0 & weights`k'b<($length/6)
            }
            else if `j'==6 {
                replace weights`k'=`w' if weights`k'b>=(5*$length/6) & weights`k'b<=(6*$length/6)
            }
            else {
                replace weights`k'=`w' if weights`k'b>=((`j'-1)*$length/6) & weights`k'b<(`j'*$length/6)
            }
        }

        drop weights`k'a weights`k'b
        egen weights`k'sum=total(weights`k')

        gen upfactor`k'=1
        replace upfactor`k'=upfactor`k'[_n-1]+(weights`k'/(weights`k'sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
        replace upfactor`k'=tax/survey if p>=$upper_thr

        gen upfactoravg`k'=1
        replace upfactoravg`k'=(upfactor`k'[_n+1]+upfactor`k')/2 if weights`k'!=0
    }

    // disabled diagnostic plot:
    // twoway line (upfactor3 upfactor2 upfactor1 p) if p>=$lower_thr-1

    drop weights1sum weights2sum weights3sum

    // merge results
    // pop: weight attached to each row, with finer steps from p=99000 upwards
    gen pop=.01
    replace pop=0.001 if p>=99000 & p<99900
    replace pop=0.0001 if p>=99900 & p<99990
    replace pop=0.00001 if p>=99990
    // total() is the documented name of the legacy egen sum(); same result
    egen poptot=total(pop)

    // Descending p from here on, so the running sums below accumulate from the top
    gsort - p

    foreach n in 1 2 3 {
        // thresholds: survey below the range, scaled survey inside it, tax above it
        gen thr_final_`n'=survey
        replace thr_final_`n'=survey*upfactor`n' if p>=$lower_thr & p<$upper_thr
        replace thr_final_`n'=tax if p>=$upper_thr
        // bracketavg: same three-way rule, using the averaged factor
        gen bavg_final_`n'=bavg_survey
        replace bavg_final_`n'=bavg_survey*upfactoravg`n' if p>=$lower_thr & p<$upper_thr
        replace bavg_final_`n'=bavg_tax if p>=$upper_thr
        //shares
        sum bavg_final_`n' [aw=pop]
        gen meaninc_`n'=r(mean)
        gen groupshare_`n'=bavg_final_`n'*pop/(meaninc_`n')
        //topshares
        gen topshare_`n'=sum(groupshare_`n')
        //topaverage
        gen topavg_`n'=(topshare_`n'*meaninc_`n')/((100-(p/1000))/100)
        // pareto betas
        gen beta_`n'=(topavg_`n'/thr_final_`n')
    }

    // Initial series: the same share, top-average and beta computations for the
    // unadjusted survey series and then the unadjusted tax series
    gsort - p
    foreach s in survey tax {
        //shares
        sum bavg_`s' [aw=pop]
        gen meaninc_`s'=r(mean)
        gen groupshare_`s'=bavg_`s'*pop/(meaninc_`s')
        //topshares
        gen topshare_`s'=sum(groupshare_`s')
        //topaverage
        gen topavg_`s'=(topshare_`s'*meaninc_`s')/((100-(p/1000))/100)
        // pareto betas
        gen beta_`s'=(topavg_`s'/`s')
    }

    save "$inputdata/CombinedResults/IndiaInterpTaxSurvey`x'_$strategyA$strategyB$strategyC.dta", replace
}











// combine tax and survey files - 1988-1989
// For each year: join the interpolated tax and survey series on p, build three
// survey-to-tax upscaling profiles for rows with lower_thr <= p < upper_thr, then
// derive final thresholds, bracket averages, shares, top averages and betas,
// and save one combined dataset per year.
global lower_thr 80000
global upper_thr 99400
// range3 is not referenced anywhere inside this loop.
global range3 80994
forval x=1988(1)1989{
    use "$inputdata/TaxInterpResults/IndiaInterpTax`x'.dta", clear
    joinby p using "$inputdata/SurveyInterpResults/IndiaInterpSurveyFinal`x'.dta", unmatched(both)
    drop _merge

    // The lag and lead references and the running sums used for the profiles walk
    // the rows in ascending p. Fix that order explicitly instead of relying on
    // the order left behind by joinby; stable keeps tied rows in their order.
    sort p, stable

    rename thr_tax tax
    rename thr_survey survey
    rename bracketavg_tax bavg_tax
    rename bracketavg_survey bavg_survey

    // Self-assignments: as written these change no values.
    replace bavg_survey=bavg_survey
    replace survey=survey

    // Profile 1 - linear rise
    // weights1 flags the rows inside the adjustment range
    gen weights1=0
    replace weights1=1 if p>=$lower_thr & p<$upper_thr
    // upfactor_top: the tax/survey ratio observed at p==upper_thr, copied to all rows
    gen a1=tax/survey if p==$upper_thr
    egen upfactor_top=mean(a1)
    drop a1
    gen upfactor1=1
    egen weights1sum=total(weights1)

    // Each in-range row adds its weight share of (upfactor_top-1) to the factor
    // of the row just below it; rows at or above upper_thr take tax/survey directly.
    replace upfactor1=upfactor1[_n-1]+(weights1/(weights1sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
    replace upfactor1=tax/survey if p>=$upper_thr

    // Factor for bracket averages: mean of this row's factor and the next row's
    gen upfactoravg1=1
    replace upfactoravg1=(upfactor1[_n+1]+upfactor1)/2 if weights1!=0
    // Only profile 1 is zeroed here (profiles 2 and 3 keep 1); bavg_final_* uses
    // bavg_tax for these rows, so this value does not enter the results below.
    replace upfactoravg1=0 if p>=$upper_thr

    // Profile 2 - convex, Profile 3 - concave
    // The in-range rows are cut into six equal-length segments by position and
    // each segment gets its own step weight, listed from the lowest segment up.
    local steps2 0.05 0.2 0.3 0.4 0.6 0.8
    local steps3 6 5 4 3 2 1
    foreach k in 2 3 {
        gen weights`k'a=0
        replace weights`k'a=1 if p>=$lower_thr & p<$upper_thr
        sum weights`k'a
        global length=r(sum)
        // running count of in-range rows; reset to 0 outside the range
        gen weights`k'b=sum(weights`k'a)
        replace weights`k'b=0 if weights`k'a==0

        gen weights`k'=0
        forval j=1/6 {
            local w : word `j' of `steps`k''
            if `j'==1 {
                replace weights`k'=`w' if weights`k'b!=0 & weights`k'b<($length/6)
            }
            else if `j'==6 {
                replace weights`k'=`w' if weights`k'b>=(5*$length/6) & weights`k'b<=(6*$length/6)
            }
            else {
                replace weights`k'=`w' if weights`k'b>=((`j'-1)*$length/6) & weights`k'b<(`j'*$length/6)
            }
        }

        drop weights`k'a weights`k'b
        egen weights`k'sum=total(weights`k')

        gen upfactor`k'=1
        replace upfactor`k'=upfactor`k'[_n-1]+(weights`k'/(weights`k'sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
        replace upfactor`k'=tax/survey if p>=$upper_thr

        gen upfactoravg`k'=1
        replace upfactoravg`k'=(upfactor`k'[_n+1]+upfactor`k')/2 if weights`k'!=0
    }

    // disabled diagnostic plot:
    // twoway line (upfactor3 upfactor2 upfactor1 p) if p>=$lower_thr-1

    drop weights1sum weights2sum weights3sum

    // merge results
    // pop: weight attached to each row, with finer steps from p=99000 upwards
    gen pop=.01
    replace pop=0.001 if p>=99000 & p<99900
    replace pop=0.0001 if p>=99900 & p<99990
    replace pop=0.00001 if p>=99990
    // total() is the documented name of the legacy egen sum(); same result
    egen poptot=total(pop)

    // Descending p from here on, so the running sums below accumulate from the top
    gsort - p

    foreach n in 1 2 3 {
        // thresholds: survey below the range, scaled survey inside it, tax above it
        gen thr_final_`n'=survey
        replace thr_final_`n'=survey*upfactor`n' if p>=$lower_thr & p<$upper_thr
        replace thr_final_`n'=tax if p>=$upper_thr
        // bracketavg: same three-way rule, using the averaged factor
        gen bavg_final_`n'=bavg_survey
        replace bavg_final_`n'=bavg_survey*upfactoravg`n' if p>=$lower_thr & p<$upper_thr
        replace bavg_final_`n'=bavg_tax if p>=$upper_thr
        //shares
        sum bavg_final_`n' [aw=pop]
        gen meaninc_`n'=r(mean)
        gen groupshare_`n'=bavg_final_`n'*pop/(meaninc_`n')
        //topshares
        gen topshare_`n'=sum(groupshare_`n')
        //topaverage
        gen topavg_`n'=(topshare_`n'*meaninc_`n')/((100-(p/1000))/100)
        // pareto betas
        gen beta_`n'=(topavg_`n'/thr_final_`n')
    }

    // Initial series: the same share, top-average and beta computations for the
    // unadjusted survey series and then the unadjusted tax series
    gsort - p
    foreach s in survey tax {
        //shares
        sum bavg_`s' [aw=pop]
        gen meaninc_`s'=r(mean)
        gen groupshare_`s'=bavg_`s'*pop/(meaninc_`s')
        //topshares
        gen topshare_`s'=sum(groupshare_`s')
        //topaverage
        gen topavg_`s'=(topshare_`s'*meaninc_`s')/((100-(p/1000))/100)
        // pareto betas
        gen beta_`s'=(topavg_`s'/`s')
    }

    save "$inputdata/CombinedResults/IndiaInterpTaxSurvey`x'_$strategyA$strategyB$strategyC.dta", replace
}












// combine tax and survey files - 1990-1991
// For each year: join the interpolated tax and survey series on p, build three
// survey-to-tax upscaling profiles for rows with lower_thr <= p < upper_thr, then
// derive final thresholds, bracket averages, shares, top averages and betas,
// and save one combined dataset per year.
global lower_thr 80000
global upper_thr 99100
// range3 is not referenced anywhere inside this loop.
global range3 80991
forval x=1990(1)1991{
    use "$inputdata/TaxInterpResults/IndiaInterpTax`x'.dta", clear
    joinby p using "$inputdata/SurveyInterpResults/IndiaInterpSurveyFinal`x'.dta", unmatched(both)
    drop _merge

    // The lag and lead references and the running sums used for the profiles walk
    // the rows in ascending p. Fix that order explicitly instead of relying on
    // the order left behind by joinby; stable keeps tied rows in their order.
    sort p, stable

    rename thr_tax tax
    rename thr_survey survey
    rename bracketavg_tax bavg_tax
    rename bracketavg_survey bavg_survey

    // Self-assignments: as written these change no values.
    replace bavg_survey=bavg_survey
    replace survey=survey

    // Profile 1 - linear rise
    // weights1 flags the rows inside the adjustment range
    gen weights1=0
    replace weights1=1 if p>=$lower_thr & p<$upper_thr
    // upfactor_top: the tax/survey ratio observed at p==upper_thr, copied to all rows
    gen a1=tax/survey if p==$upper_thr
    egen upfactor_top=mean(a1)
    drop a1
    gen upfactor1=1
    egen weights1sum=total(weights1)

    // Each in-range row adds its weight share of (upfactor_top-1) to the factor
    // of the row just below it; rows at or above upper_thr take tax/survey directly.
    replace upfactor1=upfactor1[_n-1]+(weights1/(weights1sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
    replace upfactor1=tax/survey if p>=$upper_thr

    // Factor for bracket averages: mean of this row's factor and the next row's
    gen upfactoravg1=1
    replace upfactoravg1=(upfactor1[_n+1]+upfactor1)/2 if weights1!=0
    // Only profile 1 is zeroed here (profiles 2 and 3 keep 1); bavg_final_* uses
    // bavg_tax for these rows, so this value does not enter the results below.
    replace upfactoravg1=0 if p>=$upper_thr

    // Profile 2 - convex, Profile 3 - concave
    // The in-range rows are cut into six equal-length segments by position and
    // each segment gets its own step weight, listed from the lowest segment up.
    local steps2 0.05 0.2 0.3 0.4 0.6 0.8
    local steps3 6 5 4 3 2 1
    foreach k in 2 3 {
        gen weights`k'a=0
        replace weights`k'a=1 if p>=$lower_thr & p<$upper_thr
        sum weights`k'a
        global length=r(sum)
        // running count of in-range rows; reset to 0 outside the range
        gen weights`k'b=sum(weights`k'a)
        replace weights`k'b=0 if weights`k'a==0

        gen weights`k'=0
        forval j=1/6 {
            local w : word `j' of `steps`k''
            if `j'==1 {
                replace weights`k'=`w' if weights`k'b!=0 & weights`k'b<($length/6)
            }
            else if `j'==6 {
                replace weights`k'=`w' if weights`k'b>=(5*$length/6) & weights`k'b<=(6*$length/6)
            }
            else {
                replace weights`k'=`w' if weights`k'b>=((`j'-1)*$length/6) & weights`k'b<(`j'*$length/6)
            }
        }

        drop weights`k'a weights`k'b
        egen weights`k'sum=total(weights`k')

        gen upfactor`k'=1
        replace upfactor`k'=upfactor`k'[_n-1]+(weights`k'/(weights`k'sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
        replace upfactor`k'=tax/survey if p>=$upper_thr

        gen upfactoravg`k'=1
        replace upfactoravg`k'=(upfactor`k'[_n+1]+upfactor`k')/2 if weights`k'!=0
    }

    // disabled diagnostic plot:
    // twoway line (upfactor3 upfactor2 upfactor1 p) if p>=$lower_thr-1

    drop weights1sum weights2sum weights3sum

    // merge results
    // pop: weight attached to each row, with finer steps from p=99000 upwards
    gen pop=.01
    replace pop=0.001 if p>=99000 & p<99900
    replace pop=0.0001 if p>=99900 & p<99990
    replace pop=0.00001 if p>=99990
    // total() is the documented name of the legacy egen sum(); same result
    egen poptot=total(pop)

    // Descending p from here on, so the running sums below accumulate from the top
    gsort - p

    foreach n in 1 2 3 {
        // thresholds: survey below the range, scaled survey inside it, tax above it
        gen thr_final_`n'=survey
        replace thr_final_`n'=survey*upfactor`n' if p>=$lower_thr & p<$upper_thr
        replace thr_final_`n'=tax if p>=$upper_thr
        // bracketavg: same three-way rule, using the averaged factor
        gen bavg_final_`n'=bavg_survey
        replace bavg_final_`n'=bavg_survey*upfactoravg`n' if p>=$lower_thr & p<$upper_thr
        replace bavg_final_`n'=bavg_tax if p>=$upper_thr
        //shares
        sum bavg_final_`n' [aw=pop]
        gen meaninc_`n'=r(mean)
        gen groupshare_`n'=bavg_final_`n'*pop/(meaninc_`n')
        //topshares
        gen topshare_`n'=sum(groupshare_`n')
        //topaverage
        gen topavg_`n'=(topshare_`n'*meaninc_`n')/((100-(p/1000))/100)
        // pareto betas
        gen beta_`n'=(topavg_`n'/thr_final_`n')
    }

    // Initial series: the same share, top-average and beta computations for the
    // unadjusted survey series and then the unadjusted tax series
    gsort - p
    foreach s in survey tax {
        //shares
        sum bavg_`s' [aw=pop]
        gen meaninc_`s'=r(mean)
        gen groupshare_`s'=bavg_`s'*pop/(meaninc_`s')
        //topshares
        gen topshare_`s'=sum(groupshare_`s')
        //topaverage
        gen topavg_`s'=(topshare_`s'*meaninc_`s')/((100-(p/1000))/100)
        // pareto betas
        gen beta_`s'=(topavg_`s'/`s')
    }

    save "$inputdata/CombinedResults/IndiaInterpTaxSurvey`x'_$strategyA$strategyB$strategyC.dta", replace
}


























// combine tax and survey files - 1992-1993
// For each year: join the interpolated tax and survey series on p, build three
// survey-to-tax upscaling profiles for rows with lower_thr <= p < upper_thr, then
// derive final thresholds, bracket averages, shares, top averages and betas,
// and save one combined dataset per year.
global lower_thr 80000
global upper_thr 99000
// range3 is not referenced anywhere inside this loop.
global range3 80990
forval x=1992(1)1993{
    use "$inputdata/TaxInterpResults/IndiaInterpTax`x'.dta", clear
    joinby p using "$inputdata/SurveyInterpResults/IndiaInterpSurveyFinal`x'.dta", unmatched(both)
    drop _merge

    // The lag and lead references and the running sums used for the profiles walk
    // the rows in ascending p. Fix that order explicitly instead of relying on
    // the order left behind by joinby; stable keeps tied rows in their order.
    sort p, stable

    rename thr_tax tax
    rename thr_survey survey
    rename bracketavg_tax bavg_tax
    rename bracketavg_survey bavg_survey

    // Self-assignments: as written these change no values.
    replace bavg_survey=bavg_survey
    replace survey=survey

    // Profile 1 - linear rise
    // weights1 flags the rows inside the adjustment range
    gen weights1=0
    replace weights1=1 if p>=$lower_thr & p<$upper_thr
    // upfactor_top: the tax/survey ratio observed at p==upper_thr, copied to all rows
    gen a1=tax/survey if p==$upper_thr
    egen upfactor_top=mean(a1)
    drop a1
    gen upfactor1=1
    egen weights1sum=total(weights1)

    // Each in-range row adds its weight share of (upfactor_top-1) to the factor
    // of the row just below it; rows at or above upper_thr take tax/survey directly.
    replace upfactor1=upfactor1[_n-1]+(weights1/(weights1sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
    replace upfactor1=tax/survey if p>=$upper_thr

    // Factor for bracket averages: mean of this row's factor and the next row's
    gen upfactoravg1=1
    replace upfactoravg1=(upfactor1[_n+1]+upfactor1)/2 if weights1!=0
    // Only profile 1 is zeroed here (profiles 2 and 3 keep 1); bavg_final_* uses
    // bavg_tax for these rows, so this value does not enter the results below.
    replace upfactoravg1=0 if p>=$upper_thr

    // Profile 2 - convex, Profile 3 - concave
    // The in-range rows are cut into six equal-length segments by position and
    // each segment gets its own step weight, listed from the lowest segment up.
    local steps2 0.05 0.2 0.3 0.4 0.6 0.8
    local steps3 6 5 4 3 2 1
    foreach k in 2 3 {
        gen weights`k'a=0
        replace weights`k'a=1 if p>=$lower_thr & p<$upper_thr
        sum weights`k'a
        global length=r(sum)
        // running count of in-range rows; reset to 0 outside the range
        gen weights`k'b=sum(weights`k'a)
        replace weights`k'b=0 if weights`k'a==0

        gen weights`k'=0
        forval j=1/6 {
            local w : word `j' of `steps`k''
            if `j'==1 {
                replace weights`k'=`w' if weights`k'b!=0 & weights`k'b<($length/6)
            }
            else if `j'==6 {
                replace weights`k'=`w' if weights`k'b>=(5*$length/6) & weights`k'b<=(6*$length/6)
            }
            else {
                replace weights`k'=`w' if weights`k'b>=((`j'-1)*$length/6) & weights`k'b<(`j'*$length/6)
            }
        }

        drop weights`k'a weights`k'b
        egen weights`k'sum=total(weights`k')

        gen upfactor`k'=1
        replace upfactor`k'=upfactor`k'[_n-1]+(weights`k'/(weights`k'sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
        replace upfactor`k'=tax/survey if p>=$upper_thr

        gen upfactoravg`k'=1
        replace upfactoravg`k'=(upfactor`k'[_n+1]+upfactor`k')/2 if weights`k'!=0
    }

    // disabled diagnostic plot:
    // twoway line (upfactor3 upfactor2 upfactor1 p) if p>=$lower_thr-1

    drop weights1sum weights2sum weights3sum

    // merge results
    // pop: weight attached to each row, with finer steps from p=99000 upwards
    gen pop=.01
    replace pop=0.001 if p>=99000 & p<99900
    replace pop=0.0001 if p>=99900 & p<99990
    replace pop=0.00001 if p>=99990
    // total() is the documented name of the legacy egen sum(); same result
    egen poptot=total(pop)

    // Descending p from here on, so the running sums below accumulate from the top
    gsort - p

    foreach n in 1 2 3 {
        // thresholds: survey below the range, scaled survey inside it, tax above it
        gen thr_final_`n'=survey
        replace thr_final_`n'=survey*upfactor`n' if p>=$lower_thr & p<$upper_thr
        replace thr_final_`n'=tax if p>=$upper_thr
        // bracketavg: same three-way rule, using the averaged factor
        gen bavg_final_`n'=bavg_survey
        replace bavg_final_`n'=bavg_survey*upfactoravg`n' if p>=$lower_thr & p<$upper_thr
        replace bavg_final_`n'=bavg_tax if p>=$upper_thr
        //shares
        sum bavg_final_`n' [aw=pop]
        gen meaninc_`n'=r(mean)
        gen groupshare_`n'=bavg_final_`n'*pop/(meaninc_`n')
        //topshares
        gen topshare_`n'=sum(groupshare_`n')
        //topaverage
        gen topavg_`n'=(topshare_`n'*meaninc_`n')/((100-(p/1000))/100)
        // pareto betas
        gen beta_`n'=(topavg_`n'/thr_final_`n')
    }

    // Initial series: the same share, top-average and beta computations for the
    // unadjusted survey series and then the unadjusted tax series
    gsort - p
    foreach s in survey tax {
        //shares
        sum bavg_`s' [aw=pop]
        gen meaninc_`s'=r(mean)
        gen groupshare_`s'=bavg_`s'*pop/(meaninc_`s')
        //topshares
        gen topshare_`s'=sum(groupshare_`s')
        //topaverage
        gen topavg_`s'=(topshare_`s'*meaninc_`s')/((100-(p/1000))/100)
        // pareto betas
        gen beta_`s'=(topavg_`s'/`s')
    }

    save "$inputdata/CombinedResults/IndiaInterpTaxSurvey`x'_$strategyA$strategyB$strategyC.dta", replace
}





// combine tax and survey files - 1994-1995
// For each year: join the interpolated tax and survey series on p, build three
// survey-to-tax upscaling profiles for rows with lower_thr <= p < upper_thr, then
// derive final thresholds, bracket averages, shares, top averages and betas,
// and save one combined dataset per year.
global lower_thr 80000
global upper_thr 99000
// range3 is not referenced anywhere inside this loop.
// NOTE(review): the 1992-1993 section sets range3 to 80990 for the same
// upper_thr - confirm that 8099 is the intended value here.
global range3 8099
forval x=1994(1)1995{
    use "$inputdata/TaxInterpResults/IndiaInterpTax`x'.dta", clear
    joinby p using "$inputdata/SurveyInterpResults/IndiaInterpSurveyFinal`x'.dta", unmatched(both)
    drop _merge

    // The lag and lead references and the running sums used for the profiles walk
    // the rows in ascending p. Fix that order explicitly instead of relying on
    // the order left behind by joinby; stable keeps tied rows in their order.
    sort p, stable

    rename thr_tax tax
    rename thr_survey survey
    rename bracketavg_tax bavg_tax
    rename bracketavg_survey bavg_survey

    // Self-assignments: as written these change no values.
    replace bavg_survey=bavg_survey
    replace survey=survey

    // Profile 1 - linear rise
    // weights1 flags the rows inside the adjustment range
    gen weights1=0
    replace weights1=1 if p>=$lower_thr & p<$upper_thr
    // upfactor_top: the tax/survey ratio observed at p==upper_thr, copied to all rows
    gen a1=tax/survey if p==$upper_thr
    egen upfactor_top=mean(a1)
    drop a1
    gen upfactor1=1
    egen weights1sum=total(weights1)

    // Each in-range row adds its weight share of (upfactor_top-1) to the factor
    // of the row just below it; rows at or above upper_thr take tax/survey directly.
    replace upfactor1=upfactor1[_n-1]+(weights1/(weights1sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
    replace upfactor1=tax/survey if p>=$upper_thr

    // Factor for bracket averages: mean of this row's factor and the next row's
    gen upfactoravg1=1
    replace upfactoravg1=(upfactor1[_n+1]+upfactor1)/2 if weights1!=0
    // Only profile 1 is zeroed here (profiles 2 and 3 keep 1); bavg_final_* uses
    // bavg_tax for these rows, so this value does not enter the results below.
    replace upfactoravg1=0 if p>=$upper_thr

    // Profile 2 - convex, Profile 3 - concave
    // The in-range rows are cut into six equal-length segments by position and
    // each segment gets its own step weight, listed from the lowest segment up.
    local steps2 0.05 0.2 0.3 0.4 0.6 0.8
    local steps3 6 5 4 3 2 1
    foreach k in 2 3 {
        gen weights`k'a=0
        replace weights`k'a=1 if p>=$lower_thr & p<$upper_thr
        sum weights`k'a
        global length=r(sum)
        // running count of in-range rows; reset to 0 outside the range
        gen weights`k'b=sum(weights`k'a)
        replace weights`k'b=0 if weights`k'a==0

        gen weights`k'=0
        forval j=1/6 {
            local w : word `j' of `steps`k''
            if `j'==1 {
                replace weights`k'=`w' if weights`k'b!=0 & weights`k'b<($length/6)
            }
            else if `j'==6 {
                replace weights`k'=`w' if weights`k'b>=(5*$length/6) & weights`k'b<=(6*$length/6)
            }
            else {
                replace weights`k'=`w' if weights`k'b>=((`j'-1)*$length/6) & weights`k'b<(`j'*$length/6)
            }
        }

        drop weights`k'a weights`k'b
        egen weights`k'sum=total(weights`k')

        gen upfactor`k'=1
        replace upfactor`k'=upfactor`k'[_n-1]+(weights`k'/(weights`k'sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
        replace upfactor`k'=tax/survey if p>=$upper_thr

        gen upfactoravg`k'=1
        replace upfactoravg`k'=(upfactor`k'[_n+1]+upfactor`k')/2 if weights`k'!=0
    }

    // disabled diagnostic plot:
    // twoway line (upfactor3 upfactor2 upfactor1 p) if p>=$lower_thr-1

    drop weights1sum weights2sum weights3sum

    // merge results
    // pop: weight attached to each row, with finer steps from p=99000 upwards
    gen pop=.01
    replace pop=0.001 if p>=99000 & p<99900
    replace pop=0.0001 if p>=99900 & p<99990
    replace pop=0.00001 if p>=99990
    // total() is the documented name of the legacy egen sum(); same result
    egen poptot=total(pop)

    // Descending p from here on, so the running sums below accumulate from the top
    gsort - p

    foreach n in 1 2 3 {
        // thresholds: survey below the range, scaled survey inside it, tax above it
        gen thr_final_`n'=survey
        replace thr_final_`n'=survey*upfactor`n' if p>=$lower_thr & p<$upper_thr
        replace thr_final_`n'=tax if p>=$upper_thr
        // bracketavg: same three-way rule, using the averaged factor
        gen bavg_final_`n'=bavg_survey
        replace bavg_final_`n'=bavg_survey*upfactoravg`n' if p>=$lower_thr & p<$upper_thr
        replace bavg_final_`n'=bavg_tax if p>=$upper_thr
        //shares
        sum bavg_final_`n' [aw=pop]
        gen meaninc_`n'=r(mean)
        gen groupshare_`n'=bavg_final_`n'*pop/(meaninc_`n')
        //topshares
        gen topshare_`n'=sum(groupshare_`n')
        //topaverage
        gen topavg_`n'=(topshare_`n'*meaninc_`n')/((100-(p/1000))/100)
        // pareto betas
        gen beta_`n'=(topavg_`n'/thr_final_`n')
    }

    // Initial series: the same share, top-average and beta computations for the
    // unadjusted survey series and then the unadjusted tax series
    gsort - p
    foreach s in survey tax {
        //shares
        sum bavg_`s' [aw=pop]
        gen meaninc_`s'=r(mean)
        gen groupshare_`s'=bavg_`s'*pop/(meaninc_`s')
        //topshares
        gen topshare_`s'=sum(groupshare_`s')
        //topaverage
        gen topavg_`s'=(topshare_`s'*meaninc_`s')/((100-(p/1000))/100)
        // pareto betas
        gen beta_`s'=(topavg_`s'/`s')
    }

    save "$inputdata/CombinedResults/IndiaInterpTaxSurvey`x'_$strategyA$strategyB$strategyC.dta", replace
}





















// combine tax and survey files - 1996
// For the single year 1996: join the interpolated tax and survey series on p,
// build three survey-to-tax upscaling profiles for rows with
// lower_thr <= p < upper_thr, then derive final thresholds, bracket averages,
// shares, top averages and betas, and save the combined dataset.
global lower_thr 80000
global upper_thr 99000
// range3 is not referenced anywhere inside this loop.
// NOTE(review): the 1992-1993 section sets range3 to 80990 for the same
// upper_thr - confirm that 8099 is the intended value here.
global range3 8099
forval x=1996(1)1996{
    use "$inputdata/TaxInterpResults/IndiaInterpTax`x'.dta", clear
    joinby p using "$inputdata/SurveyInterpResults/IndiaInterpSurveyFinal`x'.dta", unmatched(both)
    drop _merge

    // The lag and lead references and the running sums used for the profiles walk
    // the rows in ascending p. Fix that order explicitly instead of relying on
    // the order left behind by joinby; stable keeps tied rows in their order.
    sort p, stable

    rename thr_tax tax
    rename thr_survey survey
    rename bracketavg_tax bavg_tax
    rename bracketavg_survey bavg_survey

    // Self-assignments: as written these change no values.
    replace bavg_survey=bavg_survey
    replace survey=survey

    // Profile 1 - linear rise
    // weights1 flags the rows inside the adjustment range
    gen weights1=0
    replace weights1=1 if p>=$lower_thr & p<$upper_thr
    // upfactor_top: the tax/survey ratio observed at p==upper_thr, copied to all rows
    gen a1=tax/survey if p==$upper_thr
    egen upfactor_top=mean(a1)
    drop a1
    gen upfactor1=1
    egen weights1sum=total(weights1)

    // Each in-range row adds its weight share of (upfactor_top-1) to the factor
    // of the row just below it; rows at or above upper_thr take tax/survey directly.
    replace upfactor1=upfactor1[_n-1]+(weights1/(weights1sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
    replace upfactor1=tax/survey if p>=$upper_thr

    // Factor for bracket averages: mean of this row's factor and the next row's
    gen upfactoravg1=1
    replace upfactoravg1=(upfactor1[_n+1]+upfactor1)/2 if weights1!=0
    // Only profile 1 is zeroed here (profiles 2 and 3 keep 1); bavg_final_* uses
    // bavg_tax for these rows, so this value does not enter the results below.
    replace upfactoravg1=0 if p>=$upper_thr

    // Profile 2 - convex, Profile 3 - concave
    // The in-range rows are cut into six equal-length segments by position and
    // each segment gets its own step weight, listed from the lowest segment up.
    local steps2 0.05 0.2 0.3 0.4 0.6 0.8
    local steps3 6 5 4 3 2 1
    foreach k in 2 3 {
        gen weights`k'a=0
        replace weights`k'a=1 if p>=$lower_thr & p<$upper_thr
        sum weights`k'a
        global length=r(sum)
        // running count of in-range rows; reset to 0 outside the range
        gen weights`k'b=sum(weights`k'a)
        replace weights`k'b=0 if weights`k'a==0

        gen weights`k'=0
        forval j=1/6 {
            local w : word `j' of `steps`k''
            if `j'==1 {
                replace weights`k'=`w' if weights`k'b!=0 & weights`k'b<($length/6)
            }
            else if `j'==6 {
                replace weights`k'=`w' if weights`k'b>=(5*$length/6) & weights`k'b<=(6*$length/6)
            }
            else {
                replace weights`k'=`w' if weights`k'b>=((`j'-1)*$length/6) & weights`k'b<(`j'*$length/6)
            }
        }

        drop weights`k'a weights`k'b
        egen weights`k'sum=total(weights`k')

        gen upfactor`k'=1
        replace upfactor`k'=upfactor`k'[_n-1]+(weights`k'/(weights`k'sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
        replace upfactor`k'=tax/survey if p>=$upper_thr

        gen upfactoravg`k'=1
        replace upfactoravg`k'=(upfactor`k'[_n+1]+upfactor`k')/2 if weights`k'!=0
    }

    // disabled diagnostic plot:
    // twoway line (upfactor3 upfactor2 upfactor1 p) if p>=$lower_thr-1

    drop weights1sum weights2sum weights3sum

    // merge results
    // pop: weight attached to each row, with finer steps from p=99000 upwards
    gen pop=.01
    replace pop=0.001 if p>=99000 & p<99900
    replace pop=0.0001 if p>=99900 & p<99990
    replace pop=0.00001 if p>=99990
    // total() is the documented name of the legacy egen sum(); same result
    egen poptot=total(pop)

    // Descending p from here on, so the running sums below accumulate from the top
    gsort - p

    foreach n in 1 2 3 {
        // thresholds: survey below the range, scaled survey inside it, tax above it
        gen thr_final_`n'=survey
        replace thr_final_`n'=survey*upfactor`n' if p>=$lower_thr & p<$upper_thr
        replace thr_final_`n'=tax if p>=$upper_thr
        // bracketavg: same three-way rule, using the averaged factor
        gen bavg_final_`n'=bavg_survey
        replace bavg_final_`n'=bavg_survey*upfactoravg`n' if p>=$lower_thr & p<$upper_thr
        replace bavg_final_`n'=bavg_tax if p>=$upper_thr
        //shares
        sum bavg_final_`n' [aw=pop]
        gen meaninc_`n'=r(mean)
        gen groupshare_`n'=bavg_final_`n'*pop/(meaninc_`n')
        //topshares
        gen topshare_`n'=sum(groupshare_`n')
        //topaverage
        gen topavg_`n'=(topshare_`n'*meaninc_`n')/((100-(p/1000))/100)
        // pareto betas
        gen beta_`n'=(topavg_`n'/thr_final_`n')
    }

    // Initial series: the same share, top-average and beta computations for the
    // unadjusted survey series and then the unadjusted tax series
    gsort - p
    foreach s in survey tax {
        //shares
        sum bavg_`s' [aw=pop]
        gen meaninc_`s'=r(mean)
        gen groupshare_`s'=bavg_`s'*pop/(meaninc_`s')
        //topshares
        gen topshare_`s'=sum(groupshare_`s')
        //topaverage
        gen topavg_`s'=(topshare_`s'*meaninc_`s')/((100-(p/1000))/100)
        // pareto betas
        gen beta_`s'=(topavg_`s'/`s')
    }

    save "$inputdata/CombinedResults/IndiaInterpTaxSurvey`x'_$strategyA$strategyB$strategyC.dta", replace
}





// combine tax and survey files - 1997
// For the single year 1997: join the interpolated tax and survey series on p,
// build three survey-to-tax upscaling profiles for rows with
// lower_thr <= p < upper_thr, then derive final thresholds, bracket averages,
// shares, top averages and betas, and save the combined dataset.
global lower_thr 80000
global upper_thr 98000
// range3 is not referenced anywhere inside this loop.
// NOTE(review): sections for earlier years use five-digit range3 values
// (for example 80990) - confirm that 8098 is the intended value here.
global range3 8098
forval x=1997(1)1997{
    use "$inputdata/TaxInterpResults/IndiaInterpTax`x'.dta", clear
    joinby p using "$inputdata/SurveyInterpResults/IndiaInterpSurveyFinal`x'.dta", unmatched(both)
    drop _merge

    // The lag and lead references and the running sums used for the profiles walk
    // the rows in ascending p. Fix that order explicitly instead of relying on
    // the order left behind by joinby; stable keeps tied rows in their order.
    sort p, stable

    rename thr_tax tax
    rename thr_survey survey
    rename bracketavg_tax bavg_tax
    rename bracketavg_survey bavg_survey

    // Self-assignments: as written these change no values.
    replace bavg_survey=bavg_survey
    replace survey=survey

    // Profile 1 - linear rise
    // weights1 flags the rows inside the adjustment range
    gen weights1=0
    replace weights1=1 if p>=$lower_thr & p<$upper_thr
    // upfactor_top: the tax/survey ratio observed at p==upper_thr, copied to all rows
    gen a1=tax/survey if p==$upper_thr
    egen upfactor_top=mean(a1)
    drop a1
    gen upfactor1=1
    egen weights1sum=total(weights1)

    // Each in-range row adds its weight share of (upfactor_top-1) to the factor
    // of the row just below it; rows at or above upper_thr take tax/survey directly.
    replace upfactor1=upfactor1[_n-1]+(weights1/(weights1sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
    replace upfactor1=tax/survey if p>=$upper_thr

    // Factor for bracket averages: mean of this row's factor and the next row's
    gen upfactoravg1=1
    replace upfactoravg1=(upfactor1[_n+1]+upfactor1)/2 if weights1!=0
    // Only profile 1 is zeroed here (profiles 2 and 3 keep 1); bavg_final_* uses
    // bavg_tax for these rows, so this value does not enter the results below.
    replace upfactoravg1=0 if p>=$upper_thr

    // Profile 2 - convex, Profile 3 - concave
    // The in-range rows are cut into six equal-length segments by position and
    // each segment gets its own step weight, listed from the lowest segment up.
    local steps2 0.05 0.2 0.3 0.4 0.6 0.8
    local steps3 6 5 4 3 2 1
    foreach k in 2 3 {
        gen weights`k'a=0
        replace weights`k'a=1 if p>=$lower_thr & p<$upper_thr
        sum weights`k'a
        global length=r(sum)
        // running count of in-range rows; reset to 0 outside the range
        gen weights`k'b=sum(weights`k'a)
        replace weights`k'b=0 if weights`k'a==0

        gen weights`k'=0
        forval j=1/6 {
            local w : word `j' of `steps`k''
            if `j'==1 {
                replace weights`k'=`w' if weights`k'b!=0 & weights`k'b<($length/6)
            }
            else if `j'==6 {
                replace weights`k'=`w' if weights`k'b>=(5*$length/6) & weights`k'b<=(6*$length/6)
            }
            else {
                replace weights`k'=`w' if weights`k'b>=((`j'-1)*$length/6) & weights`k'b<(`j'*$length/6)
            }
        }

        drop weights`k'a weights`k'b
        egen weights`k'sum=total(weights`k')

        gen upfactor`k'=1
        replace upfactor`k'=upfactor`k'[_n-1]+(weights`k'/(weights`k'sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
        replace upfactor`k'=tax/survey if p>=$upper_thr

        gen upfactoravg`k'=1
        replace upfactoravg`k'=(upfactor`k'[_n+1]+upfactor`k')/2 if weights`k'!=0
    }

    // disabled diagnostic plot:
    // twoway line (upfactor3 upfactor2 upfactor1 p) if p>=$lower_thr-1

    drop weights1sum weights2sum weights3sum

    // merge results
    // pop: weight attached to each row, with finer steps from p=99000 upwards
    gen pop=.01
    replace pop=0.001 if p>=99000 & p<99900
    replace pop=0.0001 if p>=99900 & p<99990
    replace pop=0.00001 if p>=99990
    // total() is the documented name of the legacy egen sum(); same result
    egen poptot=total(pop)

    // Descending p from here on, so the running sums below accumulate from the top
    gsort - p

    foreach n in 1 2 3 {
        // thresholds: survey below the range, scaled survey inside it, tax above it
        gen thr_final_`n'=survey
        replace thr_final_`n'=survey*upfactor`n' if p>=$lower_thr & p<$upper_thr
        replace thr_final_`n'=tax if p>=$upper_thr
        // bracketavg: same three-way rule, using the averaged factor
        gen bavg_final_`n'=bavg_survey
        replace bavg_final_`n'=bavg_survey*upfactoravg`n' if p>=$lower_thr & p<$upper_thr
        replace bavg_final_`n'=bavg_tax if p>=$upper_thr
        //shares
        sum bavg_final_`n' [aw=pop]
        gen meaninc_`n'=r(mean)
        gen groupshare_`n'=bavg_final_`n'*pop/(meaninc_`n')
        //topshares
        gen topshare_`n'=sum(groupshare_`n')
        //topaverage
        gen topavg_`n'=(topshare_`n'*meaninc_`n')/((100-(p/1000))/100)
        // pareto betas
        gen beta_`n'=(topavg_`n'/thr_final_`n')
    }

    // Initial series: the same share, top-average and beta computations for the
    // unadjusted survey series and then the unadjusted tax series
    gsort - p
    foreach s in survey tax {
        //shares
        sum bavg_`s' [aw=pop]
        gen meaninc_`s'=r(mean)
        gen groupshare_`s'=bavg_`s'*pop/(meaninc_`s')
        //topshares
        gen topshare_`s'=sum(groupshare_`s')
        //topaverage
        gen topavg_`s'=(topshare_`s'*meaninc_`s')/((100-(p/1000))/100)
        // pareto betas
        gen beta_`s'=(topavg_`s'/`s')
    }

    save "$inputdata/CombinedResults/IndiaInterpTaxSurvey`x'_$strategyA$strategyB$strategyC.dta", replace
}










// combine tax and survey files - 1998-2010
// For each year: join the interpolated tax and survey series on p, build three
// upscaling profiles (linear, convex, concave) that lift the survey series
// toward the tax series between lower_thr and upper_thr, then derive
// thresholds, bracket averages, top shares, top averages and betas, and save.
// From upper_thr upward the tax series is used as is.
global lower_thr 80000
global upper_thr 97000
// NOTE(review): range3 is not read anywhere inside this loop; presumably it is
// consumed elsewhere in the file - confirm before changing it.
global range3 8097
forval x=1998(1)2010{
use "$inputdata/TaxInterpResults/IndiaInterpTax`x'.dta", clear
joinby p using "$inputdata/SurveyInterpResults/IndiaInterpSurveyFinal`x'.dta", unmatched(both)
drop _merge

// Everything below walks the rows in ascending p: the lag recursions on
// upfactor[_n-1], the lead averages on upfactor[_n+1] and the running sums.
// Nothing above fixes that order explicitly, so enforce it here. The stable
// option leaves a dataset that is already in ascending p untouched.
sort p, stable

// Profile 1 - linear rise

rename thr_tax tax
rename thr_survey survey 
rename bracketavg_tax bavg_tax 
rename bracketavg_survey bavg_survey

// Self-assignments: they change nothing as written.
// NOTE(review): presumably placeholders for an optional rescaling of the
// survey series - confirm.
replace bavg_survey=bavg_survey
replace survey=survey


// weights1 flags the rows inside the blend window [lower_thr, upper_thr)
gen weights1=0
replace weights1=1 if p>=$lower_thr & p<$upper_thr
// upfactor_top is the tax/survey threshold ratio at p==upper_thr, spread to every row
gen a1=tax/survey if p==$upper_thr
egen upfactor_top=mean(a1)
drop a1
gen upfactor1=1
egen weights1sum=total(weights1)

// Inside the window each row adds an equal step of (upfactor_top-1)/(N+1) to
// the previous row, N being the number of window rows. Below the window the
// factor stays at 1.
replace upfactor1=upfactor1[_n-1]+(weights1/(weights1sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
// At and above upper_thr the factor is the observed tax/survey ratio
replace upfactor1=tax/survey if p>=$upper_thr

// Factor applied to bracket averages: midpoint of this row and the next row
gen upfactoravg1=1
replace upfactoravg1=(upfactor1[_n+1]+upfactor1)/2 if weights1!=0
// Zeroed above the window; bracket averages there are taken from the tax series
replace upfactoravg1=0 if p>=$upper_thr



// Profile 2 - convex

// weights2b numbers the window rows 1..length in ascending p (0 outside the window)
gen weights2a=0
replace weights2a=1 if p>=$lower_thr & p<$upper_thr
sum weights2a
global length=r(sum)
gen weights2b=sum(weights2a)
replace weights2b=0 if weights2a==0


// Step weights by sixth of the window, growing with p so that the factor
// rises slowly at first and faster near upper_thr
gen weights2=0
replace weights2=0.05 if weights2b!=0 & weights2b<($length/6)
replace weights2=0.2 if weights2b>=($length/6) & weights2b<(2*$length/6)
replace weights2=0.3 if weights2b>=(2*$length/6) & weights2b<(3*$length/6)
replace weights2=0.4 if weights2b>=(3*$length/6) & weights2b<(4*$length/6)
replace weights2=0.6 if weights2b>=(4*$length/6) & weights2b<(5*$length/6)
replace weights2=0.8 if weights2b>=(5*$length/6) & weights2b<=(6*$length/6)

drop weights2a weights2b
egen weights2sum=total(weights2)

gen upfactor2=1

// Same recursion as profile 1, with each step scaled by the row's weight
replace upfactor2=upfactor2[_n-1]+(weights2/(weights2sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor2=tax/survey if p>=$upper_thr

gen upfactoravg2=1
replace upfactoravg2=(upfactor2[_n+1]+upfactor2)/2 if weights2!=0

// Profile 3 - concave

// weights3b numbers the window rows 1..length in ascending p (0 outside the window)
gen weights3a=0
replace weights3a=1 if p>=$lower_thr & p<$upper_thr
sum weights3a
global length=r(sum)
gen weights3b=sum(weights3a)
replace weights3b=0 if weights3a==0


// Step weights by sixth of the window, shrinking with p so that the factor
// rises fast at first and flattens near upper_thr
gen weights3=0
replace weights3=6 if weights3b!=0 & weights3b<($length/6)
replace weights3=5 if weights3b>=($length/6) & weights3b<(2*$length/6)
replace weights3=4 if weights3b>=(2*$length/6) & weights3b<(3*$length/6)
replace weights3=3 if weights3b>=(3*$length/6) & weights3b<(4*$length/6)
replace weights3=2 if weights3b>=(4*$length/6) & weights3b<(5*$length/6)
replace weights3=1 if weights3b>=(5*$length/6) & weights3b<=(6*$length/6)



drop weights3a weights3b
egen weights3sum=total(weights3)

gen upfactor3=1

replace upfactor3=upfactor3[_n-1]+(weights3/(weights3sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor3=tax/survey if p>=$upper_thr

gen upfactoravg3=1
replace upfactoravg3=(upfactor3[_n+1]+upfactor3)/2 if weights3!=0


// Disabled diagnostic plot of the three profiles
*twoway line (upfactor3 upfactor2 upfactor1 p) if p>=$lower_thr-1

drop weights1sum weights2sum weights3sum


// merge results
// pop is the weight of each p row: 0.01 by default, 0.001 for p in
// [99000,99900), 0.0001 for p in [99900,99990) and 0.00001 from 99990 upward.
// NOTE(review): presumably the population fraction of each bracket - confirm
// against the p grid.
gen pop=.01
replace pop=0.001 if p>=99000 & p<99900
replace pop=0.0001 if p>=99900 & p<99990
replace pop=0.00001 if p>=99990
// total() is the documented egen function; sum() is its older name
egen poptot=total(pop)


// Descending p so that the running sums below accumulate from the top down
gsort - p

foreach n in 1 2 3 {
// thresholds: survey below the window, upscaled survey inside it, tax above it
gen thr_final_`n'=survey
replace thr_final_`n'=survey*upfactor`n' if p>=$lower_thr & p<$upper_thr
replace thr_final_`n'=tax if p>=$upper_thr
// bracketavg: same three-way splice, using the averaged factor inside the window
gen bavg_final_`n'=bavg_survey
replace bavg_final_`n'=bavg_survey*upfactoravg`n' if p>=$lower_thr & p<$upper_thr
replace bavg_final_`n'=bavg_tax if p>=$upper_thr
//shares: pop-weighted mean of the bracket averages, then each row's share of it
sum bavg_final_`n' [aw=pop]
gen meaninc_`n'=r(mean)
gen groupshare_`n'=bavg_final_`n'*pop/(meaninc_`n')
//topshares: running sum of row shares from the highest p downward
gen topshare_`n'=sum(groupshare_`n')
//topaverage: top share times mean income, divided by (100-p/1000)/100
// NOTE(review): the divisor is presumably the population fraction above p,
// reading p/1000 as a percentile - confirm.
gen topavg_`n'=(topshare_`n'*meaninc_`n')/((100-(p/1000))/100)
// pareto betas: top average over threshold
gen beta_`n'=(topavg_`n'/thr_final_`n')
}


// Initial series
// Same share, top-share, top-average and beta calculations on the unblended
// survey and tax series.

gsort - p
//shares
sum bavg_survey [aw=pop]
gen meaninc_survey=r(mean)
gen groupshare_survey=bavg_survey*pop/(meaninc_survey)
//topshares
gen topshare_survey=sum(groupshare_survey)
//topaverage
gen topavg_survey=(topshare_survey*meaninc_survey)/((100-(p/1000))/100)
// pareto betas
gen beta_survey=(topavg_survey/survey)

//shares
sum bavg_tax [aw=pop]
gen meaninc_tax=r(mean)
gen groupshare_tax=bavg_tax*pop/(meaninc_tax)
//topshares
gen topshare_tax=sum(groupshare_tax)
//topaverage
gen topavg_tax=(topshare_tax*meaninc_tax)/((100-(p/1000))/100)
// pareto betas
gen beta_tax=(topavg_tax/tax)

save "$inputdata/CombinedResults/IndiaInterpTaxSurvey`x'_$strategyA$strategyB$strategyC.dta", replace
}













// combine tax and survey files - 2011
// Join the interpolated tax and survey series on p, build three upscaling
// profiles (linear, convex, concave) that lift the survey series toward the
// tax series between lower_thr and upper_thr, then derive thresholds, bracket
// averages, top shares, top averages and betas, and save.
// From upper_thr upward the tax series is used as is.
global lower_thr 80000
global upper_thr 95000
// NOTE(review): range3 is not read anywhere inside this loop; presumably it is
// consumed elsewhere in the file - confirm before changing it.
global range3 8095
forval x=2011(1)2011{
use "$inputdata/TaxInterpResults/IndiaInterpTax`x'.dta", clear
joinby p using "$inputdata/SurveyInterpResults/IndiaInterpSurveyFinal`x'.dta", unmatched(both)
drop _merge

// Everything below walks the rows in ascending p: the lag recursions on
// upfactor[_n-1], the lead averages on upfactor[_n+1] and the running sums.
// Nothing above fixes that order explicitly, so enforce it here. The stable
// option leaves a dataset that is already in ascending p untouched.
sort p, stable

// Profile 1 - linear rise

rename thr_tax tax
rename thr_survey survey 
rename bracketavg_tax bavg_tax 
rename bracketavg_survey bavg_survey

// Self-assignments: they change nothing as written.
// NOTE(review): presumably placeholders for an optional rescaling of the
// survey series - confirm.
replace bavg_survey=bavg_survey
replace survey=survey


// weights1 flags the rows inside the blend window [lower_thr, upper_thr)
gen weights1=0
replace weights1=1 if p>=$lower_thr & p<$upper_thr
// upfactor_top is the tax/survey threshold ratio at p==upper_thr, spread to every row
gen a1=tax/survey if p==$upper_thr
egen upfactor_top=mean(a1)
drop a1
gen upfactor1=1
egen weights1sum=total(weights1)

// Inside the window each row adds an equal step of (upfactor_top-1)/(N+1) to
// the previous row, N being the number of window rows. Below the window the
// factor stays at 1.
replace upfactor1=upfactor1[_n-1]+(weights1/(weights1sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
// At and above upper_thr the factor is the observed tax/survey ratio
replace upfactor1=tax/survey if p>=$upper_thr

// Factor applied to bracket averages: midpoint of this row and the next row
gen upfactoravg1=1
replace upfactoravg1=(upfactor1[_n+1]+upfactor1)/2 if weights1!=0
// Zeroed above the window; bracket averages there are taken from the tax series
replace upfactoravg1=0 if p>=$upper_thr



// Profile 2 - convex

// weights2b numbers the window rows 1..length in ascending p (0 outside the window)
gen weights2a=0
replace weights2a=1 if p>=$lower_thr & p<$upper_thr
sum weights2a
global length=r(sum)
gen weights2b=sum(weights2a)
replace weights2b=0 if weights2a==0


// Step weights by sixth of the window, growing with p so that the factor
// rises slowly at first and faster near upper_thr
gen weights2=0
replace weights2=0.05 if weights2b!=0 & weights2b<($length/6)
replace weights2=0.2 if weights2b>=($length/6) & weights2b<(2*$length/6)
replace weights2=0.3 if weights2b>=(2*$length/6) & weights2b<(3*$length/6)
replace weights2=0.4 if weights2b>=(3*$length/6) & weights2b<(4*$length/6)
replace weights2=0.6 if weights2b>=(4*$length/6) & weights2b<(5*$length/6)
replace weights2=0.8 if weights2b>=(5*$length/6) & weights2b<=(6*$length/6)

drop weights2a weights2b
egen weights2sum=total(weights2)

gen upfactor2=1

// Same recursion as profile 1, with each step scaled by the row's weight
replace upfactor2=upfactor2[_n-1]+(weights2/(weights2sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor2=tax/survey if p>=$upper_thr

gen upfactoravg2=1
replace upfactoravg2=(upfactor2[_n+1]+upfactor2)/2 if weights2!=0

// Profile 3 - concave

// weights3b numbers the window rows 1..length in ascending p (0 outside the window)
gen weights3a=0
replace weights3a=1 if p>=$lower_thr & p<$upper_thr
sum weights3a
global length=r(sum)
gen weights3b=sum(weights3a)
replace weights3b=0 if weights3a==0


// Step weights by sixth of the window, shrinking with p so that the factor
// rises fast at first and flattens near upper_thr
gen weights3=0
replace weights3=6 if weights3b!=0 & weights3b<($length/6)
replace weights3=5 if weights3b>=($length/6) & weights3b<(2*$length/6)
replace weights3=4 if weights3b>=(2*$length/6) & weights3b<(3*$length/6)
replace weights3=3 if weights3b>=(3*$length/6) & weights3b<(4*$length/6)
replace weights3=2 if weights3b>=(4*$length/6) & weights3b<(5*$length/6)
replace weights3=1 if weights3b>=(5*$length/6) & weights3b<=(6*$length/6)



drop weights3a weights3b
egen weights3sum=total(weights3)

gen upfactor3=1

replace upfactor3=upfactor3[_n-1]+(weights3/(weights3sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor3=tax/survey if p>=$upper_thr

gen upfactoravg3=1
replace upfactoravg3=(upfactor3[_n+1]+upfactor3)/2 if weights3!=0


// Disabled diagnostic plot of the three profiles
*twoway line (upfactor3 upfactor2 upfactor1 p) if p>=$lower_thr-1

drop weights1sum weights2sum weights3sum


// merge results
// pop is the weight of each p row: 0.01 by default, 0.001 for p in
// [99000,99900), 0.0001 for p in [99900,99990) and 0.00001 from 99990 upward.
// NOTE(review): presumably the population fraction of each bracket - confirm
// against the p grid.
gen pop=.01
replace pop=0.001 if p>=99000 & p<99900
replace pop=0.0001 if p>=99900 & p<99990
replace pop=0.00001 if p>=99990
// total() is the documented egen function; sum() is its older name
egen poptot=total(pop)


// Descending p so that the running sums below accumulate from the top down
gsort - p

foreach n in 1 2 3 {
// thresholds: survey below the window, upscaled survey inside it, tax above it
gen thr_final_`n'=survey
replace thr_final_`n'=survey*upfactor`n' if p>=$lower_thr & p<$upper_thr
replace thr_final_`n'=tax if p>=$upper_thr
// bracketavg: same three-way splice, using the averaged factor inside the window
gen bavg_final_`n'=bavg_survey
replace bavg_final_`n'=bavg_survey*upfactoravg`n' if p>=$lower_thr & p<$upper_thr
replace bavg_final_`n'=bavg_tax if p>=$upper_thr
//shares: pop-weighted mean of the bracket averages, then each row's share of it
sum bavg_final_`n' [aw=pop]
gen meaninc_`n'=r(mean)
gen groupshare_`n'=bavg_final_`n'*pop/(meaninc_`n')
//topshares: running sum of row shares from the highest p downward
gen topshare_`n'=sum(groupshare_`n')
//topaverage: top share times mean income, divided by (100-p/1000)/100
// NOTE(review): the divisor is presumably the population fraction above p,
// reading p/1000 as a percentile - confirm.
gen topavg_`n'=(topshare_`n'*meaninc_`n')/((100-(p/1000))/100)
// pareto betas: top average over threshold
gen beta_`n'=(topavg_`n'/thr_final_`n')
}


// Initial series
// Same share, top-share, top-average and beta calculations on the unblended
// survey and tax series.

gsort - p
//shares
sum bavg_survey [aw=pop]
gen meaninc_survey=r(mean)
gen groupshare_survey=bavg_survey*pop/(meaninc_survey)
//topshares
gen topshare_survey=sum(groupshare_survey)
//topaverage
gen topavg_survey=(topshare_survey*meaninc_survey)/((100-(p/1000))/100)
// pareto betas
gen beta_survey=(topavg_survey/survey)

//shares
sum bavg_tax [aw=pop]
gen meaninc_tax=r(mean)
gen groupshare_tax=bavg_tax*pop/(meaninc_tax)
//topshares
gen topshare_tax=sum(groupshare_tax)
//topaverage
gen topavg_tax=(topshare_tax*meaninc_tax)/((100-(p/1000))/100)
// pareto betas
gen beta_tax=(topavg_tax/tax)

save "$inputdata/CombinedResults/IndiaInterpTaxSurvey`x'_$strategyA$strategyB$strategyC.dta", replace
}
















// combine tax and survey files - 2012
// Join the interpolated tax and survey series on p, build three upscaling
// profiles (linear, convex, concave) that lift the survey series toward the
// tax series between lower_thr and upper_thr, then derive thresholds, bracket
// averages, top shares, top averages and betas, and save.
// From upper_thr upward the tax series is used as is.
global lower_thr 80000
global upper_thr 94000
// NOTE(review): range3 is not read anywhere inside this loop; presumably it is
// consumed elsewhere in the file - confirm before changing it.
global range3 8094
forval x=2012(1)2012{
use "$inputdata/TaxInterpResults/IndiaInterpTax`x'.dta", clear
joinby p using "$inputdata/SurveyInterpResults/IndiaInterpSurveyFinal`x'.dta", unmatched(both)
drop _merge

// Everything below walks the rows in ascending p: the lag recursions on
// upfactor[_n-1], the lead averages on upfactor[_n+1] and the running sums.
// Nothing above fixes that order explicitly, so enforce it here. The stable
// option leaves a dataset that is already in ascending p untouched.
sort p, stable

// Profile 1 - linear rise

rename thr_tax tax
rename thr_survey survey 
rename bracketavg_tax bavg_tax 
rename bracketavg_survey bavg_survey

// Self-assignments: they change nothing as written.
// NOTE(review): presumably placeholders for an optional rescaling of the
// survey series - confirm.
replace bavg_survey=bavg_survey
replace survey=survey


// weights1 flags the rows inside the blend window [lower_thr, upper_thr)
gen weights1=0
replace weights1=1 if p>=$lower_thr & p<$upper_thr
// upfactor_top is the tax/survey threshold ratio at p==upper_thr, spread to every row
gen a1=tax/survey if p==$upper_thr
egen upfactor_top=mean(a1)
drop a1
gen upfactor1=1
egen weights1sum=total(weights1)

// Inside the window each row adds an equal step of (upfactor_top-1)/(N+1) to
// the previous row, N being the number of window rows. Below the window the
// factor stays at 1.
replace upfactor1=upfactor1[_n-1]+(weights1/(weights1sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
// At and above upper_thr the factor is the observed tax/survey ratio
replace upfactor1=tax/survey if p>=$upper_thr

// Factor applied to bracket averages: midpoint of this row and the next row
gen upfactoravg1=1
replace upfactoravg1=(upfactor1[_n+1]+upfactor1)/2 if weights1!=0
// Zeroed above the window; bracket averages there are taken from the tax series
replace upfactoravg1=0 if p>=$upper_thr



// Profile 2 - convex

// weights2b numbers the window rows 1..length in ascending p (0 outside the window)
gen weights2a=0
replace weights2a=1 if p>=$lower_thr & p<$upper_thr
sum weights2a
global length=r(sum)
gen weights2b=sum(weights2a)
replace weights2b=0 if weights2a==0


// Step weights by sixth of the window, growing with p so that the factor
// rises slowly at first and faster near upper_thr
gen weights2=0
replace weights2=0.05 if weights2b!=0 & weights2b<($length/6)
replace weights2=0.2 if weights2b>=($length/6) & weights2b<(2*$length/6)
replace weights2=0.3 if weights2b>=(2*$length/6) & weights2b<(3*$length/6)
replace weights2=0.4 if weights2b>=(3*$length/6) & weights2b<(4*$length/6)
replace weights2=0.6 if weights2b>=(4*$length/6) & weights2b<(5*$length/6)
replace weights2=0.8 if weights2b>=(5*$length/6) & weights2b<=(6*$length/6)

drop weights2a weights2b
egen weights2sum=total(weights2)

gen upfactor2=1

// Same recursion as profile 1, with each step scaled by the row's weight
replace upfactor2=upfactor2[_n-1]+(weights2/(weights2sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor2=tax/survey if p>=$upper_thr

gen upfactoravg2=1
replace upfactoravg2=(upfactor2[_n+1]+upfactor2)/2 if weights2!=0

// Profile 3 - concave

// weights3b numbers the window rows 1..length in ascending p (0 outside the window)
gen weights3a=0
replace weights3a=1 if p>=$lower_thr & p<$upper_thr
sum weights3a
global length=r(sum)
gen weights3b=sum(weights3a)
replace weights3b=0 if weights3a==0


// Step weights by sixth of the window, shrinking with p so that the factor
// rises fast at first and flattens near upper_thr
gen weights3=0
replace weights3=6 if weights3b!=0 & weights3b<($length/6)
replace weights3=5 if weights3b>=($length/6) & weights3b<(2*$length/6)
replace weights3=4 if weights3b>=(2*$length/6) & weights3b<(3*$length/6)
replace weights3=3 if weights3b>=(3*$length/6) & weights3b<(4*$length/6)
replace weights3=2 if weights3b>=(4*$length/6) & weights3b<(5*$length/6)
replace weights3=1 if weights3b>=(5*$length/6) & weights3b<=(6*$length/6)



drop weights3a weights3b
egen weights3sum=total(weights3)

gen upfactor3=1

replace upfactor3=upfactor3[_n-1]+(weights3/(weights3sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor3=tax/survey if p>=$upper_thr

gen upfactoravg3=1
replace upfactoravg3=(upfactor3[_n+1]+upfactor3)/2 if weights3!=0


// Disabled diagnostic plot of the three profiles
*twoway line (upfactor3 upfactor2 upfactor1 p) if p>=$lower_thr-1

drop weights1sum weights2sum weights3sum


// merge results
// pop is the weight of each p row: 0.01 by default, 0.001 for p in
// [99000,99900), 0.0001 for p in [99900,99990) and 0.00001 from 99990 upward.
// NOTE(review): presumably the population fraction of each bracket - confirm
// against the p grid.
gen pop=.01
replace pop=0.001 if p>=99000 & p<99900
replace pop=0.0001 if p>=99900 & p<99990
replace pop=0.00001 if p>=99990
// total() is the documented egen function; sum() is its older name
egen poptot=total(pop)


// Descending p so that the running sums below accumulate from the top down
gsort - p

foreach n in 1 2 3 {
// thresholds: survey below the window, upscaled survey inside it, tax above it
gen thr_final_`n'=survey
replace thr_final_`n'=survey*upfactor`n' if p>=$lower_thr & p<$upper_thr
replace thr_final_`n'=tax if p>=$upper_thr
// bracketavg: same three-way splice, using the averaged factor inside the window
gen bavg_final_`n'=bavg_survey
replace bavg_final_`n'=bavg_survey*upfactoravg`n' if p>=$lower_thr & p<$upper_thr
replace bavg_final_`n'=bavg_tax if p>=$upper_thr
//shares: pop-weighted mean of the bracket averages, then each row's share of it
sum bavg_final_`n' [aw=pop]
gen meaninc_`n'=r(mean)
gen groupshare_`n'=bavg_final_`n'*pop/(meaninc_`n')
//topshares: running sum of row shares from the highest p downward
gen topshare_`n'=sum(groupshare_`n')
//topaverage: top share times mean income, divided by (100-p/1000)/100
// NOTE(review): the divisor is presumably the population fraction above p,
// reading p/1000 as a percentile - confirm.
gen topavg_`n'=(topshare_`n'*meaninc_`n')/((100-(p/1000))/100)
// pareto betas: top average over threshold
gen beta_`n'=(topavg_`n'/thr_final_`n')
}


// Initial series
// Same share, top-share, top-average and beta calculations on the unblended
// survey and tax series.

gsort - p
//shares
sum bavg_survey [aw=pop]
gen meaninc_survey=r(mean)
gen groupshare_survey=bavg_survey*pop/(meaninc_survey)
//topshares
gen topshare_survey=sum(groupshare_survey)
//topaverage
gen topavg_survey=(topshare_survey*meaninc_survey)/((100-(p/1000))/100)
// pareto betas
gen beta_survey=(topavg_survey/survey)

//shares
sum bavg_tax [aw=pop]
gen meaninc_tax=r(mean)
gen groupshare_tax=bavg_tax*pop/(meaninc_tax)
//topshares
gen topshare_tax=sum(groupshare_tax)
//topaverage
gen topavg_tax=(topshare_tax*meaninc_tax)/((100-(p/1000))/100)
// pareto betas
gen beta_tax=(topavg_tax/tax)

save "$inputdata/CombinedResults/IndiaInterpTaxSurvey`x'_$strategyA$strategyB$strategyC.dta", replace
}















// combine tax and survey files - 2013-2014
// For each year: join the interpolated tax and survey series on p, build three
// upscaling profiles (linear, convex, concave) that lift the survey series
// toward the tax series between lower_thr and upper_thr, then derive
// thresholds, bracket averages, top shares, top averages and betas, and save.
// From upper_thr upward the tax series is used as is.
global lower_thr 80000
global upper_thr 93000
// NOTE(review): range3 is not read anywhere inside this loop; presumably it is
// consumed elsewhere in the file - confirm before changing it.
global range3 8093
forval x=2013(1)2014{
use "$inputdata/TaxInterpResults/IndiaInterpTax`x'.dta", clear
joinby p using "$inputdata/SurveyInterpResults/IndiaInterpSurveyFinal`x'.dta", unmatched(both)
drop _merge

// Everything below walks the rows in ascending p: the lag recursions on
// upfactor[_n-1], the lead averages on upfactor[_n+1] and the running sums.
// Nothing above fixes that order explicitly, so enforce it here. The stable
// option leaves a dataset that is already in ascending p untouched.
sort p, stable

// Profile 1 - linear rise

rename thr_tax tax
rename thr_survey survey 
rename bracketavg_tax bavg_tax 
rename bracketavg_survey bavg_survey

// Self-assignments: they change nothing as written.
// NOTE(review): presumably placeholders for an optional rescaling of the
// survey series - confirm.
replace bavg_survey=bavg_survey
replace survey=survey


// weights1 flags the rows inside the blend window [lower_thr, upper_thr)
gen weights1=0
replace weights1=1 if p>=$lower_thr & p<$upper_thr
// upfactor_top is the tax/survey threshold ratio at p==upper_thr, spread to every row
gen a1=tax/survey if p==$upper_thr
egen upfactor_top=mean(a1)
drop a1
gen upfactor1=1
egen weights1sum=total(weights1)

// Inside the window each row adds an equal step of (upfactor_top-1)/(N+1) to
// the previous row, N being the number of window rows. Below the window the
// factor stays at 1.
replace upfactor1=upfactor1[_n-1]+(weights1/(weights1sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
// At and above upper_thr the factor is the observed tax/survey ratio
replace upfactor1=tax/survey if p>=$upper_thr

// Factor applied to bracket averages: midpoint of this row and the next row
gen upfactoravg1=1
replace upfactoravg1=(upfactor1[_n+1]+upfactor1)/2 if weights1!=0
// Zeroed above the window; bracket averages there are taken from the tax series
replace upfactoravg1=0 if p>=$upper_thr



// Profile 2 - convex

// weights2b numbers the window rows 1..length in ascending p (0 outside the window)
gen weights2a=0
replace weights2a=1 if p>=$lower_thr & p<$upper_thr
sum weights2a
global length=r(sum)
gen weights2b=sum(weights2a)
replace weights2b=0 if weights2a==0


// Step weights by sixth of the window, growing with p so that the factor
// rises slowly at first and faster near upper_thr
gen weights2=0
replace weights2=0.05 if weights2b!=0 & weights2b<($length/6)
replace weights2=0.2 if weights2b>=($length/6) & weights2b<(2*$length/6)
replace weights2=0.3 if weights2b>=(2*$length/6) & weights2b<(3*$length/6)
replace weights2=0.4 if weights2b>=(3*$length/6) & weights2b<(4*$length/6)
replace weights2=0.6 if weights2b>=(4*$length/6) & weights2b<(5*$length/6)
replace weights2=0.8 if weights2b>=(5*$length/6) & weights2b<=(6*$length/6)

drop weights2a weights2b
egen weights2sum=total(weights2)

gen upfactor2=1

// Same recursion as profile 1, with each step scaled by the row's weight
replace upfactor2=upfactor2[_n-1]+(weights2/(weights2sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor2=tax/survey if p>=$upper_thr

gen upfactoravg2=1
replace upfactoravg2=(upfactor2[_n+1]+upfactor2)/2 if weights2!=0

// Profile 3 - concave

// weights3b numbers the window rows 1..length in ascending p (0 outside the window)
gen weights3a=0
replace weights3a=1 if p>=$lower_thr & p<$upper_thr
sum weights3a
global length=r(sum)
gen weights3b=sum(weights3a)
replace weights3b=0 if weights3a==0


// Step weights by sixth of the window, shrinking with p so that the factor
// rises fast at first and flattens near upper_thr
gen weights3=0
replace weights3=6 if weights3b!=0 & weights3b<($length/6)
replace weights3=5 if weights3b>=($length/6) & weights3b<(2*$length/6)
replace weights3=4 if weights3b>=(2*$length/6) & weights3b<(3*$length/6)
replace weights3=3 if weights3b>=(3*$length/6) & weights3b<(4*$length/6)
replace weights3=2 if weights3b>=(4*$length/6) & weights3b<(5*$length/6)
replace weights3=1 if weights3b>=(5*$length/6) & weights3b<=(6*$length/6)



drop weights3a weights3b
egen weights3sum=total(weights3)

gen upfactor3=1

replace upfactor3=upfactor3[_n-1]+(weights3/(weights3sum+1))*(upfactor_top-1) if p>=$lower_thr & p<$upper_thr
replace upfactor3=tax/survey if p>=$upper_thr

gen upfactoravg3=1
replace upfactoravg3=(upfactor3[_n+1]+upfactor3)/2 if weights3!=0


// Disabled diagnostic plot of the three profiles
*twoway line (upfactor3 upfactor2 upfactor1 p) if p>=$lower_thr-1

drop weights1sum weights2sum weights3sum


// merge results
// pop is the weight of each p row: 0.01 by default, 0.001 for p in
// [99000,99900), 0.0001 for p in [99900,99990) and 0.00001 from 99990 upward.
// NOTE(review): presumably the population fraction of each bracket - confirm
// against the p grid.
gen pop=.01
replace pop=0.001 if p>=99000 & p<99900
replace pop=0.0001 if p>=99900 & p<99990
replace pop=0.00001 if p>=99990
// total() is the documented egen function; sum() is its older name
egen poptot=total(pop)


// Descending p so that the running sums below accumulate from the top down
gsort - p

foreach n in 1 2 3 {
// thresholds: survey below the window, upscaled survey inside it, tax above it
gen thr_final_`n'=survey
replace thr_final_`n'=survey*upfactor`n' if p>=$lower_thr & p<$upper_thr
replace thr_final_`n'=tax if p>=$upper_thr
// bracketavg: same three-way splice, using the averaged factor inside the window
gen bavg_final_`n'=bavg_survey
replace bavg_final_`n'=bavg_survey*upfactoravg`n' if p>=$lower_thr & p<$upper_thr
replace bavg_final_`n'=bavg_tax if p>=$upper_thr
//shares: pop-weighted mean of the bracket averages, then each row's share of it
sum bavg_final_`n' [aw=pop]
gen meaninc_`n'=r(mean)
gen groupshare_`n'=bavg_final_`n'*pop/(meaninc_`n')
//topshares: running sum of row shares from the highest p downward
gen topshare_`n'=sum(groupshare_`n')
//topaverage: top share times mean income, divided by (100-p/1000)/100
// NOTE(review): the divisor is presumably the population fraction above p,
// reading p/1000 as a percentile - confirm.
gen topavg_`n'=(topshare_`n'*meaninc_`n')/((100-(p/1000))/100)
// pareto betas: top average over threshold
gen beta_`n'=(topavg_`n'/thr_final_`n')
}


// Initial series
// Same share, top-share, top-average and beta calculations on the unblended
// survey and tax series.

gsort - p
//shares
sum bavg_survey [aw=pop]
gen meaninc_survey=r(mean)
gen groupshare_survey=bavg_survey*pop/(meaninc_survey)
//topshares
gen topshare_survey=sum(groupshare_survey)
//topaverage
gen topavg_survey=(topshare_survey*meaninc_survey)/((100-(p/1000))/100)
// pareto betas
gen beta_survey=(topavg_survey/survey)

//shares
sum bavg_tax [aw=pop]
gen meaninc_tax=r(mean)
gen groupshare_tax=bavg_tax*pop/(meaninc_tax)
//topshares
gen topshare_tax=sum(groupshare_tax)
//topaverage
gen topavg_tax=(topshare_tax*meaninc_tax)/((100-(p/1000))/100)
// pareto betas
gen beta_tax=(topavg_tax/tax)

save "$inputdata/CombinedResults/IndiaInterpTaxSurvey`x'_$strategyA$strategyB$strategyC.dta", replace
}

