


// Load the NNI series taken from the WID file (values in current LCU, per the variable name).
use "$inputdata/India_Current_NNI_WID.dta", clear

// Percentage growth of the series relative to the preceding observation.
// NOTE(review): no sort is applied here, so this assumes the file is already
// stored in chronological order -- confirm.
local nni mnninc999i_currentLCU
generate gdp_rate = 100 * (`nni' - `nni'[_n-1]) / `nni'[_n-1]
format `nni' %16.0g

// Use the same key name (Year) as the tax data, then park the series in a
// tempfile so it can be merged in further down.
rename year Year
tempfile new_NNI
saveold `new_NNI'

// Import the raw income-tax tabulation that was assembled from the BP tables.
// NB: $RawTaxData (like the other globals) is defined in India_Main.
// *check here*: the sheet() read below selects the scenario. Other sheets of the
// same workbook that have been used at this point:
//   "HarmonizedTab", "HarmonizedTab_payersnonfilers3", "HarmonizedTab_Returned"
import excel "$RawTaxData", sheet("HarmonizedTab_payersnonfilers1") firstrow clear

// Year is still a string here: discard blank rows and repeated header rows.
drop if inlist(Year, "", "Year", "year")

// Important variables: Year, threshold, number of tax returns, gross income.
// Convert the whole Year ... Gross_Income_Tax range of columns to numeric.
destring Year-Gross_Income_Tax, replace

// Missing return counts / income amounts are set to zero.
foreach v of varlist N_Tax_Returns Gross_Income_Tax {
    replace `v' = 0 if `v' == .
}
rename Gross_Income_Tax Gross_Income
label variable Gross_Income "Gross Income from IT file"

*drop total_income_tax
// Unit correction: after 1956 the income figures are already multiplied by 1000,
// so they are scaled back down for Year >= 1957.
replace Gross_Income = Gross_Income/1000 if Year >= 1957
replace N_Tax_Returns = round(N_Tax_Returns)

// Yearly total of gross income over all rows of the year, with the factor 1000 applied.
bysort Year: egen Gross_Annual_income = total(Gross_Income*1000) // 1081
label variable Gross_Annual_income "=Gross Income*1000"
format Gross_Annual_income %16.0g

// The sheet's Year is kept under the name AY; the working Year is AY - 1.
// NOTE(review): AY presumably stands for "assessment year" -- confirm.
rename Year AY
generate Year = AY - 1

// Park the cleaned tabulation for the merge with the income control later on.
tempfile file1
saveold `file1'

// Build the national-income control: read the macro sheet, attach the NNI
// series saved in `new_NNI', back-cast it for 1922-1949 and take 70% of it.
clear
import excel "$RawTaxData", sheet("Macro_data") firstrow
destring Year, replace
drop if Year==.
// Drops the variable year and the contiguous range of variables I through S.
// NOTE(review): I-S look like spreadsheet-letter names of unused columns -- confirm.
drop year I-S

// Bringing the GDP growth rates
// Only years present in both data sets are kept (_m is an abbreviation of _merge).
merge 1:m Year using `new_NNI'
keep if _m==3
drop _merge

// Estimating the new total income control
gen tot_income_control=National_incometaxunit*Ntaxunits*1000000
format tot_income_control %16.0g
// Percentage change against the previous observation in the current order.
// NOTE(review): no explicit sort precedes this lag; it relies on the order left by merge -- confirm.
gen tot_income_control_rate=(tot_income_control-tot_income_control[_n-1])*100/tot_income_control[_n-1]
// Descending Year order: from here on [_n-1] is the observation stored just
// before, i.e. a later year (the following year if there is one row per consecutive year).
gsort - Year
// Keep an untouched copy of the series before it is overwritten below.
gen mnninc999i_partial=mnninc999i_currentLCU
// Back-casting, from 1949 down to 1922: each year's value is set to the previous
// observation's value divided by (1 + gdp_growth_rate_Siva/100). The loop goes from
// the latest year to the earliest, so each step uses the value filled in by the
// step before. Values for these years are overwritten unconditionally.
foreach year of numlist 1949/1922{
replace mnninc999i_currentLCU=mnninc999i_currentLCU[_n-1]/(1+(gdp_growth_rate_Siva/100)) if Year==`year'
}

// The revised income control is 70% of the (back-cast) series.
gen revised_income_control=mnninc999i_currentLCU*0.7
//graph twoway (connected revised_income_control Year) (connected tot_income_control Year) if Year<1950

 
// Checking the difference in old and new national income
// check and diff are created and dropped immediately: they have no effect on the
// saved data and only serve for inspection when the lines are run interactively.
gen check=National_incometaxunit*Ntaxunits*1000000
gen diff=(revised_income_control-check)*100/check
drop check diff

// Join the population data stored in $adult_pop onto the income control, by Year,
// and save the combined file.
joinby Year using "$adult_pop"
save "$inputdata/incomecontrol.dta", replace

// Side output: a small year / adult file. The full data set is reloaded right after.
keep Year adult
rename Year year
save "$inputdata/adultpop.dta", replace
use "$inputdata/incomecontrol.dta", clear

// Alternative tax-unit totals, expressed in millions.
// adult is the 20+ population and working the 15+ population; the "Mixed" variant
// uses 15+ up to 1983 and 20+ afterwards.
local million 1000000
generate NtaxunitsOld     = Ntaxunits
generate NtaxunitsAdults  = adult/`million'
generate NtaxunitsWorking = working/`million'
generate NtaxunitsMixed   = cond(Year <= 1983, working/`million', adult/`million')

// Income control per adult, also published year by year as the globals
// income_control_1922 ... income_control_2015.
generate income_control_adult = revised_income_control/(NtaxunitsAdults*`million')
forvalues yr = 1922/2015 {
    summarize income_control_adult if Year == `yr'
    global income_control_`yr' = r(mean)
}

// Write the enriched file back and continue from the saved copy.
save "$inputdata/incomecontrol.dta", replace
use "$inputdata/incomecontrol.dta", clear


// Instead of using income control BP, we use income control given by survey and tax files combination
// In practice, does not really alter the result

// Pair every row with the matching Year of the new series; unmatched rows from
// either side are kept (unmatched(both)), which is why _merge exists afterwards.
joinby Year using "$inputdata/incomecontrolnewseries.dta", unmatched (both)

drop _merge n year growth_adult_control

// New income control = mean income from the new series * adult population.
gen revised_income_control2=meaninc_1*adult
// Interactive inspection only. Disabled so that the do-file can run unattended:
// browse opens the Data Browser and has no place in a batch pipeline.
*br revised_income_control revised_income_control2
replace revised_income_control=revised_income_control2
drop revised_income_control2 meaninc_1

// growth rate of national income between 2014-15 and 2015-16
// The 2014 value is set to 1.136 times the value held by observation number 3.
// NOTE(review): which year sits in observation 3 depends on the sort order left
// by joinby above -- confirm that it is the intended base year.
replace revised_income_control=revised_income_control[3]*1.136 if Year==2014


// Total number of tax returns, compared with each population control.
// Attach the tax tabulation (`file1') to the income-control data.
// 1093 - 12 years for which the income data is unavailable; only the population data is available.
merge 1:m Year using `file1'
sort Year thr
keep if _merge == 3     // years without IT data are dropped
drop _merge

drop adult working

// Returns filed per year, converted to millions.
bysort Year: egen totalReturns = total(N_Tax_Returns)
replace totalReturns = totalReturns/1000000

// Returns as a percentage of each candidate tax-unit total.
local lbl_Old     "BP population control = 60% total population"
local lbl_Adults  "Population over Age 20"
local lbl_Working "Population over Age 15"
local lbl_Mixed   "Population over Age 20 after 1983"
foreach s in Old Adults Working Mixed {
    generate shareReturns`s' = 100*(totalReturns/Ntaxunits`s')
    label variable shareReturns`s' "`lbl_`s''"
}

save "$inputdata/NTaxpayers.dta", replace



// Population control used from here on: the adult series (NtaxunitsAdults).
// The earlier remark about taking the "mixed" scenario does not match the code:
// the line below assigns NtaxunitsAdults, not NtaxunitsMixed.

replace Ntaxunits=NtaxunitsAdults

*pause on 
*pause N TAX UNITS is ADULTS
*pause off 

// Finding the non-tax filers average income. Steps followed- 1) Calculate the Non-tax filers=Total Population control-Total Tax filers 2) income of non tax filers is total income control minus income of tax filers.
 
// Ntaxunits is in millions, hence the factor 1000000.
bysort Year :egen Total_Tax_Returns=total(N_Tax_Returns)
gen non_tax_filers=(Ntaxunits*1000000-Total_Tax_Returns) 
format non_tax_filers %16.0g

gen avg_non_tax_filers=(revised_income_control-Gross_Annual_income)/non_tax_filers
gen Total_tax_units=non_tax_filers+Total_Tax_Returns // equals Ntaxunits*1000000 by construction
format Total_tax_units %16.0g
label var Total_tax_units "=Ntaxunits*1000000"


// Creating the important variables as per the requirement of website.
gen country ="India"
// Overall average income per tax unit; topavg starts out as that average on the thr==0 row only.
gen avg=revised_income_control/Total_tax_units
gen topavg=avg if thr==0

// Checking Bracket averages
// check / check1 flag brackets whose average lies below their own threshold or
// below the next row's threshold. Both are dropped right away, so they only
// serve for inspection when run interactively.
gen tax_filers_avg=Gross_Income*1000/N_Tax_Returns
gen check=1 if tax_filers_avg<thr
by Year: gen check1=1 if tax_filers_avg<thr[_n+1]
drop check check1

// Brackets with a non-zero threshold below the non-filers' average are flagged;
// their returns and income are carried in dummyvar1 / dummyvar2 (0 elsewhere).
 gen check=1 if thr!=0 & thr<avg_non_tax_filers
  gen dummyvar1=0
 gen dummyvar2=0
 replace dummyvar1=N_Tax_Returns if check==1
 replace dummyvar2=Gross_Income if check==1
 
 
 // Add the flagged returns of the PREVIOUS observation to the non-filers count.
 // The lag is not taken by Year, so it can reach across a year boundary, and on
 // the very first observation dummyvar1[_n-1] is missing, which makes the result missing.
 replace non_tax_filers=non_tax_filers+dummyvar1[_n-1]
 // Generating non-tax filers average
 // Same lag logic for income: the previous observation's flagged income is added back.
 replace avg_non_tax_filers=(revised_income_control-Gross_Annual_income+dummyvar2[_n-1]*1000)/non_tax_filers 

 // The flagged bracket rows themselves are removed.
 drop if check==1
 drop check dummyvar1 dummyvar2 
 
  // issue 1922
 // Every 1922 row gets the mean of the non-missing 1922 values.
 // NOTE(review): presumably this patches the missing value produced on the first
 // observation by the [_n-1] lags above -- confirm.
 sum avg_non_tax_filers if Year==1922
 replace avg_non_tax_filers=r(mean) if Year==1922

 
 // Bracket average: the filers' average where it exists, otherwise (missing
 // tax_filers_avg, e.g. N_Tax_Returns equal to 0) the non-filers' average.
 gen bracketavg=tax_filers_avg
 replace bracketavg=avg_non_tax_filers if bracketavg==.
 
 
 // Finding top average
 // For each of returns and income:
 //   cum_  = running sum within Year (in the current row order)
 //   cum1_ = Year total
 //   cum2_ = total minus running sum, i.e. the amount in the rows after this one
 //   cum3_ = cum2_ of the previous observation, i.e. the amount in this row and the rows after it
 // NOTE(review): this relies on the rows being ordered by thr within Year, and the
 // [_n-1] lag is not taken by Year. Rows with thr==0 already have topavg set, so the
 // value lagged across a year boundary is not used for them -- confirm for other rows.
 foreach var in N_Tax_Returns Gross_Income {
  bysort Year: gen cum_`var'=sum(`var')
  bysort Year: egen cum1_`var'=total(`var')
  gen cum2_`var'=cum1_`var'-cum_`var'
  // Full variable name spelled out: the former "cum2[_n-1]" only resolved through
  // variable-name abbreviation and fails when varabbrev is switched off.
  gen cum3_`var'=cum2_`var'[_n-1]
  drop cum1_`var' cum2_`var' cum_`var'
 }

 // Average income at or above each threshold (Gross_Income carries a factor of 1000).
 replace topavg=cum3_Gross_Income*1000/cum3_N_Tax_Returns if topavg==.
 drop cum*
 
 //  calculate p. 
 // p ends up as the running sum, within Year, of the population shares lying
 // below each threshold: 0 on the thr==0 row, the non-filers' share on the next
 // row, then that share plus the shares of the brackets below each later row.
 set matsize 1000
 // From here on newly created variables default to double (session-wide setting).
 set type double
 gen double p=0 if thr==0  
 // Share of non-filers in all tax units.
 gen double p1=(non_tax_filers)/(Total_tax_units)  
 // Lead of p1; kept only on thr==0 rows (p is missing elsewhere, and missing!=0 is true).
 gen double p1_1=p1[_n+1]  
 replace p1_1=. if p!=0 // Keeping only the first 
 // The row right after a thr==0 row receives its own p1; all other rows stay missing.
 replace p=p1_1[_n-1] if p==. 

 // Share of each bracket's returns in all tax units, and its lag (previous bracket's share).
 gen double p2= N_Tax_Returns/(Total_tax_units) 
 *format p2 %16.0
 gen double p3=p2[_n-1]
 
 // max() ignores missing arguments: on the row after thr==0 this is the larger of
 // p and p3; on later rows (p missing) it is p3. thr==0 rows are excluded and then set to 0.
 gen double p3_1=max(p,p3) if p!=0
 replace p3_1=0 if p3_1==.
  
 // Running sum within Year gives the cumulative share; requires data sorted by Year.
 by Year: gen double p4 =sum(p3_1)
 replace p=p4
 drop p1_1 p1 p2 p3 p3_1 p4 
 
 // Final layout: keep only the output columns, put them in the order
 // year, country, average, p, thr, topavg, bracketavg and apply the output names.
 keep country avg p thr Year topavg bracketavg
 order Year country avg p thr topavg bracketavg
 rename (avg Year) (average year)

 // Park the result; the per-year export loops read it back from this tempfile.
 tempfile test
 saveold `test'
 
  
 // Generating the raw files for Pareto interpolation: one workbook per year,
 // "India <year>.xlsx" in $genfiles, with a single sheet "India <year>".
 // Years 1922-1997 are exported with topavg (bracketavg removed); the remaining
 // years 1998, 2011 and 2012 are exported with bracketavg (topavg removed).
 foreach yr of numlist 1922/1997 1998 2011 2012 {
     use `test', clear
     capture noisily keep if year == `yr'

     // year, country and average are kept on the thr==0 row only.
     capture noisily replace year = . if thr != 0
     capture noisily replace country = "" if thr != 0
     capture noisily replace average = . if thr != 0

     if `yr' <= 1997 {
         drop bracketavg
     }
     else {
         drop topavg
     }

     capture noisily export excel using "$genfiles/India `yr'.xlsx", sheet("India `yr'") firstrow(variables) replace
 }
 


// Years 2013 and 2014: same export as for the early years, i.e. the workbook
// keeps topavg and leaves out bracketavg.
forvalues yr = 2013/2014 {
    use `test', clear
    capture noisily keep if year == `yr'

    // year, country and average are kept on the thr==0 row only.
    capture noisily replace year = . if thr != 0
    capture noisily replace country = "" if thr != 0
    capture noisily replace average = . if thr != 0

    drop bracketavg
    capture noisily export excel using "$genfiles/India `yr'.xlsx", sheet("India `yr'") firstrow(variables) replace
}



 // if "HarmonizedTab_payersnonfilers3" is used above, use bit of code below instead of code just above
 /*
   // remaining years
 foreach var in 2013  {
clear
use `test'
capture noisily keep if year==`var'
*tostring Year, replace
 capture noisily replace year=. if thr!=0
 capture noisily replace country="" if thr!=0
 capture noisily replace average=. if thr!=0
  drop topavg
 capture noisily export excel using "$genfiles/India `var'.xlsx", sheet("India `var'") firstrow(variables) replace
 }
 
 */
 // Issue with 1973 file, corrected below
 // The exported 1973 workbook is read back, its 17th data row (observation 17,
 // the header row not counted) is removed, and the workbook is written again.
 // NOTE(review): the row number is hard-coded; it presumably targets one faulty
 // bracket of the current input sheet -- re-check whenever the input data change.
import excel using "$genfiles/India 1973.xlsx", firstrow clear
drop in 17
export excel using "$genfiles/India 1973.xlsx", sheet("India 1973") firstrow(variables) replace
