* Code to clean the Fiscal Data Shared by the Ministry of Finance * 


clear all
set more off

cd "/Users/lydiaassouad/Dropbox/WID_MiddleEast/Assouad2017Lebanon/Assouad2017DistributionSeries/FiscalData"



********************************************************************************
********************************************************************************
**** Cleaning v2 replacing inconsistent values by 0, keeping other amounts *****
********************************************************************************
********************************************************************************

* Load the raw fiscal file, discarding anything currently in memory.
* (Author's note: 4704282 observations.)
use "LebanonRaw_new.dta", clear

* Duplicate / inheritance variables are left in place for now
* (inheritance is analysed separately). Disabled drop kept for reference:
*drop BPT_REV SEQ_NO INH*

* ---- Give the raw columns working names ------------------------------------

* Identifiers
rename YEAR_NO  year
rename TAX_     id

* Self-employment income assessed with the lump-sum method (gross / net)
rename MAK_TOT  selfemplgross
rename MAK_PRF  selfemplnet

* S-corp income (gross / net)
rename HAK_TOT  corpgross
rename HAK_PRF  corpnet

* Partners' income (gross / net)
rename ASH_TOT  partgross
rename ASH_PRF  partnet

* Wages (gross / net)
rename F3101    wagegross
rename F3505    wagenet

* Built-property rent
rename BPT_REV  rent

* Taxes: on labor, on rent, on profits
rename F3605    labortax
rename BPT_TAX  renttax
rename INC_TAX  profittax

* ---- Missing rent and rent tax are treated as zero -------------------------
* NOTE(review): only rent and renttax are zero-filled. If any other amount can
* be missing, remember that Stata ranks missing above every number in the
* inequality checks that follow - confirm against the raw data.
foreach v in rent renttax {
    replace `v' = 0 if `v' == .
}


/*
Overview of the consistency checks applied below:
  1. Turnovers, wages, rent and taxes cannot be negative
  2. Link between gross and net income
  3. Link between taxes and taxable income
  4. Implausibly high values
*/



/* 1. Non-negativity.
Turnovers of partners and of individuals in S-corps, wages (gross and net),
self-employment income (gross and net, estimated with a lump-sum scheme),
built-property revenues and the taxes levied cannot be negative.

Whenever one amount of a group is negative, the whole group is set to 0.
In every loop the variable tested in the condition is listed LAST, so the
condition still sees its original value while the other variables are reset.
*/

* Partners: negative gross turnover
foreach v in partnet partgross {
    replace `v' = 0 if partgross < 0
}

* S-corp: negative gross turnover
foreach v in corpnet corpgross {
    replace `v' = 0 if corpgross < 0
}

* Wages: negative gross wage
foreach v in labortax wagenet wagegross {
    replace `v' = 0 if wagegross < 0
}

* Wages: negative net wage
foreach v in labortax wagegross wagenet {
    replace `v' = 0 if wagenet < 0
}

* Self-employment: negative gross income
foreach v in selfemplnet selfemplgross {
    replace `v' = 0 if selfemplgross < 0
}

* Self-employment: negative net income
foreach v in selfemplgross selfemplnet {
    replace `v' = 0 if selfemplnet < 0
}

* Rent: negative rent
foreach v in renttax rent {
    replace `v' = 0 if rent < 0
}

* Wages: negative labor tax
foreach v in wagegross wagenet labortax {
    replace `v' = 0 if labortax < 0
}

* Rent: negative rent tax
foreach v in rent renttax {
    replace `v' = 0 if renttax < 0
}

* No check is applied to the profit tax here (author's note: it is strictly positive).
 
* 2. Net amounts cannot exceed the corresponding gross amounts.
*    Offending records are flagged first and then reset to 0 as a group, so
*    the flag is not affected by the replacements themselves.

tempvar bad

* Wages: net wage above gross wage -> wipe the wage record and its tax
gen byte `bad' = wagegross < wagenet
foreach v in wagegross wagenet labortax {
    replace `v' = 0 if `bad'
}
drop `bad'


/* Self-employment income taxed under the lump-sum scheme.
Under the lump-sum profit method the net profit is assessed as a percentage
of the taxpayer's total income: selfemplnet = selfemplgross * lump-sum rate
=> selfemplgross must be greater than selfemplnet (and positive).
*/

gen byte `bad' = selfemplgross < selfemplnet
foreach v in selfemplnet selfemplgross {
    replace `v' = 0 if `bad'
}
drop `bad'
* Author's log note: 1254 and 942 real changes made


*******
* S-corp: net above gross
gen byte `bad' = corpgross < corpnet
foreach v in corpnet corpgross {
    replace `v' = 0 if `bad'
}
drop `bad'

* Partners: net above gross
gen byte `bad' = partgross < partnet
foreach v in partnet partgross {
    replace `v' = 0 if `bad'
}
drop `bad'
*****
 
/* 3. Amounts of tax levied cannot be greater than gross and taxable income.
Profit tax can be levied even if individuals declare negative income flows.

Each rule flags the inconsistent records first and then resets the whole
group of related variables to 0.

NOTE(review): Stata ranks a missing value above every number, so a record
whose tax amount is missing is flagged by the "tax > income" rules below.
Only rent and renttax are zero-filled earlier in this file - confirm that the
other amounts are never missing in the raw data.
*/

tempvar bad

* Labor tax larger than the net wage
gen byte `bad' = labortax > wagenet
foreach v in wagegross wagenet labortax {
    replace `v' = 0 if `bad'
}
drop `bad'

* Rent tax larger than the rent
gen byte `bad' = renttax > rent
foreach v in rent renttax {
    replace `v' = 0 if `bad'
}
drop `bad'


* Profit tax does not distinguish between the type of profit taxed.
* Profit taxes can't be positive if there are no profits.

* (a) Positive profit tax but no positive NET profit of any kind:
*     reset the tax and the S-corp and partner amounts, gross and net.
*     Fix: the original zeroed corpnet twice and never zeroed partgross,
*     leaving partnet = 0 next to a positive partgross on these records.
gen byte `bad' = profittax > 0 & corpnet <= 0 & selfemplnet == 0 & partnet <= 0
foreach v in profittax corpnet corpgross partnet partgross {
    replace `v' = 0 if `bad'
}
drop `bad'

* (b) Positive profit tax but no positive GROSS profit of any kind:
*     reset the tax and every profit component.
gen byte `bad' = profittax > 0 & corpgross <= 0 & selfemplgross == 0 & partgross <= 0
foreach v in profittax selfemplnet selfemplgross corpnet corpgross partnet partgross {
    replace `v' = 0 if `bad'
}
drop `bad'


* Total profits across the three sources
gen profitnet = selfemplnet + corpnet + partnet
gen profitgross = selfemplgross + corpgross + partgross

* (c) Profit tax larger than total gross profits: reset totals and components
gen byte `bad' = profittax > profitgross
foreach v in profitgross profittax profitnet selfemplnet selfemplgross corpnet corpgross partnet partgross {
    replace `v' = 0 if `bad'
}
drop `bad'

 

/* 4. Implausibly high values.
Author's note: individual wages between 2% and 200% of GDP.
Records whose gross wage reaches the cut-off below have their wage amounts
and labor tax reset to 0.
NOTE(review): the derivation of the cut-off is not shown in this file.
*/

tempvar toohigh
gen byte `toohigh' = wagegross >= 1412014120000
foreach v in wagegross wagenet labortax {
    replace `v' = 0 if `toohigh'
}
drop `toohigh'

* Total income tax = profit tax + labor tax + rent tax
gen incometax = profittax + labortax + renttax

* Attach the year-level parameters (many records per year, one parameter row
* per year); no _merge variable is kept.
merge m:1 year using "/Users/lydiaassouad/Dropbox/WID_MiddleEast/Assouad2017Lebanon/Assouad2017DistributionSeries/GpinterIncome/LebanonIncomeParameters.dta", nogenerate
rename popsize pop20

* Remove the parameter columns that are not needed downstream
drop factor* Lebanon* *Lebanon

* Intermediate cleaned file, input of the outlier step
save Lebanon_cl2.dta, replace
  
  
***********************************************************************************************
*  Removing outliers identified (see do_Outliers and ComparisonCleaning.xlsx for their identification)
***********************************************************************************************
* For each year 2005-2014 and each income variable, every amount is expressed
* as a multiple of that year's mean. Amounts whose ratio exceeds the
* variable-specific cut-off are reset to 0.
*
* Changes with respect to the previous implementation:
*   - profitnet is rebuilt after the outliers are removed. It used to keep
*     the pre-trimming total, so income still contained the outliers that
*     had just been removed from selfemplnet, corpnet and partnet.
*   - the wage ratio is referenced by its full name (cleanwagenet) instead
*     of relying on variable-name abbreviation.
*   - the data are processed in one pass, without reloading the file for
*     every year and without writing scratch files named 2005.dta ... 2014.dta
*     into the working directory.

local variables wagenet corpnet partnet selfemplnet rent

use Lebanon_cl2.dta, clear

* Only the years 2005-2014 are retained (a missing year fails the upper bound).
* NOTE(review): year is assumed to be integer-valued.
keep if year >= 2005 & year <= 2014

* Keeps the row layout of the year-by-year version: years in ascending order,
* and within a year by decreasing rent (as measured before trimming).
gsort year -rent

* Ratio of each amount to the mean of its own year
foreach x of local variables {
    gen clean`x' = .
    forvalues j = 2005/2014 {
        quietly summarize `x' if year == `j', meanonly
        replace clean`x' = `x' / r(mean) if year == `j'
    }
}

* Outlier cut-offs, in multiples of the yearly mean.
* A missing ratio compares as larger than any number and is therefore reset
* to 0 as well, exactly as before.
replace rent        = 0 if cleanrent        > 1239
replace wagenet     = 0 if cleanwagenet     > 702
replace selfemplnet = 0 if cleanselfemplnet > 2267
replace corpnet     = 0 if cleancorpnet     > 28709
replace partnet     = 0 if cleanpartnet     > 8407

drop clean*

* Bring the profit total back in line with its trimmed components
replace profitnet = selfemplnet + corpnet + partnet

* Total income: wages, profits and rent, scaled by 1.25
gen income = 1.25*(wagenet + profitnet + rent)

save Lebanon_cleaned.dta, replace
