******************************************************************
* Do-Files used to create stata database  from Challenges lists  *
******************************************************************

* Characteristics of Challenges lists
	* Wealth is reported at the family level
	* It corresponds to the value of the company owned by the family
	* It is gross of debts
	
********************
* I) Load the data *
********************

	* Yearly aggregates read from sheet "Demo" (header in row 4, columns A to C)
	* Stored in tempfile temp and merged on year onto the lists further down;
	* presumably this is where nadult and pat_cn come from - confirm against the workbook
	***********************************************
		import excel "$dataApH/Challenges1998-2013.xlsx", sheet("Demo") cellrange(A4:C20) firstrow clear
		tempfile temp
		save "`temp'", replace

	* Challenges lists (sheet "Rawlists"): one row per fortune, one FORTUNE column per year
	******************************
		import excel "$dataApH/Challenges1998-2013.xlsx", sheet("Rawlists") cellrange(A1:AC1411) firstrow clear
		keep Socit1 Secteur Nom1 Prnom1 Nom2 Famille FORTUNE*
		rename (Socit1 Secteur Nom1 Prnom1 Nom2 Famille) (societe secteur nom prenom nom2 famille)
		forvalues yr = 1998/2013 {
			rename FORTUNE`yr' patrimoine`yr'
		}

		* Long format: one observation per fortune and year, empty and zero entries removed
			gsort -patrimoine2013
			generate ident = _n
			reshape long patrimoine, i(ident nom2) j(year)
			drop if patrimoine == . | patrimoine == 0

		* Keep the 500 largest fortunes of each year
			gsort + year - patrimoine
			bysort year : generate rank = _n
			drop if rank > 500

		* Adjustment rate
			* The top 500 includes French residents abroad, who have to be excluded
			* Estimate for 2013 : 55 billions in Switzerland and 17 billions in Belgium, Luxembourg and the UK
			* tx_adj is that amount relative to the 2013 top 500 total; the same rate is stored on every year
			bysort year : egen top500 = sum(patrimoine)
			summarize top500 if year == 2013, meanonly
			generate tx_adj = (55 + 17) * 1000 / r(mean)

		* Two associates (second name filled in): two observations, half of the fortune each
			expand 2 if nom2 != ""
			replace patrimoine = patrimoine / 2 if nom2 != ""

		drop societe secteur nom prenom nom2 rank ident top500

		* Attach the yearly aggregates, then reuse the macro name temp for the merged file
		merge m:1 year using "`temp'", nogenerate
		erase "`temp'"
		tempfile temp
		save "`temp'", replace
		
*********************************		
* II) Individualization of data *
*********************************		

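	* Different scenarios of individualization
		* 1) Everyone is single
		* 2) Take into account married couples (80% of observations, drawn at random, are split into two spouses)
		* 3) Take into account the size of the family: wealth flagged as family-owned is split equally among 10 members (brothers/sisters, children, grand-children...), then spouses as in 2)
		* 4) Same as 3) with wealth flagged as family-owned split equally among 15 members, then spouses as in 2)
		* NOTE(review): this list previously mentioned 2 and 18 relatives for scenarios 3) and 4); the code below uses 10 and 15 - confirm which is intended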
	
forval i=1/4 {
	* One pass per individualization scenario, each starting again from the merged
	* long file (fortunes by year plus yearly aggregates) saved in tempfile temp
	use "`temp'", clear

	* Scenarios 3 and 4: a fortune with a non-empty famille is split equally
	* between family members (10 members in scenario 3, 15 in scenario 4)
	if inlist(`i', 3, 4) {
		local famsize = cond(`i' == 3, 10, 15)
		expand `famsize' if famille != ""
		replace patrimoine = patrimoine/`famsize' if famille != ""
	}

	* Scenarios 2 to 4: married couples
	* Each observation has an 80% chance of being split into two spouses holding half each
	if `i' >= 2 {
		gsort + year - patrimoine
		bysort year : gen classmt = _n
		* Draws are handed out in (year, rank) order, which is a unique key, so that
		* with the fixed seed the same observations are split on every run.
		* Sorting on a key with ties (without the stable option) would leave the order
		* of tied observations, and hence the assignment of draws, unpredictable.
		sort year classmt
		set seed 1896936
		gen uni = runiform()
		expand 2 if uni <= 0.8
		replace patrimoine = patrimoine/2 if uni <= 0.8
		drop classmt uni
	}
	drop famille

*********************
* III) Wealth share *
*********************

* Top 0.0001% : 50 individuals in 2010
* Top 0.001% : 500 individuals in 2010

	* Number of individuals in each group (generally not an integer)
	* NOTE(review): the factor 1000 suggests nadult is expressed in thousands - confirm
		gen nbretop50  = 0.000001*nadult*1000
		gen nbretop500 = 10*nbretop50
		foreach var in top50 top500 {
			* arrondi = fractional part of the group size
			* sup     = rank of the marginal observation, counted with weight arrondi
			gen arrondi`var' = nbre`var' - int(nbre`var')
			gen sup`var'     = int(nbre`var') + 1
		}

	* Rank of each observation within its year (1 = largest fortune)
		gsort + year - patrimoine
		bysort year : gen classmt = _n
	order year classmt patrimoine  nbre* arrondi* suptop* nadult  pat_cn
	sort year classmt

	* Wealth share, mean wealth and entry threshold of each group
		foreach var in top50 top500 {
			* share : wealth of the group (marginal observation weighted by arrondi) over pat_cn*1000
			* mean  : same numerator over the group size
			* seuil : fortune of the observation ranked int(group size)
			bys year : egen share`var' = total((patrimoine*(classmt<=int(nbre`var'))+arrondi`var'*patrimoine*(classmt==sup`var'))/(pat_cn*1000))
			bys year : egen mean`var'  = total((patrimoine*(classmt<=int(nbre`var'))+arrondi`var'*patrimoine*(classmt==sup`var'))/nbre`var')
			bys year : egen seuil`var' = total(patrimoine*(classmt==int(nbre`var')))
		}
		gen pat_mean = pat_cn/nadult*1000000

		* Everything is constant within year at this point: one row per year
		collapse (mean) pat_mean share* mean*   seuiltop* pat_cn tx_adj  ,by(year)
		order year pat_mean

		* Problem with year 1999 : results set to missing
		foreach var of varlist sharetop50 sharetop500 meantop50 meantop500 seuiltop50 seuiltop500 {
			replace `var' = . if year==1999
		}

	* Adjusted wealth share (share removed at the rate tx_adj)
		gen sharetop500_adj = sharetop500*(1-tx_adj)

	* Suffix the results with the scenario number and store them for the final merge
	foreach var of varlist sharetop50 sharetop500 meantop50 meantop500 seuiltop50 seuiltop500 sharetop500_adj {
		rename `var' `var'_`i'
	}

	tempfile base`i'
	save "`base`i''", replace
}
* The results of scenario 4 are still in memory after the loop above:
* attach scenarios 1 to 3 to them year by year, then write the table to sheet Results
forvalues s = 1/3 {
	merge 1:1 year using "`base`s''", nogenerate
}
order year pat_mean *_1 *_2 *_3 *_4
export excel using "$dataApH/Challenges1998-2013.xlsx", sheet("Results") sheetmodify cell(A5) firstrow(variable)

* Top 0.001% wealth share from the capitalization method, written next to the Challenges results
	* The input file can be relocated by defining global gpercFile before running this do-file;
	* when that global is empty the original location is used.
	local gpercfile `"$gpercFile"'
	if `"`gpercfile'"' == "" {
		local gpercfile "C:\Users\John\Dropbox\WIDFrance\Papers\GGP2016Wealth\GGP2016WealthAppendixB\DataFiles\Exportation\Results\gperc\gperc_19702014.dta"
	}
	use `"`gpercfile'"', clear
	keep if sex==0 & year>=1998 & gperc==99999
	* Share = average wealth of the group (ytop) times its population fraction (0.001%), over mean wealth
	gen sharetop500cap = ytop*0.00001/patmean
	* Rows are matched to the results table by position only, so the year order must be explicit
	sort year
	keep sharetop500cap
	* NOTE(review): the table exported at A5 has 32 columns (A to AF), so AE5 appears to overwrite
	* its pat_cn column; the cell is left unchanged in case the workbook relies on this layout - confirm
	export excel using "$dataApH/Challenges1998-2013.xlsx", firstrow(variable) sheet("Results")   sheetmodify cell(AE5)

		