******Predicting managerial status with the PASS survey
***
**
*

* Close any log that is still open, then start a fresh text log for this run
cap log close
log using "$log/PASS_est", text replace

********Merge to PASS survey
* Load the PASS person file and attach the matched file one-to-one on pnr and welle.
* Forward slashes are used in every path so the script also runs outside Windows
* (the merge path previously used a backslash, unlike all other paths in this file).
use "$orig/PENDDAT.dta", clear
merge 1:1 pnr welle using "$data/PASS_beh_match.dta"


**********Managers from KldB
* manager_kldb holds the last digit of the 4-digit occupation code
capture drop manager_kldb
generate manager_kldb = mod(beruf2010_4, 10)

* Indicator: 1 when the last digit is 9, 0 when it is below 9, missing otherwise
* (a missing last digit compares as larger than 9, so it falls through to missing)
generate kldb_manager = cond(manager_kldb==9, 1, cond(manager_kldb<9, 0, .))



***Managers from survey information
* Show the raw distribution of the survey item before recoding
tabulate PET2700
* Recode: answer 1 becomes manager=1, answer 2 becomes manager=0, every other code stays missing
generate manager = cond(PET2700==1, 1, cond(PET2700==2, 0, .))

* Remove prediction variables left over from an earlier run, if any exist
capture drop logit*
capture drop logit


****Split sample randomly into test and train data
* Keep only observations with valid values in all listed variables.
* missing() is used instead of comparing against the system missing value alone:
* a comparison with . treats extended missing codes (.a to .z) as valid, so such
* observations would survive the filter.
keep if !missing(tentgelt, beruf2010_4, manager, educ, age_spell, frau, year)
* Generate the randomization variable: a 0/1 integer draw per observation.
* The seed fixes the random-number stream; the draws still follow the current
* observation order, so that order must be the same across runs.
set seed 5016
gen random=runiformint(0,1)



**********Covariates descriptives
label var tentgelt "Daily pay (Euros)"
label var age_spell "Age"
label var frau "Female (0/1)"
label var year "Current year"
label var realexp "Labor market experience (years)"
label var jobtenure "Job tenure (years)"
label var manager "Manager (PASS) (0/1)"
label var kldb_manager "Manager (Kldb) (0/1)"

* One dummy per observed education level: educ_1, educ_2, ...
tab educ, gen(educ_)
* tab, gen() numbers the dummies by the rank of the observed values, not by the
* values themselves. The labels are therefore built from the actual levels, and
* the text is read from whatever value label is attached to educ (the value
* itself is used when no label is attached). This no longer assumes exactly six
* levels coded 1 to 6 or a value label that is literally named educ.
quietly levelsof educ, local(educ_levels)
local k = 0
foreach lev of local educ_levels {
    local ++k
    local educlab : label (educ) `lev'
    label var educ_`k' "Education: `educlab'"
}


***Display summary statistics by sample type
* Variables shown in the table: outcomes, covariates and the six education dummies
local descvars manager kldb_manager tentgelt age_spell frau year realexp jobtenure
forvalues j = 1/6 {
    local descvars `descvars' educ_`j'
}

* random==0 is stored as the train column, random==1 as the test column
local name0 train
local name1 test
local head0 "Train sample"
local head1 "Test sample"
forvalues grp = 0/1 {
    estpost summarize `descvars' if random==`grp'
    eststo `name`grp'', title("`head`grp''")
}

* Export mean and standard deviation for both samples, using variable labels as row names
esttab train test using "$log/descriptives_PASS.rtf", ///
    replace label ///
    cells("mean(fmt(%9.2f) pattern(1 1 1 1)) sd(fmt(%9.2f) pattern(1 1 1 1))")
 
 
***Save both samples as datasets
* The test half (random==1) is written from a temporary copy of the data
preserve
drop if random!=1
save "$data/test_sample.dta", replace
restore
* The train half (random==0) is written last and stays in memory for estimation
drop if random!=0
save "$data/train_sample.dta", replace

***Controls
* Common right-hand side of every model; stored as a global because later
* estimation commands in this script refer to it by name
global control "c.tentgelt##c.tentgelt##c.tentgelt i.educ  i.age_spell i.frau i.year c.realexp##c.realexp c.jobtenure##c.jobtenure"

****Predict managerial status using logit
* Three specifications that differ only in the occupation regressor
local occ1 beruf2010_3
local occ2 beruf2010_4
local occ3 kldb_manager
local tag1 3digit
local tag2 4digit
local tag3 manager
local ttl1 "Logit 3-digit"
local ttl2 "Logit 4-digit"
local ttl3 "Logit Manager code"
forvalues s = 1/3 {
    logit manager i.`occ`s'' $control
    estimates store logit_`tag`s'', title("`ttl`s''")
}

* Full coefficient table on screen
esttab logit_3digit logit_4digit logit_manager

* Compact table without age and occupation dummies: once on screen, once as RTF
local layout drop(*age*  *beruf*) b(%9.3f) se(%9.3f) stats( r2_p N, fmt(%9.3f %14.0f) labels("Pseudo R2" "N")) lab mti
esttab logit*, `layout'
esttab logit* using "$log/results_predictions_logit.rtf", replace `layout'


***Display AMEs
* Re-estimate each logit specification and replace the stored results by the
* marginal effects (margins with the post option overwrites the estimates)
local ame_vars c.tentgelt i.educ i.frau  c.realexp jobtenure
local occ1 beruf2010_3
local occ2 beruf2010_4
local occ3 kldb_manager
local tag1 3digit
local tag2 4digit
local tag3 manager
local ttl1 "Logit 3-digit"
local ttl2 "Logit 4-digit"
local ttl3 "Logit Manager code"
forvalues s = 1/3 {
    logit manager i.`occ`s'' $control
    margins, dydx(`ame_vars') post
    estimates store logit_`tag`s''_ame, title("`ttl`s''")
}

* Default table on screen
esttab logit_3digit_ame logit_4digit_ame logit_manager_ame

* Formatted table: once on screen, once as RTF
local layout b(%9.3f) se(%9.3f) stats( r2_p N, fmt(%9.3f %14.0f) labels("Pseudo R2" "N")) lab mti
esttab logit*ame, `layout'
esttab logit*ame using "$log/results_predictions_logit_ame.rtf", replace `layout'


******Use the test sample to predict managerial status there
* Swap the train data for the held-out test data
drop _all
use "$data/test_sample.dta"
* Bring back each stored logit model and write its prediction into a variable
* that carries the same name as the stored model
foreach spec in 3digit 4digit manager {
    estimates restore logit_`spec'
    predict logit_`spec'
}


***Display ROC-curves
* Compare the three predictions against the observed manager indicator
roccomp manager logit_3digit logit_4digit  logit_manager, graph summary title("(A) Logit")  graphr(color(white))
* The gph copy is read again when the logit and probit panels are combined
graph save "$log/roc_manager_logit.gph", replace
graph export "$log/roc_manager_logit.pdf", replace


****Predict managerial status using probit
* Discard the logit prediction variables and return to the train sample
capture drop logit*
capture drop logit
drop _all
use "$data/train_sample.dta"

* Same three specifications as for the logit models
local occ1 beruf2010_3
local occ2 beruf2010_4
local occ3 kldb_manager
local tag1 3digit
local tag2 4digit
local tag3 manager
local ttl1 "Probit 3-digit"
local ttl2 "Probit 4-digit"
local ttl3 "Probit Manager code"
forvalues s = 1/3 {
    probit manager i.`occ`s'' $control
    estimates store probit_`tag`s'', title("`ttl`s''")
}

* Number formats and fit statistics shared by all tables below
local core b(%9.3f) se(%9.3f) stats( r2_p N, fmt(%9.3f %14.0f) labels("Pseudo R2" "N"))

* Probit table: screen version, then RTF version.
* The RTF version additionally drops coefficients matching the ear pattern and
* is written without the lab and mti options.
esttab probit*, drop(*age* *beruf*) `core' lab mti
esttab probit* using "$log/results_predictions_probit.rtf", replace drop(*age* *ear* *beruf*) `core'

* Logit and probit side by side: screen version, then RTF version.
* The RTF version additionally drops coefficients matching the ear pattern and
* is written without the mti option.
local all6 logit_manager logit_3digit logit_4digit probit_manager probit_3digit probit_4digit
esttab `all6', drop(*age* *beruf*) `core' lab mti
esttab `all6' using "$log/results_predictions_all.rtf", replace drop(*age* *beruf* *ear*) `core' lab



***AMEs
* Re-estimate each probit specification and replace the stored results by the
* marginal effects (margins with the post option overwrites the estimates)
local ame_vars c.tentgelt i.educ i.frau  c.realexp jobtenure
local occ1 beruf2010_3
local occ2 beruf2010_4
local occ3 kldb_manager
local tag1 3digit
local tag2 4digit
local tag3 manager
local ttl1 "Probit 3-digit"
local ttl2 "Probit 4-digit"
local ttl3 "Probit Manager code"
forvalues s = 1/3 {
    probit manager i.`occ`s'' $control
    margins, dydx(`ame_vars') post
    estimates store probit_`tag`s''_ame, title("`ttl`s''")
}

* Formatted probit table: once on screen, once as RTF
local layout b(%9.3f) se(%9.3f) stats( r2_p N, fmt(%9.3f %14.0f) labels("Pseudo R2" "N"))  lab mti
esttab probit*ame, `layout'
esttab probit*ame using "$log/results_predictions_probit_ame.rtf", replace `layout'

***Generate AME-table from the paper
* Logit and probit marginal effects side by side: once on screen, once as RTF
local all6_ame logit_manager_ame logit_3digit_ame logit_4digit_ame probit_manager_ame probit_3digit_ame probit_4digit_ame
esttab `all6_ame', `layout'
esttab `all6_ame' using "$log/results_predictions_all_ame.rtf", replace `layout'


******Use the test sample to predict managerial status there
* Swap the train data for the held-out test data
drop _all
use "$data/test_sample.dta"
* Bring back each stored probit model and write its prediction into a variable
* that carries the same name as the stored model
foreach spec in 3digit 4digit manager {
    estimates restore probit_`spec'
    predict probit_`spec'
}

***Display ROC-curves
* Compare the three predictions against the observed manager indicator
roccomp manager probit_3digit probit_4digit  probit_manager, graph summary title("(B) Probit")  graphr(color(white))
* The gph copy is read again when the logit and probit panels are combined
graph save "$log/roc_manager_probit.gph", replace
graph export "$log/roc_manager_probit.pdf", replace


****Combine the ROC curves
* Put the saved logit and probit ROC panels side by side.
* The note now matches the control set the models in this script are estimated
* with: tenure enters with a squared term and year enters as dummies (the note
* previously called both linear).
graph combine "$log/roc_manager_logit.gph" "$log/roc_manager_probit.gph", graphr(color(white)) note("Controls: Kldb2010 (dummies), Imputed education (dummies), daily earnings (cubic), experience (squared), tenure (squared), age (dummies), gender (dummy), year (dummies).") xsize(5) ysize(2)
graph export "$log/roccurves_logit_probit.pdf", replace
* The Windows metafile formats are requested only under Windows, so that on
* other systems the script still reaches the PNG export and closes the log
if "`c(os)'" == "Windows" {
    graph export "$log/roccurves_logit_probit.emf", replace
    graph export "$log/roccurves_logit_probit.wmf", replace
}
graph export "$log/roccurves_logit_probit.png", replace



cap log close