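/*
Path globals for this do-file. They are commented out here, so they must be set
by the caller before the script is run. The script reads:
  $siab_path  - folder containing SIAB_7517_v1.dta (input)
  $data       - folder with the stored estimates and for the generated dataset
  $log        - folder for the result tables
Note: the template below defines "orig", whereas the script reads "siab_path".

global orig "N:\Ablagen\D01700-IAB-Projekte\D01700-Projekte-FDZ\Datensaetze\_Endprodukte\SIAB\_Archiv\SIAB_7517_v1"
global data "N:\Ablagen\D01700-Projekte\D01700-CollischM\Methodenbericht_managers\data"                      // directory for newly generated datasets
global log "N:\Ablagen\D01700-Projekte\D01700-CollischM\Methodenbericht_managers\results"    
*/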

* Load the SIAB file from the folder given by the global $siab_path
* (the global is not defined in this file; it must be set by the caller).
clear all
use "$siab_path/SIAB_7517_v1.dta", clear



*** Keep only observations with quelle==1 (labelled "BeH" by the original author)
keep if quelle==1

** Keep one spell per person and spell start date: the one with the highest tentgelt.
** gsort orders by persnr, begepi and descending tentgelt; xspell numbers the spells
** within each persnr/begepi cell, and only the first one is kept.
* NOTE(review): if two spells in a cell share the same tentgelt, which one is kept
* depends on how the sort breaks ties - confirm whether a tie-breaker is needed.
gsort persnr begepi -tentgelt
by  persnr begepi: gen xspell=_n
keep if xspell==1



***Impute Education
* Builds a six-level education variable (educ) from ausbildung and schule and
* then fills in / corrects values along each person's spell history.
tab1 ausbildung schule

**Methodenbericht 09/18
* Initial coding; later replace statements override earlier ones, so a spell with
* schule 8/9 ends up as 3 (or 4 when ausbildung==2) regardless of the first two lines.
gen educ=.
replace educ=1 if ausbildung==1 // no vocational training
replace educ=2 if ausbildung==2 // vocational training
replace educ=3 if schule==8 | schule==9 // upper secondary
replace educ=4 if (schule==8 | schule==9) & ausbildung==2 // upper secondary + vocational training
replace educ=5 if ausbildung==11 // university of applied sciences (FH)
replace educ=6 if ausbildung==12 // university


**Sort data
sort persnr begepi endepi

**Age at the beginning of the spell (calendar year of begepi minus birth year)
cap drop age_spell
gen age_spell=year(begepi)-gebjahr

***Calendar year of the spell start
gen year=year(begepi)

***impute forward
**Fill missing educ with the previous spell's value for spells with age_spell>18.
* NOTE(review): the condition is strictly ">18", so spells at exactly age 18 are
* neither filled here nor reset to 1 below. Also, a missing age_spell compares as
* larger than any number in Stata, so such spells pass the ">18" test - confirm
* that gebjahr is never missing.
bysort persnr (begepi endepi): replace educ=educ[_n-1] if educ==. & age_spell>18

**Below 18: set every non-missing value to 1 (no vocational training)
replace educ=1 if educ!=. & age_spell<18

**Upper secondary (3) directly after a spell with vocational training (2) becomes 4
bysort persnr (begepi endepi): replace educ=4 if educ==3 & educ[_n-1]==2 & age_spell>18

**Education may not decrease: carry the previous (higher) value forward.
* replace works through the observations in order, so an updated value is
* already visible to the next spell and the higher level cascades forward.
bysort persnr (begepi endepi): replace educ=educ[_n-1] if educ<educ[_n-1] &  age_spell>18 & educ[_n-1]!=.

***Writing backwards
* If the NEXT spell reports a higher level and the person is already older than
* the level-specific age threshold, take over the next spell's value.
* NOTE(review): because replace runs from the first to the last observation,
* educ[_n+1] is the next spell's not-yet-updated value, so each statement carries
* a value back by one spell only - confirm this is intended. Spells with missing
* educ are not affected (missing is not "<" any value).
bysort persnr (begepi endepi): replace educ=educ[_n+1] if educ<educ[_n+1] & educ[_n+1]==6 &  age_spell>29 // university
bysort persnr (begepi endepi): replace educ=educ[_n+1] if educ<educ[_n+1] & educ[_n+1]==5 &  age_spell>27 // university of applied sciences
bysort persnr (begepi endepi): replace educ=educ[_n+1] if educ<educ[_n+1] & educ[_n+1]==4 &  age_spell>23 // upper secondary + vocational training
bysort persnr (begepi endepi): replace educ=educ[_n+1] if educ<educ[_n+1] & educ[_n+1]==3 &  age_spell>21 // upper secondary
bysort persnr (begepi endepi): replace educ=educ[_n+1] if educ<educ[_n+1] & educ[_n+1]==2 &  age_spell>20 // vocational training

replace educ=1 if educ==. // remaining missings are treated as "no vocational training"
label def educ 1 "No vocational Training" 2 "Vocational training" 3 "Upper Secondary" 4 "Upper Secondary + voc training" 5 "University of applied sciences" 6 "University"
label val educ educ



***generate tenure
**Step 1: group consecutive spells at the same establishment into jobs (job_nr)

sort persnr betnr begepi endepi

* job_epi = 0 marks a continuation spell: same betnr as the person's previous
* spell and starting at most 1 day after that spell ended.
* NOTE(review): the original comment spoke of a reporting gap of 31 days, but the
* condition below only joins spells with a gap of <= 1 day - confirm the threshold.
  by persnr: gen job_epi = 0 if betnr == betnr[_n-1] & begepi-endepi[_n-1] <= 1 
* every other spell starts a new job and is marked with 1
  replace job_epi = 1 if job_epi ==. 
* give each job-starting spell a unique number (its row number after sorting the
* job-starting spells to the top)
  gsort -job_epi persnr 
  gen job_nr = _n if job_epi == 1
* copy that number forward to the continuation spells of the same job
  sort persnr betnr begepi endepi
  by persnr: replace job_nr = job_nr[_n-1] if job_nr == . & job_epi == 0

* job_last = 1 for the last spell of each job, 0 otherwise
bys job_nr (begepi endepi): gen job_last = _n == _N  
  
* Adjusted spell dates (adat2/edat2): close short gaps of 2-31 days between
* consecutive spells of the same job when both spells start in the same year.
* gap is a 0/1 indicator (1 = the spell starts 2 to 31 days after the previous
* spell of the same job ended; 0 otherwise, including the first spell of a job).
* The adjustment therefore has to test gap==1; the former test gap==31 could
* never be true for an indicator variable.
* NOTE(review): job_nr currently joins only spells that are at most 1 day apart,
* so gap is expected to be 0 everywhere and adat2 to equal begepi - confirm
* whether this step is still needed.
sort persnr job_nr begepi endepi
bys job_nr (begepi endepi): gen byte gap = begepi-endepi[_n-1] <= 31 & begepi-endepi[_n-1] > 1
gen adat2=begepi
gen edat2=endepi
* year is rebuilt from the adjusted start date
cap drop year
gen year=year(adat2)
* move the start date to the day after the previous spell of the same job ended
replace adat2=edat2[_n-1]+1 if job_nr==job_nr[_n-1] & gap==1 & year==year[_n-1]
drop gap

* diagnostic: count the gaps of 2-31 days that remain after the adjustment
bys job_nr (adat2 edat2): gen byte gap2 = adat2-edat2[_n-1] <= 31 & adat2-edat2[_n-1] > 1
tab gap2, m
drop gap2
format adat2 edat2 %td
* First start date (jbeg) and last end date (jend) of every job, copied to all
* of the job's spells; spells without a job number get no value.
bysort job_nr (begepi endepi): egen jbeg = min(begepi) if job_nr != .
bysort job_nr (begepi endepi): egen jend = max(endepi) if job_nr != .
format %d jbeg jend

label variable jbeg    "begin date of job periode"
label variable jend    "end date of job periode"
label variable job_epi "job period"
label variable job_nr  "counter of the job period"


* Tenure in years at the start of the spell: days since the job began / 365.25
generate jobtenure = (begepi - jbeg) / 365.25
label variable jobtenure "jobtenure years beg spell"



**** Experience at the start of a spell
* dauer is the spell length in days (both end points included). realexp is the
* total length of all earlier spells of the person, converted to years.
generate dauer = endepi - begepi + 1
tempvar cumdays
* running total of spell days per person, current spell included
bysort persnr (begepi endepi): generate double `cumdays' = sum(dauer)
* subtract the current spell so that only earlier spells count
generate realexp = (`cumdays' - dauer) / 365.25
drop `cumdays'
label variable realexp "exp in years" 
summarize realexp


**** Manager indicator derived from the KldB 2010 code (beruf2010_4)
* manager_kldb holds the last digit of the occupation code.
generate manager_kldb = mod(beruf2010_4, 10)
* 1 if the last digit is 9, 0 if it is below 9, missing otherwise
generate kldb_manager = (manager_kldb == 9) if manager_kldb <= 9
* code 7110 is always counted as manager
replace kldb_manager = 1 if beruf2010_4 == 7110


*** The PASS-data is only available from 2007 onwards,
*** so spells starting before 2007 are removed
drop if year < 2007

**** Load each stored estimation result from $data and compute its prediction
* (predict is called without options, i.e. with the default statistic of the
*  stored model)
estimates use "$data/manager_pred_3d"
predict logit_3digit

estimates use "$data/manager_pred_4d"
predict logit_4digit

estimates use "$data/manager_pred"
predict logit_manager


******* Turn the predictions into 0/1 manager indicators
* 1 if the prediction exceeds the cut-off, 0 if not, missing if there is no
* prediction. Cut-off is 0.8, except for manager_3digit_70 (0.7).
generate manager_3digit    = (logit_3digit  > 0.8) if logit_3digit  != .
generate manager_4digit    = (logit_4digit  > 0.8) if logit_4digit  != .
generate manager_simple    = (logit_manager > 0.8) if logit_manager != .
generate manager_3digit_70 = (logit_3digit  > 0.7) if logit_3digit  != .

*** Everybody flagged as manager by the KldB code counts as manager in all variants
foreach flag of varlist manager_3digit manager_4digit manager_simple manager_3digit_70 {
    replace `flag' = 1 if kldb_manager == 1
}
summarize manager_3digit manager_4digit manager_simple


***Regress managerial or supervisory status on covariates
* One linear regression per manager indicator on the female dummy, quadratics in
* experience and job tenure, and education, year and age dummies; standard errors
* are clustered on persnr. Each result is stored as <indicator>_res together with
* the male mean of the outcome.
est clear
foreach y of varlist kldb_manager manager_simple manager_3digit manager_3digit_70 manager_4digit {
    reg `y' frau c.realexp##c.realexp c.jobtenure##c.jobtenure i.educ i.year i.age_spell, cl(persnr)
    * Male mean of the outcome, restricted to the estimation sample so that it
    * refers to the same observations as the coefficients and the reported N
    sum `y' if frau==0 & e(sample)
    estadd scalar mean=`r(mean)'
    eststo `y'_res 
}
* Show the table on screen and write it to $log; the year dummies are not displayed
esttab *_res, drop(*yea*) b(%9.3f) se(%9.3f) stats(mean N, fmt(a3 %14.0f) labels("Male mean" "N"))
esttab *_res using "$log/results_SIAB.rtf", replace drop(*yea*) b(%9.3f) se(%9.3f) stats(mean N, fmt(a3 %14.0f) labels("Male mean" "N"))
 

 
**********Table with summary statistics
* Labels the variables, builds one dummy per education level and writes means
* and standard deviations to $log/descriptives_SIAB.rtf.
label var kldb_manager "Manager (Kldb only)"
label var manager_simple "Manager simple code (prediction)"
label var manager_3digit "Manager 3-digit KLDB (prediction)"
* distinct label so that the 0.7 cut-off variant can be told apart in the table
label var manager_3digit_70 "Manager 3-digit KLDB (prediction, cutoff 0.7)"
label var manager_4digit "Manager 4-digit KLDB (prediction)"
label var tentgelt "Daily pay (Euros)"
label var age_spell "Age"
label var frau "Female (0/1)"
label var year "Current year"
label var realexp "Labor market experience (years)"
label var jobtenure "Job tenure (years)"
tab educ
* Build the dummies by education code instead of tab, gen(): tab, gen() numbers
* the dummies by position, so educ_`y' would not correspond to code `y' (or
* would not exist) if one of the six levels is absent from the data.
foreach y of numlist 1(1)6 {
    gen byte educ_`y' = (educ == `y') if !missing(educ)
    local educlab: label educ `y'
    label var educ_`y' "Education: `educlab'"
}
local xvar "kldb_manager manager_simple manager_3digit manager_3digit_70 manager_4digit  tentgelt age_spell frau year realexp jobtenure educ_1 educ_2 educ_3 educ_4 educ_5 educ_6"
estpost summarize `xvar'  
eststo siab_all


esttab siab_all  using "$log/descriptives_SIAB.rtf", replace cells("mean(fmt(%9.2f) pattern(1 1 1 1)) sd(fmt(%9.2f) pattern(1 1 1 1))") ///
 label  



* Save the prepared dataset to $data. A forward slash is used as path separator,
* as in every other path of this file; Stata accepts it on all platforms.
save "$data/SIAB17_beh.dta", replace