***Merge PASS and IEB Information

* Session setup: show return messages/timings after each command, (re)open the
* log file for this step and fix the random-number seed.
set rmsg on

* Close a log that may still be open from an earlier run; -capture- keeps the
* script going when no log is open.
capture log close
* Forward slashes are understood by Stata on every platform and match the
* "$orig/..." paths used in the -use- commands of this script.
capture log using "$log/1_1_PASS_BeH_match.log", replace


* NOTE(review): no random-number function is called in the visible part of this
* script. If the seed is meant to make the ordering of tied observations in
* -sort-/-gsort- reproducible, confirm whether -set sortseed- is needed instead.
set seed 5016

* Build a person-level file holding one interview date per survey wave
* (stichtag1, stichtag2, ...), to be merged onto the administrative spells.
use "$orig/PENDDAT.dta", clear

* Interview date as a Stata daily date from month / day / year.
* mdy() returns missing whenever the three parts do not form a valid date.
cap drop stichtag
gen stichtag=mdy(pintmon, pinttag, pintjahr)

* One row per person, one stichtag<wave> column per wave.
* -reshape wide- requires pnr/welle to identify rows uniquely.
keep pnr welle stichtag
reshape wide stichtag , i(pnr) j(welle)
* Forward slash keeps the path usable on all platforms and consistent with the
* "$orig/..." paths above.
save "$data/passadiab_waves.dta", replace



****Merge to BEH data
* First dataset: load the administrative spell data. All steps below operate on
* this file; the PASS interview dates are merged in further down.
use "$orig/PASS-ADIAB_7518_v2.dta", clear
*keep beh_beg_dat beh_end_dat betnr prs_id ieb_fall_id beh_tag_entg

**Impute Education
* Tabulate the two raw inputs of the education variable for the log.
tab1 ausbildung schule

**Methodenbericht 09/18
* Build a six-level education variable from ausbildung and schule.
* The statements are applied in order, so later lines override earlier ones
* (e.g. ausbildung==2 with schule 8/9 ends up as 4, not 2 or 3).
gen educ=.
replace educ=1 if ausbildung==1 //No Training
replace educ=2 if ausbildung==2 //Voc Training
replace educ=3 if schule==8 | schule==9 //Upper Sec
replace educ=4 if (schule==8 | schule==9) & ausbildung==2 //Voc Training + Upper Sec
replace educ=5 if ausbildung==11 //FH
replace educ=6 if ausbildung==12 //University


**Sort data
sort pnr begepi endepi

**gen age at beginning of Spell
* Calendar-year difference only (no month/day), so it is an approximate age.
gen age_spell=year(begepi)-gebjahr

***gen year
gen year=year(begepi)

***impute forward
**Forward insertion of missing values by previous valid responses for those aged 18+
* -replace- works through the observations in order, so a filled value is
* available to the following spell and the fill cascades within a person.
* NOTE(review): the condition is age_spell>18 while the rule below uses
* age_spell<18, so spells at age exactly 18 are touched by neither - confirm
* against the method report. A missing age_spell also satisfies ">18" because
* Stata treats missing as larger than any number.
bysort pnr (begepi endepi): replace educ=educ[_n-1] if educ==. & age_spell>18

**below 18: change valid values to "1"
replace educ=1 if educ!=. & age_spell<18

**Forward insertion of valid responses for those with abitur and vocational training
* Upper secondary (3) directly preceded by vocational training (2) becomes the
* combined category (4).
bysort pnr (begepi endepi): replace educ=4 if educ==3 & educ[_n-1]==2 & age_spell>18

**Forward insertion of valid values by previous valid responses for those aged 18+
* Prevents downgrades: a spell reporting a lower level than the preceding spell
* inherits the preceding (higher) level.
bysort pnr (begepi endepi): replace educ=educ[_n-1] if educ<educ[_n-1] &  age_spell>18 & educ[_n-1]!=.

***Writing backwards
* Copy a higher level from the NEXT spell onto the current spell when the person
* is already older than the level-specific age threshold. Levels are handled
* from highest to lowest.
* NOTE(review): because observations are processed in ascending order, each
* statement reads the next spell's value before that spell is updated, so a
* value moves back by only one spell per statement - confirm this is intended.
* Spells with missing educ are not filled here (missing is never "<" a number).
bysort pnr (begepi endepi): replace educ=educ[_n+1] if educ<educ[_n+1] & educ[_n+1]==6 &  age_spell>29 //University
bysort pnr (begepi endepi): replace educ=educ[_n+1] if educ<educ[_n+1] & educ[_n+1]==5 &  age_spell>27 //University of applied sciences
bysort pnr (begepi endepi): replace educ=educ[_n+1] if educ<educ[_n+1] & educ[_n+1]==4 &  age_spell>23 //Upper Sec. + Voc Training
bysort pnr (begepi endepi): replace educ=educ[_n+1] if educ<educ[_n+1] & educ[_n+1]==3 &  age_spell>21 //Upper Sec.
bysort pnr (begepi endepi): replace educ=educ[_n+1] if educ<educ[_n+1] & educ[_n+1]==2 &  age_spell>20 //Voc Training

* Anything still missing after the imputation is coded as the lowest category.
replace educ=1 if educ==. //set missings to no training
label def educ 1 "No vocational Training" 2 "Vocational training" 3 "Upper Secondary" 4 "Upper Secondary + voc training" 5 "University of applied sciences" 6 "University"
label val educ educ




* Restrict the data to rows with quelle==1 before building job spells.
keep if quelle==1


***generate tenure
* A "job" is a run of spells of the same person (pnr) at the same establishment
* (betnr) in which each spell starts at most 1 day after the previous one ends.
* Outputs: job_epi (1 = first spell of a job, 0 = continuation), job_nr (job
* identifier), job_last (1 = last spell of a job), jbeg/jend (first start and
* last end date of the job) and jobtenure (years in the job at spell start).

sort pnr betnr begepi endepi

* mark continuation spells with 0: same establishment as the previous row and
* a start at most 1 day after the previous row's end
  by pnr: gen job_epi = 0 if betnr == betnr[_n-1] & begepi-endepi[_n-1] <= 1
* everything else starts a new job and is marked with 1
  replace job_epi = 1 if job_epi ==.
* give every job a unique number on its first row; the job starts are sorted to
* the top so the numbers run from 1 upwards. Stored as long because the default
* float type cannot represent integers above 16,777,216 exactly.
  gsort -job_epi pnr
  gen long job_nr = _n if job_epi == 1
* carry the number forward to the continuation spells of the same job
* (-replace- runs row by row, so the value cascades down the job)
  sort pnr betnr begepi endepi
  by pnr: replace job_nr = job_nr[_n-1] if job_nr == . & job_epi == 0

bys job_nr (begepi endepi): gen job_last = _n == _N

* adjust beginning and end (do we need these?)
* gap is a 0/1 indicator: 1 if the spell starts 2 to 31 days after the previous
* spell of the same job ended. For the first spell of a job the comparison
* involves a missing value and evaluates to 0.
sort pnr job_nr begepi endepi
bys job_nr (begepi endepi): gen gap=begepi-endepi[_n-1] <= 31 & begepi-endepi[_n-1]>1
gen adat2=begepi
gen edat2=endepi
cap drop year
gen year=year(adat2)
* Close short gaps inside a job within the same calendar year by moving the
* adjusted start to the day after the previous end. The flag has to be tested
* with gap==1: it is an indicator, so the former test gap==31 was never true.
* NOTE(review): with the 1-day rule used to define jobs above, gaps of 2-31 days
* are not expected inside a job, so this is likely a no-op unless that rule is
* relaxed - confirm.
replace adat2=edat2[_n-1]+1 if job_nr==job_nr[_n-1] & gap==1 & year==year[_n-1]
drop gap

* Log how many short gaps remain after the adjustment, then discard the helper.
bys job_nr (adat2 edat2): gen gap2=adat2-edat2[_n-1] <= 31 & adat2-edat2[_n-1]>1
tab gap2,m
drop gap2
format adat2 edat2 %td

* First start date and last end date of each job.
sort job_nr begepi endepi
by job_nr: egen jbeg = min(begepi) if job_nr ~=.
by job_nr: egen jend = max(endepi) if job_nr ~=.
format jbeg jend %td

label var jbeg    "begin date of job periode"
label var jend    "end date of job periode"
label var job_epi "job period"
label var job_nr  "counter of the job period"


* jobtenure at beginning of the episode, in years
gen jobtenure = (begepi-jbeg)/365.25
lab var jobtenure "jobtenure years beg spell"



**** Experience accumulated before the start of each spell
* dauer is the spell length in days, counting both the first and the last day.
generate dauer = endepi - begepi + 1

* Running total of dauer over a person's spells in chronological order, minus
* the current spell's own length, converted to years. Every earlier row counts
* with its full length, so overlapping spells are each counted.
sort pnr begepi endepi
by pnr: generate realexp = (sum(dauer) - dauer) / 365.25
label variable realexp "exp in years"
summarize realexp










***Only keep employment spells
* Match each interview date to the spell that covers it and keep one spell per
* person and interview date (the one with the highest tentgelt).
keep if quelle==1
* Attach the interview dates via the time-invariant person number; keep(3)
* drops persons that are not present in both files.
merge m:1 pnr using "$data/passadiab_waves.dta", nogen keep(3)

* et_w<wave> = 1 if the interview of that wave falls inside the spell.
* Waves without an interview-date variable are skipped explicitly. Any other
* problem (e.g. a wrongly typed variable) now stops the script instead of being
* swallowed by -capture-. "exact" prevents stichtag1 from being resolved as an
* abbreviation of stichtag10..stichtag12.
forvalues w = 1/12 {
    capture confirm variable stichtag`w', exact
    if _rc == 0 {
        * create the flag only if it does not exist yet; an existing flag is
        * kept and only updated, as before
        capture confirm new variable et_w`w'
        if _rc == 0 {
            gen et_w`w' = .
        }
        replace et_w`w' = 1 if stichtag`w' >= begepi & stichtag`w' <= endepi & stichtag`w' != .
    }
}

* Row identifier for the reshape (double so that large row counts stay exact).
gen double id = _n
**Generate long data: one row per spell and wave
reshape long et_w stichtag , i(id) j(welle)

* Keep only spell-wave combinations in which the interview lies in the spell.
keep if et_w==1

**Sort by pay level
* Highest tentgelt first within person and interview date; by default -gsort-
* places missing values last in a descending ordering. The additional keys make
* the choice among equally paid spells reproducible instead of arbitrary.
gsort pnr stichtag -tentgelt begepi endepi betnr

***Only keep highest-paid spell
by pnr stichtag : gen nwage=_n
keep if nwage==1
save "$data/PASS_beh_match.dta", replace

