#
# Code for the paper "Bayesian inference for non-anonymous Growth Incidence Curves"
# written by Edwin Fourrier and Michel Lubrano
#
# Wage dynamics at Michigan state university
# Bernstein polynomials for naGIC
# Gender issues
# Prob matrices

rm(list = ls())

library(xtable)
library(ineq)
library(mvtnorm)
library(dplyr)

Bregs = function(y,Z,m,b0=0,M0=0,s0=0,nu0=0){
  # Bayesian inference regression with possibly an informative prior
  # Return as a list for m draws of beta and sigma2
  
  if (sum(M0)==0){
    kz = ncol(Z)
    M0 = matrix(0,kz,kz)
    b0 = matrix(0,kz,1)
    s0 = 0
    nu0 = 0
  }
  n = length(y)
  M = t(Z)%*%Z + M0
  M1 = solve(M)
  Zy = t(Z)%*%y
  b = M1%*%(M0%*%b0 + Zy)
  se = y%*%y - t(b)%*%M%*%b + s0 + t(b0)%*%M0%*%b0
  s = as.numeric(se)
  nu = n+nu0
  
  #cat(det(M1),det(M1*s/nu),"\n")
  
  Bs = rmvt(m, delta = b, sigma = M1*s/(nu-2), df = nu)
  Ss = s/rchisq(m,n)
  
  return(list(Bs=Bs,Ss=Ss))
}

BDic = function(id06,id12,ki,km,m,tit,g0=0){
  # Search for Bayesian DIC
  # Prior information with g0
  # Specific to na-GIC
  
  y1 = Salary06[id06]
  y2 = Salary12[id12]
  id1 = order(y1)
  y = log(y2[id1])-log(y1[id1])
  n = length(y1)
  p = seq(0,1,length=n)
  
  Bc = NULL
  Ac = NULL
  Dc = NULL
  Ml = NULL # Marginal likelihood
  Sig = NULL
  
  for (k in ki:km){
    Z = Bern(k,p)
    b0 = matrix(0,k+1,1)
    ZZ = t(Z)%*%Z
    M0 = g0*ZZ
    M0[,c(1,k+1)] = 0
    M0[c(1,k+1),] = 0
    nu0 = 3
    s0 = 1 #t(y)%*%y/n
    
    if (g0==0) {
      M0 = matrix(0,k+1,k+1)
      s0 = 0
      nu0 = 0
    }  
    out = Bregs(y,Z,m,b0,M0,s0,nu0)
    #out = Breg(y,Z,m)
    Bs = out$Bs
    Ss = out$Ss
    b = colMeans(Bs)
    sb = apply(Bs,2,sd)
    s2 = mean(Ss)
    yZ = y-Z%*%b
    lpy = -n/2*log(s2) - 0.5/s2*t(yZ)%*%yZ
    Bc = cbind(Bc,-2*lpy + (k+1)*log(n))
    Ac = cbind(Ac,-2*lpy + 2*(k+1))
    d0 = det(as.matrix(M0[2:k,2:k]))
    if (d0==0) d0 = NA
    dM0 = log(d0)/2
    ml = dM0-log(det(M0+ZZ))/2 + (nu0/2)*log(s0)-((n+nu0)/2)*log(s2)+
    (n/2)*log(pi)+lgamma((n+nu0)/2)-lgamma(nu0/2)
    Ml = cbind(Ml,ml)
    Sig = cbind(Sig,s2)
    cat("Posterior results for k = ",k,"\n")
    print(b,digits=3)
    print(sb,digits=3)
    print(b/sb,digits=3)
    print(s2,digits=5)
    cat("\n\n")
    
    lpy = NULL
    for (j in 1:m){
      yZb = y-Z%*%Bs[j,]
      yZbb = t(yZb)%*%yZb
      lpy = cbind(lpy,-n/2*log(Ss[j]) - 0.5/Ss[j]*yZbb)
    }
    Dc = cbind(Dc,-2*mean(lpy)+2*(k+1))
    
  }
  
  cat("Search between",ki,km,"with g0 = ",g0,"\n", 
      " BIC ",which.min(Bc)+ki-1,
      " Marg Lik",which.max(Ml)+ki-1,
      " AIC", which.min(Ac)+ki-1,
      " DIC ",which.min(Dc)+ki-1,
      "\n")
  
  sdx = function(x){
    a = (x-min(x,na.rm=T))/(max(x,na.rm=T)-min(x,na.rm=T))
    return( a )
  }
  pts = function(Dc,ic){
    i = which.min(Dc)
    points(i+ki-1,Dc[i],pch=19,col=ic,cex=1.5)
  }
  Ac = sdx(Ac)
  Bc = sdx(Bc)
  Dc = sdx(Dc)
  Ml = 1-sdx(Ml)
  plot(seq(ki,km),Ac,type='l',xlab = "Polynomial of order k ",ylab = '',
       main=paste(tit,"with g = ",g0))
  lines(seq(ki,km),Bc,col=2)
  lines(seq(ki,km),Dc,col=3)
  lines(seq(ki,km),Ml,col=4)
  pts(Ac,1)
  pts(Bc,2)
  pts(Dc,3)
  pts(Ml,4)
  legend("top",inset=0.02,legend=c("Marg Lik","BIC","DIC","AIC"),col = c(4,2,3,1),lty=1)
  
}

Bern = function(k,p){
  # Bernstein poly
  # Builds the basis for regression
  n = length(p)
  z = matrix(NA,n,k+1)
  for (j in 0:k){
    z[,j+1] = choose(k,j)*p^j*(1-p)^(k-j)
  }
  return( z )
}


dBern = function(b,k,p){
  # First derivative of Bernstein poly
  z = 0
  for (j in 0:(k-1)){
    z = z+(b[j+2]-b[j+1])*choose(k-1,j)*p^j*(1-p)^(k-j-1)
  }
  return( k*z )
}

Berna = function(k,x){
  # Bernstein poly for general variable between a and b
  # Builds the basis for regression
  n = length(x)
  a = min(x)
  b = max(x)
  p = (x-a)/(b-a)
  z = matrix(NA,n,k+1)
  for (j in 0:k){
    z[,j+1] = choose(k,j)*p^j*(1-p)^(k-j)
  }
  return( z )
}


Bernba = function(b,x){
  # For ploting Bernstein polynomial
  # x is between a and b
  aa = min(x)
  bb = max(x)
  p = (x-aa)/(bb-aa)
  k = length(b)-1
  z = 0
  for (j in 0:k){
    z = z + b[j+1]*choose(k,j)*p^j*(1-p)^(k-j)
  }
  return( (z-min(z)) )
}

Bernb = function(b,p){
  # For ploting Bernstein polynomial
  k = length(b)-1
  z = 0
  for (j in 0:k){
    z = z + b[j+1]*choose(k,j)*p^j*(1-p)^(k-j)
  }
  return( z )
}


dBernb = function(b,p){
  # First derivative of Bernstein poly
  k = length(b)-1
  z = 0
  for (j in 0:(k-1)){
    z = z+(b[j+2]-b[j+1])*choose(k-1,j)*p^j*(1-p)^(k-j-1)
  }
  return( k*z )
}


BaBern = function(y,k,m,p){
  # Bayesian inference using Bernstein polynomials
  Z = Bern(k,p)
  Ms = Breg(y,Z,m)
  
  return(Ms)
}

Breg = function(y,X,m){
  # Bayesian inference regression with a diffuse prior
  # m draws for regression of y over X
  n = length(y)
  M = t(X)%*%X 
  M1 = solve(M)
  Xy = t(X)%*%y
  b = M1%*%Xy
  s = as.numeric(t(y)%*%y - t(Xy)%*%M1%*%Xy)
  
  Ms = rmvt(m, delta = b, sigma = M1*s/n, df = n)
  
  return(Ms)
}

BnaGIC = function(y1,y2,k,m){
  # Bayesian inference for a naGIC curve with Bernstein of degree k
  # m draws
  
  gt = log(mean(y2,na.rm=T))-log(mean(y1,na.rm=T))
  id1 = order(y1)
  gna = log(y2[id1])-log(y1[id1])
  id = !is.na(gna)
  n = sum(id)
  p = seq(0,1,length=n)
  Mtnas = BaBern(gna[id],k,m,p)
  #apply(Mtnas, 2, quantile, probs = c(0.05,0.5, 0.95),  na.rm = TRUE)
  Mas = matrix(0,m,n)
  dMas = matrix(0,m,n)
  for (i in 1:m){
    Mas[i,] = Bernb(Mtnas[i,],p)
    dMas[i,] = dBernb(Mtnas[i,],p)
  }
  
  return(list(Mas=Mas,p=p,dMas=dMas))
}

BGIC = function(y1,y2,k,m){
  # Bayesian inference for a GIC curve with Bernstein of degree k
  # m draws 
  # 
  
  n1 = length(y1)
  n2 = length(y2)
  n = min(c(n1,n2))
  p = seq(0,1,length=n)
  y1b = sort(sample(y1,n))
  y2b = sort(sample(y2,n))
  
  gt = log(mean(y2b))-log(mean(y1b))
  gna = log(y2b)-log(y1b)
  Mtnas = BaBern(gna,k,m,p)
  Mas = matrix(0,m,n)
  dMas = matrix(0,m,n)
  for (i in 1:m){
    Mas[i,] = Bernb(Mtnas[i,],p)
    dMas[i,] = dBernb(Mtnas[i,],p)
  }
  
  return(list(Mas=Mas,p=p,dMas=dMas))
}

Prob = function(M,p,g){
  # Stochastic dominance at the order one
  # Pro poor probability
  
  np = length(p)
  ip = c(seq(1,np,np/10),np)
  Ms = matrix(NA,3,11)
  j = 0
  for (i in ip){
    j = j+1
    Ms[1,j] = p[i]
    Ms[2,j] = mean(M[,i]>0)
    Ms[3,j] = mean(M[,i]>g)
  }
  A = data.frame("Anayses a growth incidence curve \\
      First line:  Pr(M1>0) \\
      second line: Pr(M1>g) \n\n")
  print(xtable(A))
  print(xtable(Ms,digits=2))
  
  return( list(Mp=Ms[2,],Mg=Ms[3,]))
}

Probc = function(M1,p1,M2,p2){
  # Compares two GIC
  # p1 = MasM$p
  # M1 = MasM$Mas
  # p2 = MasF$p
  # M2 = MasF$Mas
  np1 = length(p1) 
  np2 = length(p2)
  np = min(np1,np2)
  ip1 = c(seq(1,np1,np1/10),np1)
  ip2 = c(seq(1,np2,np2/10),np2)
  Ms = matrix(NA,3,11)
  
  Msp1 = M1[,ip1]
  Msp2 = M2[,ip2]
  
  for (i in 1:11){
    idg = Msp1[,i]>Msp2[,i]
    idl = Msp1[,i]<Msp2[,i]
    Ms[1,i] = p1[ip1[i]]
    Ms[2,i] = mean(idg)
    Ms[3,i] = mean(idl)
  }
  cat("Compares two incidence curves \\
      First line:  Pr(M1>M2) \\
      second line: Pr(M1<M2) \n")
  print(xtable(Ms,digits=2))
  cat("Overall probability M1>M2 = ",mean(Ms[2,]),"\n")
  cat("Overall probability M1<M2 = ",1-mean(Ms[2,]),"\n\n")
}

PlnaGIC = function(id06,id12,k,tit,icol,ypl=0.30,m=5000){
  y1 = Salary06[id06]
  y2 = Salary12[id12]
  ga = log(mean(Salary12[id12])) - log(mean(Salary06[id06]))
  cat("Growth rate = ",ga,"\n")
  
  set.seed(1357)
  Mas =  BGIC(y1,y2,k,m)
  Masna = BnaGIC(y1,y2,k,m)
  
  plot(Mas$p,colMeans(Mas$Mas),type='l',ylab='Growth rate',ylim=c(0.00,ypl),xlab="p",
       main=paste(tit,"k = ",k))
  lines(Mas$p,apply(Mas$Mas, 2, quantile,0.05),col=1,lty=2)
  lines(Mas$p,apply(Mas$Mas, 2, quantile,0.95),col=1,lty=2)
  
  lines(Masna$p,colMeans(Masna$Mas),col=icol)
  lines(Masna$p,apply(Masna$Mas, 2, quantile,0.05),col=icol,lty=2)
  lines(Masna$p,apply(Masna$Mas, 2, quantile,0.95),col=icol,lty=2)
  
  abline(h=ga,lty=3,col=3,lwd=2)
  legend("bottom",inset=0.02,legend=c("GIC","naGIC","Mean growth"),lty=1,col=c(1,icol,3),cex=0.75)
  
  plot(Masna$p,colMeans(Masna$dMas),type='l',ylab='na-GIC Derivative',
       xlab="p",ylim=c(-1.5,0.5),
       main=paste(tit,"k = ",k))
  lines(Masna$p,apply(Masna$dMas, 2, quantile,0.05),col=1,lty=2)
  lines(Masna$p,apply(Masna$dMas, 2, quantile,0.95),col=1,lty=2)
  
  abline(h=0,lty=3,col=3,lwd=2)
  legend("bottom",inset=0.02,legend=c("naGIC derivative","zero line"),
         lty=c(1,3),col=c(1,3),cex=0.75)
  
  
  Prob(Masna$Mas,Masna$p,ga)
  
}


##### Academic wage dynamics at MSU #####

setwd("F:/Etudiants/Edwin/Paper NA-Gic/Snde/Calculs finaux")

data = read.csv("Snde.csv",sep=",",header=TRUE, stringsAsFactors=F) 
attach(data)

idw = is.na(Gender)
cat("Undefined gender = ",sum(idw),"\n")
#Gf = sample(c("M","F"),sum(id),replace=T)
#Gender[id] = Gf

table(Title06)
acad = c("Assistant Prof","Associate Prof","Full Prof","Endowed Prof")
para = c("Educator","Instructor","Lecturer","Specialist","Research Assoc","Libra-Archiv")
admi = c("Director","Chair","Dean","Acad staff","Provost","Presidence")
Salary06[Salary06==0] = NA
Salary12[Salary12==0] = NA

id06a = !(is.na(Salary06)) & (Title06 %in% acad) & !(Tenure06=="EM") & !(Tenure06=="C") & !(Tenure06=="CE")
id06aM = !(is.na(Salary06)) & (Title06 %in% acad) & !(Tenure06=="EM") & !(Tenure06=="C") & !(Tenure06=="CE") & (Gender=="M")
id06aF = !(is.na(Salary06)) & (Title06 %in% acad) & !(Tenure06=="EM") & !(Tenure06=="C") & !(Tenure06=="CE") & (Gender=="F")
summary(Salary06[id06a])

id12a  = id06a & !is.na(Salary12)
id12aM = id06aM & !is.na(Salary12)
id12aF = id06aF & !is.na(Salary12)

id06p  = !(is.na(Salary06)) & (Title06 %in% para)
id06pM = !(is.na(Salary06)) & (Title06 %in% para)  & (Gender=="M")
id06pF = !(is.na(Salary06)) & (Title06 %in% para)  & (Gender=="F")

id12p  = id06p  & !is.na(Salary12)
id12pM = id06pM & !is.na(Salary12)
id12pF = id06pF & !is.na(Salary12)
 
idquit = !(is.na(Salary06)) & is.na(Salary12)
idstay = !idquit
cat("Quitting academics",sum(id06a&idquit))
cat("Quitting para-academics",sum(id06p&idquit))

mean(Salary06[idquit & id06aM],na.rm=T)
table(idquit & id06aM)/sum(table(idquit & id06aM))
table(idquit & id06aF)/sum(table(idquit & id06aF))

table(idquit)/length(idquit)
cat("Gender proportion for academics and para-academics","\n",
"Academics",table(Gender[id06a])/sum(table(Gender[id06a])),"\n",
"Para acad",table(Gender[id06p])/sum(table(Gender[id06p])),"\n")


#### Summary stat ######

# Table 2 Sample characteristics of academic stayers in 2006

Taba06 = matrix(NA,4,6)
row.names(Taba06) = c("Assistant Prof","Associate Prof","Endowed Prof","Full Prof" )
Taba06[,1] = table(Title06[id06a & idstay])
Taba06[,2] = tapply(Salary06[id06a & idstay],Title06[id06a & idstay],mean)
Taba06[,3] = table(Title06[id06aF & idstay])
Taba06[,4] = tapply(Salary06[id06aF & idstay],Title06[id06aF & idstay],mean)
Taba06[,5] = table(Title06[id06aM & idstay])
Taba06[,6] = tapply(Salary06[id06aM & idstay],Title06[id06aM & idstay],mean)
Tabt = c(sum(id06a & idstay),mean(Salary06[id06a & idstay]),
         sum(id06aF & idstay,na.rm=T),mean(Salary06[id06aF & idstay],na.rm=T),
         sum(id06aM & idstay,na.rm=T),mean(Salary06[id06aM & idstay],na.rm=T))
Tabx = rbind(Taba06,Tabt)
row.names(Tabx) = c("Assistant Prof","Associate Prof","Endowed Prof","Full Prof",
                    "Total")
xtable(Tabx[c(1,2,4,3,5),],digits=0)
cat("Female prop = ",Taba06[c(1,2,4,3),3]/Taba06[c(1,2,4,3),1],"\n")
cat("Female total prop = ",sum(id06aF & idstay,na.rm=T)/sum(id06a & idstay),"\n")
cat("Female wages = ",Taba06[c(1,2,4,3),4]/Taba06[c(1,2,4,3),2],"\n")
cat("Female wages total",mean(Salary06[id06aF & idstay],na.rm=T)/mean(Salary06[id06a & idstay],na.rm=T))


# Table 3: Sample characteristics of para-academic stayers in 2006
Tab = matrix(NA,6,6)
row.names(Tab) = c("Educator","Instructor","Lecturer","Libra-Archiv",
                   "Research Assoc","Specialist")
Tab[,1] = table(Title06[id06p & idstay])
Tab[,2] = tapply(Salary06[id06p & idstay],Title06[id06p & idstay],mean)
Tab[,3] = table(Title06[id06pF & idstay])
Tab[,4] = tapply(Salary06[id06pF & idstay],Title06[id06pF & idstay],mean)
Tab[,5] = table(Title06[id06pM & idstay])
Tab[,6] = tapply(Salary06[id06pM & idstay],Title06[id06pM & idstay],mean)
Tabt = c(sum(id06p & idstay),mean(Salary06[id06p & idstay]),
         sum(id06pF & idstay,na.rm=T),mean(Salary06[id06pF & idstay],na.rm=T),
         sum(id06pM & idstay,na.rm=T),mean(Salary06[id06pM & idstay],na.rm=T))
Tabx = rbind(Tab[c(2,3,1,5,6,4),],Tabt)
names(Tabx[6,]) = "Total"
xtable(Tabx,digits=0)
cat("Female prop = ",Tab[c(2,3,1,5,6,4),3]/Tab[c(2,3,1,5,6,4),1],"\n")
cat("Female total prop = ",sum(id06pF,na.rm=T)/sum(id06p),"\n")
cat("Female wages = ",Tab[c(2,3,1,5,6,4),4]/Tab[c(2,3,1,5,6,4),2],"\n")
cat("Female wages total",mean(Salary06[id06aF],na.rm=T)/mean(Salary06[id06a],na.rm=T))


#### Wage increase over the period ######

id = idstay
cat("Global wage increase = ",
    log(mean(Salary12[id],na.rm=T)) - log(mean(Salary06[id],na.rm=T)),
    "\n")

cat("Wage increase for administratives = ",
    log(mean(Salary12[(Title12 %in% admi) & id],na.rm=T)) - 
      log(mean(Salary06[(Title12 %in% admi) & id],na.rm=T)),
    "\n")

cat("Wage increase for academics = ",
    log(mean(Salary12[id12a & id],na.rm=T)) - 
      log(mean(Salary06[id06a & id],na.rm=T)),
    "\n")

cat("Wage increase for para-academics = ",
    log(mean(Salary12[id12p & id],na.rm=T)) - 
      log(mean(Salary06[id06p & id],na.rm=T)),
    "\n")


###### naGIC for academics #####

id06 = (id06a) & !idquit
id12 = id06 & !is.na(Salary12)
tit = "Academics, "
g0 = 0.01
ki = 3
km = 6
BDic(id06,id12,ki,km,1000,tit,g0)
k = 4
icol = 2
ypl = 0.3
PlnaGIC(id06,id12,k,tit,icol,ypl)

##### NA-GIC for para-academics #####

id06 = (id06p) & !idquit
id12 = id06 & !is.na(Salary12)
tit = "Para-academics, "
ki = 3
km = 12
g0 = 0.01
BDic(id06,id12,ki,km,1000,tit,g0)
k = 4
icol = 2
ypl=0.5
PlnaGIC(id06,id12,k,tit,icol,ypl)

### Gender issues, section 4.4 #####

set.seed(1357)

y1aM = Salary06[id06aM & !idquit]
y2aM = Salary12[id12aM & !idquit]

y1aF = Salary06[id06aF & !idquit]
y2aF = Salary12[id12aF & !idquit]

y1pM = Salary06[id06pM & !idquit]
y2pM = Salary12[id12pM & !idquit]

y1pF = Salary06[id06pF & !idquit]
y2pF = Salary12[id12pF & !idquit]

# Figure 5: Gender issues for wage dynamics, Academics
m = 5000
k = 4
MasM = BnaGIC(y1aM,y2aM,k,m)
MasF = BnaGIC(y1aF,y2aF,k,m)

plot(MasM$p,colMeans(MasM$Mas),type='l',ylab='Growth rate',ylim=c(0.00,0.35),xlab="p",
     main="na-GIC for Academics")
lines(MasM$p,apply(MasM$Mas, 2, quantile,0.05),col=1,lty=2)
lines(MasM$p,apply(MasM$Mas, 2, quantile,0.95),col=1,lty=2)

lines(MasF$p,colMeans(MasF$Mas),col=2)
lines(MasF$p,apply(MasF$Mas, 2, quantile,0.05),col=2,lty=2)
lines(MasF$p,apply(MasF$Mas, 2, quantile,0.95),col=2,lty=2)

ga = log(mean(Salary12[id12a & !idquit])) - log(mean(Salary06[id06a & !idquit]))
abline(h=ga,lty=3,col=3,lwd=2)
legend("bottomleft",inset=0.02,legend=c("Male","Female","Growth rate"),
       col=c(1,2,3),lty=c(1,1,3))

# Table 4: Statistical tests academics

Prob(MasM$Mas,MasM$p,ga) # For analysing one curve
Prob(MasF$Mas,MasF$p,ga)
Probc(MasM$Mas,MasM$p,MasF$Mas,MasF$p) # For comparing two curves: academics


# Fig 5: Gender issues for wage dynamics, right panel

m = 5000
MpsM = BnaGIC(y1pM,y2pM,4,m)
MpsF = BnaGIC(y1pF,y2pF,4,m)

plot(MpsM$p,colMeans(MpsM$Mas),type='l',ylab='Growth rate',ylim=c(0.00,0.45),xlab="p",
     main="na-GIC for Para academics stayers")
lines(MpsM$p,apply(MpsM$Mas, 2, quantile,0.05),col=1,lty=2)
lines(MpsM$p,apply(MpsM$Mas, 2, quantile,0.95),col=1,lty=2)

lines(MpsF$p,colMeans(MpsF$Mas),col=2)
lines(MpsF$p,apply(MpsF$Mas, 2, quantile,0.05),col=2,lty=2)
lines(MpsF$p,apply(MpsF$Mas, 2, quantile,0.95),col=2,lty=2)

gp = log(mean(Salary12[id12p & !idquit])) - log(mean(Salary06[id06p & !idquit]))
abline(h=gp,lty=3,col=3,lwd=2)

legend("top",inset=0.02,legend=c("Male","Female","Growth rate"),
       col=c(1,2,3),lty=c(1,1,3),lwd=c(1,1,2))

# Table 4: Statistical tests, para-academics

Prob(MpsM$Mas,MpsM$p,gp) # For analysing one curve
Prob(MpsF$Mas,MpsF$p,gp)
Probc(MpsM$Mas,MpsM$p,MpsF$Mas,MpsF$p) # For comparing two curves: para academics


##### Transition tables for academics, Males and Females ###

TabM = table(Title06[id06aM],Title12[id06aM]) # Transition males
TabF = table(Title06[id06aF],Title12[id06aF]) # Transition females

sum(Title12[id06aM]%in%admi,na.rm=T)/sum(id06aM,na.rm=T)
sum(Title12[id06aF]%in%admi,na.rm=T)/sum(id06aF,na.rm=T)
mean(Salary12[Title12[id06aM]%in%admi],na.rm=T)
mean(Salary12[Title12[id06aF]%in%admi],na.rm=T)
max(Salary12[Title12[id06aM]%in%admi],na.rm=T)
max(Salary12[Title12[id06aF]%in%admi],na.rm=T)
sd(Salary12[Title12[id06aM]%in%admi],na.rm=T)
sd(Salary12[Title12[id06aF]%in%admi],na.rm=T)

# Table 5: Transition probabilities for males and females

for (i in 1:4){
  cat(" Males   ",sum(TabM[i,]),"\n")
  cat(" Females ",sum(TabF[i,]),"\n")
  TabM[i,] = TabM[i,]/sum(TabM[i,])
  TabF[i,] = TabF[i,]/sum(TabF[i,])
}

colnames(TabM)
TabM1 = cbind(TabM[,c(1,4,5,11,10,9)],rowSums(TabM[,c(2,3,6,7,8,12,13,14)]))
TabM2 = data.frame(TabM1[c(1,2,4,3),])
colnames(TabM2) = c("Quit","Assistant.Prof","Associate.Prof","Full.Prof","Endowed.Prof",
                    "Emeritus","Administration")
# Table 5 for males, promotions have to be computed by hand
xtable(TabM2,digits=3)

colnames(TabF)
TabF1 = cbind(TabF[,c(1,4,5,11,10,9)],rowSums(TabF[,c(2,3,6,7,8,12)]))
TabF2 = data.frame(TabF1[c(1,2,4,3),])
colnames(TabF2) = c("Quit","Assistant.Prof","Associate.Prof","Full.Prof","Endowed.Prof",  
                    "Emeritus","Administration")
# Table 5 for females, promotions have to be computed by hand
xtable(TabF2,digits=3)



##### End of Program #####


##### Transition probabilities for para-academics, just for checking, not in the text ###

TabM = table(Title06[id06pM],Title12[id06pM]) # Transition males
TabF = table(Title06[id06pF],Title12[id06pF]) # Transition females

sum(Title12[id06pM]%in%admi,na.rm=T)/sum(id06pM,na.rm=T)
sum(Title12[id06pF]%in%admi,na.rm=T)/sum(id06pF,na.rm=T)

mean(Salary12[Title12[id06pM]%in%admi],na.rm=T)
mean(Salary12[Title12[id06pF]%in%admi],na.rm=T)
max(Salary12[Title12[id06pM]%in%admi],na.rm=T)
max(Salary12[Title12[id06pF]%in%admi],na.rm=T)
sd(Salary12[Title12[id06pM]%in%admi],na.rm=T)
sd(Salary12[Title12[id06pF]%in%admi],na.rm=T)

# Transition probabilities
for (i in 1:4){
  cat(" Males   ",sum(TabM[i,]),"\n")
  cat(" Females ",sum(TabF[i,]),"\n")
  TabM[i,] = TabM[i,]/sum(TabM[i,])
  TabF[i,] = TabF[i,]/sum(TabF[i,])
}

colnames(TabM)
TabM1 = cbind(TabM[,c(1,4,5,11,10,9)],rowSums(TabM[,c(2,3,6,7,8,12,13)]))
xtable(TabM1[c(1,2,4,3),],digits=3)

colnames(TabF)
TabF1 = cbind(TabF[,c(1,4,5,11,10,9)],rowSums(TabF[,c(2,3,6,7,8,12)]))
xtable(TabF1[c(1,2,4,3),],digits=3)


