
library(MASS);
library(sandwich);

het_test<-function(e,x){
  e2 = e^2;
  x2 = x^2;
  v = residuals(lm(e2~x2));
  e2 = e2 - mean(e2);
  te = length(e)*(1-(sum(v^2))/sum(e2^2));
return(pchisq(te,dim(x)[2]));
}

moment<-function(x){ 
  x=as.matrix(x);
  y=t(x)%*%x;
  return(y);
}

prod<-function(x,y){
  x=as.matrix(x);
  kk=dim(x)[2];
  z=x*matrix(rep(y,kk),ncol=kk)
  return(z);
}

thr_est<-function(dat,yi,xi,qi,intercept=1,h=0,graph=0){

#  dat     = data matrix (nxk)
# yi      = index of dependent (y) variable, e.g.: yi = 1
# xi      = indexes of independent (x) variables, e.g.: xi = 2|3
# qi      = index of threshold (q) variable, e.g.: qi = 4;
# h       = heteroskedasticity indicator
#          Set h=0 to impose homoskedasticity assumption
#         Set h=1 to use White-correction for heteroskedasticity
#  qhat    = LS estimate of threshold
#  sse     = minimizing sum of squared errors
#graph   = 0;     # Set _graph=0 to not view the graph of the likelihood
                  
gn=300  ### grid size
trim=0.15  ### trim number at the beginning and the end

conf1 = .95;  #Confidence Level for Confidence Regions  @
conf2 = .8;   # Confidence Level for first step of two-step
              #  Confidence Regions for regression parameters @
nonpar = 1;   # Indicator for non-parametric method used to estimate nuisance scale in the presence of
              #   heteroskedasticity (only relevant if h=1).
              #   Set _nonpar=1  to estimate regressions using
              #   a quadratic.
              #  Set _nonpar=2  to estimate regressions using
              #   an Epanechnikov kernel with automatic bandwidth. @


if (h!=0|h!=1){
print("You have entered h = ") 
print(h);
print("This number must be either 0 (homoskedastic case) or 1 (heteoskedastic)");
print("The program will either crash or produce invalid results");
}
if ((nonpar!=1|nonpar!=2)&(h==1)){
print("You have entered _nonpar = ");
print(nonpar);
print("This number should be either 1 (quadratic regression)");
print("   or 2 (kernel regression)");
print("The program will employ the quadratic regression method");
}

n = dim(dat)[1];
q = dat[,qi];

y = dat[,yi];

if(intercept==1){
x = cbind(1,dat[,xi]);
}else{
x = as.matrix(dat[,xi]);
}

k = dim(x)[2];

#yname = names[yi];
#qname = names[qi];
#xname = "Constant"|names[xi];
#### linear model

mi = solve(moment(x));
beta = mi%*%(t(x)%*%y);
e=y-x%*%beta;
ee=t(e)%*%e;
sig = ee/(n-k);
xe = prod(x,e);

if(h==0){
  se = sqrt(diag(mi)%*%sig);
}else{
  se = sqrt(diag(mi%*%moment(xe)%*%mi));
 }
vy = sum((y - mean(y))^2);
r_2 = 1-ee/vy;


#### threshold model


qq=sort(q); 
gamma1=seq(qq[round(trim*length(qq))],to=qq[round((1-trim)*length(qq))],length.out=gn);
qs=gamma1;
qn =length(qs);

sn = rep(0,qn);
r=1; 
while(r<=qn){
  c1= which(q <= qs[r]);
  c2= which(q > qs[r]);
  x1 = x[c1,];
  y1 = y[c1];
  x2 = x[c2,];
  y2 = y[c2];
  mi1 = solve(moment(x1));
  mi2 = solve(moment(x2));
  beta1 = mi1%*%(t(x1)%*%y1);
  beta2 = mi2%*%(t(x2)%*%y2);
  e1 = y1 - x1%*%beta1;
  e2 = y2 - x2%*%beta2;
  ee1 = t(e1)%*%e1;
  ee2 = t(e2)%*%e2; 
  sn[r]=ee1+ee2;
  r=r+1;
}

rmin=which.min(sn);
smin=sn[rmin];
qhat=qs[rmin];
sighat=smin/n;

cc1=which(q<= qhat);
cc2=which(q> qhat);
x1 = x[cc1,];
y1 = y[cc1];
x2 = x[cc2,];
y2 = y[cc2];

mi1 = solve(moment(x1));
mi2 = solve(moment(x2));
beta1 = mi1%*%(t(x1)%*%y1);
beta2 = mi2%*%(t(x2)%*%y2);
e1 = y1 - x1%*%beta1;
e2 = y2 - x2%*%beta2;
ej=rep(0,length(q));

ej[cc1]=e1;
ej[cc2]=e2;
n1 = dim(y1)[1];
n2 = dim(y2)[1];
ee1 = t(e1)%*%e1;
ee2 = t(e2)%*%e2; 

if(graph==1){
	sig1 = ee1/(n1-k);
	sig2 = ee2/(n2-k);
	sig_jt = (ee1+ee2)/(n-k*2);
	if(h==0){
  		se1 = sqrt(diag(mi1)%*%sig_jt);
		  se2 = sqrt(diag(mi2)%*%sig_jt);
	}else{
	  se1 = sqrt(diag(mi1%*%moment(prod(x1,e1))%*%mi1));
	  se2 = sqrt(diag(mi2%*%moment(prod(x2,e2))%*%mi2));
	}
### caclulare the R_square
	vy1 = sum((y1 - mean(y1))^2);
	vy2 = sum((y2 - mean(y2))^2);
	r2_1 = 1 - ee1/vy1;
	r2_2 = 1 - ee2/vy2;
	r2_joint = 1 - (ee1+ee2)/vy;
	if(h==0){
	  lr  = (sn-smin)/sighat;
	}else{
	  r1 = (x%*%(beta1-beta2))^2;
	  r2 = r1*(ej^2);
	  qx = cbind(1,q,q^2);
	  qh = cbind(1,qhat,qhat^2);
 ### using regression

m1 = solve(moment(qx))%*%(t(qx)%*%r1);
m2 = solve(moment(qx))%*%(t(qx)%*%r2);
g1 = qh%*%m1;
g2 = qh%*%m2;
###using nonparametrics
  if(nonpar==2){
    sigq = sqrt(mean((q-mean(q))^2));
    hband = 2.344*sigq/(n^(0.2));
    u2 = ((qhat-q)/hband)^2;
    kh = ((1-u2)*.75/hband)*(u2<=1);
    g1 = mean(kh*r1);
    g2 = mean(kh*r2);
          }
  eta2 = g2/g1;
  lr = (sn-smin)/eta2;
}

c1 = -2*log(1-sqrt(conf1));
c2 = -2*log(1-sqrt(conf2));
lr1 = (lr >= c1);
lr2 = (lr >= c2);
if(max(lr1)==1){;
  qhat1 = qs[which.min(lr1)];
  qhat2 = qs[qn+1-which.min(rev(lr1))];
}else{
  qhat1 = qs[1];
  qhat2 = qs[qn];
}


z =which.max((pnorm(seq(.01,by=.01,length.out=300))*2-1) >= conf1)/100;

beta1l = beta1 - se1*z;
beta1u = beta1 + se1*z;
beta2l = beta2 - se2*z;
beta2u = beta2 + se2*z;


print("Global OLS Estimation, Without Threshold");
if(h==1){
  print("Heteroskedasticity Correction Used");
}else{
  print("OLS Standard Errors Reported");
}
linbeta=cbind(beta,se);
colnames(linbeta)=c("est","se")
print(linbeta);
print(paste("Observations:                      ", n));
print(paste("Degrees of Freedom:                ",(n-k)));
print(paste("Sum of Squared Errors:             ", ee));
print(paste("Residual Variance                 ", sig));
print(paste("R-squared                         ", r_2));
print(paste("Heteroskedasticity Test (P-Value) ", het_test(e,x)));

print("       __________________________________________");
print("Threshold Estimation");
print(paste("Threshold Estimate        ", qhat));
print("confidence interval for threshold estimator");
print(c(qhat1,qhat2));
print(paste("Sum of Squared Errors:             ", (ee1+ee2)));
print(paste("Residual Variance                 ", sig_jt));
print(paste("R-squared                         ", r2_joint));
print(paste("Heteroskedasticity Test (P-Value) ", het_test(ej,x)));

print("     Regime 1        Parameter Estimates");
thbeta1=cbind(beta1,se1,beta1l,beta1u)
colnames(thbeta1)=c("est","se","low_confidence","up_confidence")
print(thbeta1);
print(paste("Observations:                      ", n1));
print(paste("Degrees of Freedom:                " ,(n1-k)));
print(paste("Sum of Squared Errors:             ", ee1));
print(paste("Residual Variance                 ", sig1));
print(paste("R-squared                         ", r2_1));
print("       __________________________________________");
print("     Regime 2       Parameter Estimates");
thbeta2=cbind(beta2,se2,beta2l,beta2u)
colnames(thbeta2)=c("est","se","low_confidence","up_confidence")
print(thbeta2);
print(paste("Observations:                      ", n2));
print(paste("Degrees of Freedom:                " ,(n2-k)));
print(paste("Sum of Squared Errors:             ", ee2));
print(paste("Residual Variance                 ", sig2));
print(paste("R-squared                         ", r2_2));

plot(qs,lr,xlab="threshold value",ylab="likelihood ratio",type="l");
title("Confidence Interval Construction for Threshold");
}
result=list(threshold=qhat,betahat=cbind(beta1,beta2),beta_sig=cbind(se1,se2),rss=ee1+ee2);
return(result);
}


waldtest<-function(dat,yi,xi,qi,intercept=1,plot=1,gn=300,trim=0.15){
y=dat[,yi];
T=length(y);
if(intercept==1){
x= cbind(1,dat[,xi]);
}else{
x = as.matrix(dat[,xi]);
}
k=ncol(x);
q=as.vector(dat[,qi]);

qs=sort(q);
gammas=qs[round(seq(trim,by=(1-2*trim)/gn,length.out=gn)*length(q))];
z0=as.matrix(x);
k=dim(z0)[2];
store=matrix(0,nrow=gn,ncol=1);
j=1; 
while(j<=gn){
    d1=(q<= gammas[j])*1;
    d2=1-d1;    
    n1=sum(d1);
    n2=sum(d2);
    z1=prod(z0,d1);
    z2=prod(z0,d2);
    zz=cbind(z1,z2);
    m=ginv(moment(zz));
    theta=ginv(moment(zz))%*%(t(zz)%*%y);
    e=y-zz%*%theta;
    ze=prod(zz,e);
    v=moment(ze);
    I=diag(rep(1,k));
    R=cbind(I,-I);
    w=t(R%*%theta)%*%ginv(R%*%m%*%v%*%m%*%t(R))%*%R%*%theta;
    store[j]=w;
    j=j+1;
}
wald01=max(store);
j=which.max(store);
threshold=gammas[j];

if (plot==1){
  plot(gammas,store,xlab="Gamma",type="l")
  title("wald Statistic as function of Gamma");
} 
res=list(supwald=wald01,threshold=threshold);
return(res);
}


### function for ms_error
thre_me<-function(dat,yi,xi,qi,alpha=0.15,h=1,intercept=1){

y=dat[,yi];
T=n=length(y);
if(intercept==1){
x= cbind(1,dat[,xi]);
}else{
x = as.matrix(dat[,xi]);
}
k=ncol(x);
q=as.vector(dat[,qi]);

##### estimate the model with given alpha

gama_low=sort(q)[floor(T*alpha)]
gama_high=sort(q)[floor(T*(1-alpha))];

c1=which(q<gama_low);
c2=which(q>gama_high);
x1 = x[c1,];
y1 = y[c1];
x2 = x[c2,];
y2 = y[c2];

mi1 = solve(moment(x1));
mi2 = solve(moment(x2));

beta_a1 = mi1%*%(t(x1)%*%y1);
beta_a2 = mi2%*%(t(x2)%*%y2);
e1 = y1 - x1%*%beta_a1;
e2 = y2 - x2%*%beta_a2;
n1 = dim(x1)[1];
n2 = dim(x2)[1];
ee1 = t(e1)%*%e1;
ee2 = t(e2)%*%e2; 

sig1 = ee1/(n1-k);
sig2 = ee2/(n2-k);
sig_jt = (ee1+ee2)/(n-k*2);
if(h==0){
  		 se1 = sqrt(diag(mi1)%*%sig_jt);
		 se2 = sqrt(diag(mi2)%*%sig_jt);
	}else{
	  se1 = sqrt(diag(mi1%*%moment(prod(x1,e1))%*%mi1));
	  se2 = sqrt(diag(mi2%*%moment(prod(x2,e2))%*%mi2));
	}
betas_a=cbind(beta_a1,se1,beta_a2,se2)
colnames(betas_a)=c("reg1","se","reg2","se");

ze1=prod(x1,e1);
ze2=prod(x2,e2);

fm1=lm(y1~x1-1);
fm2=lm(y2~x2-1);
m1_a=alpha*meatHAC(fm1);
m2_a=alpha*meatHAC(fm2);


thre1=thr_est(dat,yi,xi,qi,intercept=1,h=1,graph=1)
threshold=thre1$threshold;
cc1=which(q<threshold);
cc2=which(q>threshold);
nn1=length(cc1);
nn2=length(cc2);

xx1 = x[cc1,];
yy1 = y[cc1];
xx2 = x[cc2,];
yy2 = y[cc2];

mmi1 = solve(moment(xx1));
mmi2 = solve(moment(xx2));

beta_hat1 = mmi1%*%(t(xx1)%*%yy1);
beta_hat2 = mmi2%*%(t(xx2)%*%yy2);

et1 = yy1 - xx1%*%beta_hat1;
et2 = yy2 - xx2%*%beta_hat2;
eet1 = t(et1)%*%et1;
eet2 = t(et2)%*%et2; 


sig1 = eet1/(nn1-k);
sig2 = eet2/(nn2-k);
sig_jt1 = (eet1+eet2)/(n-k*2);
if(h==0){
  		 sse1 = sqrt(diag(mmi1)%*%sig_jt1);
		 sse2 = sqrt(diag(mmi2)%*%sig_jt1);
	}else{
	  sse1 = sqrt(diag(mmi1%*%moment(prod(xx1,et1))%*%mmi1));
	  sse2 = sqrt(diag(mmi2%*%moment(prod(xx2,et2))%*%mmi2));
	}

betas_hat=cbind(beta_hat1,sse1,beta_hat2,sse2)
colnames(betas_hat)=c("reg1","se","reg2","se");

xxe1=prod(xx1,et1);
xxe2=prod(xx2,et2);

ffm1=lm(yy1~xx1-1);
ffm2=lm(yy2~xx2-1);
m1_hat=alpha*meatHAC(ffm1);
m2_hat=alpha*meatHAC(ffm2);

#m1_a=alpha*moment(ze1)/n1;
#m2_a=alpha*moment(ze2)/n2;
#m1_hat=nn1/T*moment(xxe1)/nn1;
#m2_hat=nn2/T*moment(xxe2)/nn2;

G1_a=t(x1)%*%x1/T;
G2_a=t(x2)%*%x2/T;
G1=t(xx1)%*%xx1/T;
G2=t(xx2)%*%xx2/T;

omiga11=solve(G1_a)%*%m1_a%*%solve(G1_a)
omiga12=solve(G1)%*%m1_hat%*%solve(G1)
omiga13=solve(G1)%*%m1_a%*%solve(G1_a)
omiga14=solve(G1_a)%*%m1_a%*%solve(G1)
T11=omiga11+omiga12-omiga13-omiga14;

omiga21=solve(G2_a)%*%m2_a%*%solve(G2_a)
omiga22=solve(G2)%*%m2_hat%*%solve(G2)
omiga23=solve(G2)%*%m2_a%*%solve(G2_a)
omiga24=solve(G2_a)%*%m2_a%*%solve(G2)
T21=omiga21+omiga22-omiga23-omiga24;

d1=beta_hat1-beta_a1;
d2=beta_hat2-beta_a2;

T_stat=as.numeric(T*(t(d1)%*%solve(T11)%*%d1+t(d2)%*%solve(T21)%*%d2));

pvalue=pchisq(T_stat,2*k);
me_test=cbind(T_stat,pvalue);

supwald=waldtest(dat,yi,xi,qi,intercept=1);

res<-list(threshold=threshold,betas_a=betas_a, betas_hat=betas_hat, me_test,supwald=supwald)
return(res);
}



