library(ggplot2)
library(ggrepel)
library(lme4)
library(lmerTest)
library(car)
library(cowplot)
theme_set(theme_cowplot())
library(colorspace)   ## hsv colorspace manipulations
library(forcats)
library(tidyr)
library(corrplot)

library(dplyr)

# aesthetic preferences: point shapes and the colour palette for plots
my_pch <- c(16, 2, 0)
my_colors <- c(
  "blue",
  "mediumorchid",
  "tomato",
  "gray60"
)

# Read the token-level data.
# stringsAsFactors = TRUE is spelled out because the rest of this script
# treats the phone column as a factor (levels<-, droplevels(), relevel());
# since R 4.0 read.csv() no longer converts strings to factors by default.
df <- read.csv("ultrasound.csv", header = TRUE, stringsAsFactors = TRUE)

# remove speaker 35 because of NA values for YZ1
# but keep if a plot of 35's LD1-LD2 space is needed
df <- droplevels(df[df$subj != "35", ])

# range-normalize LD data
#' Rescale a numeric vector linearly so its minimum maps to 0 and its
#' maximum maps to 1.
#'
#' @param X Numeric vector.
#' @param na.rm If TRUE, NA values are ignored when finding the minimum and
#'   maximum (NA elements stay NA in the result). The default, FALSE, keeps
#'   the previous behaviour: any NA in X makes the whole result NA.
#' @return Numeric vector of the same length as X. If all values of X are
#'   equal the range is zero and the result is NaN.
norm01 <- function(X, na.rm = FALSE) {
  rng <- range(X, na.rm = na.rm)
  (X - rng[1]) / (rng[2] - rng[1])
}
# Add sLD1/sLD2: LD1 and LD2 passed through norm01() separately within each
# speaker. The result stays grouped by subj.
df <- mutate(
  group_by(df, subj),
  sLD1 = norm01(LD1),
  sLD2 = norm01(LD2)
)

# make dataframe of LD1 and LD2 medians by speaker and phone
meds <- df %>%
  group_by(subj, phone, coart_class) %>%
  summarize(medLD1 = median(sLD1), medLD2 = median(sLD2)) %>%
  ungroup()  # phone is relabelled below, so do not keep it as a grouping key

# bin: broad class of each phone (training / apical / laminal)
bin <- factor(meds$phone)
levels(bin) <- list(train = c("IY1", "S", "SH"),
                    apical = c("ZZ1", "ZW1"),
                    laminal = c("IZ1", "YZ1"))
meds$bin <- bin

# Relabel phone codes with IPA symbols for plotting. The mapping is written
# code-by-code (new = old) instead of positionally: factor levels sort
# alphabetically, so ZW1 precedes ZZ1 and a positional vector ending in
# "ɿ", "ʮ" attaches the symbols to the wrong codes.
# NOTE(review): ZZ1 = ɿ and ZW1 = ʮ follows the axis labels of the
# median-distance scatterplots later in this script -- confirm.
# Any phone code not listed here becomes NA.
meds$phone <- factor(meds$phone)
levels(meds$phone) <- list("i" = "IY1",
                           "iʑ" = "IZ1",
                           "s" = "S",
                           "ɕ" = "SH",
                           "yʑ" = "YZ1",
                           "ʮ" = "ZW1",
                           "ɿ" = "ZZ1")

###

# median LD1-LD2 plot for speaker 1; repeat for all speakers or as needed

# Legend entries, shared by the colour and shape scales so that both carry
# the same name and labels. They are matched by position to the values of
# coart_class -- NOTE(review): confirm the order against levels(coart_class).
condition_labels <- c("ɿ, ʮ (fric. onset)",
                      "iʑ, yʑ (fric. onset)",
                      "iʑ, yʑ (no fric. onset)",
                      "Training (i, ɕ, s)")

sz1 <- ggplot(meds[meds$subj == "1", ], aes(medLD1, medLD2, label = phone)) +
  geom_point(aes(color = coart_class, shape = coart_class),
             size = 2, stroke = 1) +
  geom_text_repel(force = 1) +
  scale_color_manual(values = my_colors,
                     name = "Condition",
                     labels = condition_labels) +
  scale_shape_manual(values = c(0, 1, 2, 16),
                     name = "Condition",
                     labels = condition_labels) +
  xlab("LD1") +
  ylab("LD2") +
  # widen both axes by 15% so the repelled text labels have room
  scale_x_continuous(expand = c(0.15, 0)) +
  scale_y_continuous(expand = c(0.15, 0)) +
  theme(legend.text = element_text(size = 10))
	
###

# calculate distance from speaker medians for i, s, ɕ
#
# The medians are taken from the columns of the current group (sLD1, sLD2,
# phone), so each speaker gets their own reference points. Indexing the
# global data frame (df[df$phone == "S", ]$sLD1) inside mutate() would bypass
# group_by() and give every speaker the same median pooled over all speakers.
# A speaker with no tokens of a training phone gets NA for that median.
df2 <- df %>%
  group_by(subj) %>%
  mutate(med.LD1.s = median(sLD1[phone == "S"]),
         med.LD2.s = median(sLD2[phone == "S"]),
         med.LD1.i = median(sLD1[phone == "IY1"]),
         med.LD2.i = median(sLD2[phone == "IY1"]),
         med.LD1.c = median(sLD1[phone == "SH"]),
         med.LD2.c = median(sLD2[phone == "SH"]))

# Euclidean distance of every token from each of the three medians
df2 <- df2 %>%
  mutate(dist.i = sqrt((sLD1 - med.LD1.i)^2 + (sLD2 - med.LD2.i)^2),
         dist.s = sqrt((sLD1 - med.LD1.s)^2 + (sLD2 - med.LD2.s)^2),
         dist.c = sqrt((sLD1 - med.LD1.c)^2 + (sLD2 - med.LD2.c)^2))
# add age cohort factor back in: speakers listed in `young` are "post-1985",
# everyone else is "pre-1985"
young <- c(
  "1", "2", "3", "5", "6", "10", "11",
  "13", "19", "20", "28", "29", "30", "31",
  "34", "35", "36", "37", "41", "42", "43", "44"
)
df2 <- df2 %>%
  mutate(YoB = factor(if_else(subj %in% young, "post-1985", "pre-1985")))

# remove LD training data (i, s, ɕ themselves)
is_training <- df2$phone == "S" | df2$phone == "SH" | df2$phone == "IY1"
df3 <- droplevels(df2[!is_training, ])

# speaker lists used to add age/gender in again
male <- c(
  "9", "10", "11", "13", "19", "24",
  "27", "28", "29", "34", "35",
  "36", "38", "39", "41", "44"
)
young <- c(
  "1", "2", "3", "5", "6", "10", "11",
  "13", "19", "20", "28", "29", "30", "31",
  "34", "35", "36", "37", "41", "42", "43", "44"
)

# median distances per speaker and phone, for plotting, with gender and
# age cohort attached
df5 <- df3 %>%
  group_by(subj, phone) %>%
  summarize(median.dist.s = median(dist.s),
            median.dist.c = median(dist.c),
            median.dist.i = median(dist.i)) %>%
  mutate(gender = factor(if_else(subj %in% male, "male", "female")),
         YoB = factor(if_else(subj %in% young, "post-1985", "pre-1985")))

# LD1-LD2 distance ellipse plots

# Ellipse plot: distance from /ɕ/ (x) against distance from /i/ (y), one
# ellipse per phone, faceted by age cohort, with per-speaker medians (df5)
# overlaid as points.
#
# Legend labels are keyed by phone code so they cannot drift out of step with
# the factor's level order. Levels sort alphabetically (ZW1 before ZZ1), so an
# unnamed vector ending in "ɿ", "ʮ" labels the two apical phones the wrong way
# round.
# NOTE(review): ZZ1 = ɿ and ZW1 = ʮ follows the axis labels of the
# median-distance scatterplots later in this script -- confirm.
phone_labels <- c(IZ1 = "iʑ", YZ1 = "yʑ", ZZ1 = "ɿ", ZW1 = "ʮ")

ciplot <- ggplot(df3, aes(dist.c, dist.i,
                          color = phone, shape = phone, lty = phone)) +
  geom_abline(color = "gray", slope = 1, intercept = 0) +
  stat_ellipse() +
  scale_color_discrete(name = "Phone", labels = phone_labels) +
  scale_shape_discrete(name = "Phone", labels = phone_labels) +
  scale_linetype_discrete(name = "Phone", labels = phone_labels) +
  labs(x = "Dist. from /ɕ/", y = "Dist. from /i/") +
  background_grid(major = "xy") +
  geom_point(data = df5, aes(median.dist.c, median.dist.i, color = phone)) +
  facet_wrap(~fct_rev(YoB))

# Ellipse plot: distance from /ɕ/ (x) against distance from /s/ (y), one
# ellipse per phone, faceted by age cohort, with per-speaker medians (df5)
# overlaid as points.
#
# Legend labels are keyed by phone code so they cannot drift out of step with
# the factor's level order. Levels sort alphabetically (ZW1 before ZZ1), so an
# unnamed vector ending in "ɿ", "ʮ" labels the two apical phones the wrong way
# round.
# NOTE(review): ZZ1 = ɿ and ZW1 = ʮ follows the axis labels of the
# median-distance scatterplots later in this script -- confirm.
phone_labels <- c(IZ1 = "iʑ", YZ1 = "yʑ", ZZ1 = "ɿ", ZW1 = "ʮ")

csplot <- ggplot(df3, aes(dist.c, dist.s,
                          color = phone, shape = phone, lty = phone)) +
  geom_abline(color = "gray", slope = 1, intercept = 0) +
  stat_ellipse() +
  scale_color_discrete(name = "Phone", labels = phone_labels) +
  scale_shape_discrete(name = "Phone", labels = phone_labels) +
  scale_linetype_discrete(name = "Phone", labels = phone_labels) +
  labs(x = "Dist. from /ɕ/", y = "Dist. from /s/") +
  background_grid(major = "xy") +
  geom_point(data = df5, aes(median.dist.c, median.dist.s, color = phone)) +
  facet_wrap(~fct_rev(YoB))

# Stack the two ellipse plots without their own legends and put the legend
# taken from ciplot to the right of the stack.
leg <- get_legend(ciplot)
ps <- plot_grid(ciplot + theme(legend.position = "none"),
                csplot + theme(legend.position = "none"),
                ncol = 1)

png("ellipses-v3.png", width = 6.5, height = 5.5, units = "in", res = 400)
# print() explicitly: the figure is only drawn when the plot object is
# printed, and top-level values are not auto-printed when this file is run
# with source(), which would leave the PNG empty.
print(plot_grid(ps, NULL, leg, rel_widths = c(8, 0.5, 2), nrow = 1))
dev.off()

###

# lmers for LD measures

# make ZZ1 the reference level for phone and pre-1985 the reference level
# for cohort
df3$phone <- relevel(df3$phone, ref = "ZZ1")
df3$YoB <- relevel(as.factor(df3$YoB), ref = "pre-1985")

# one model per distance measure (s, ɕ and i, in that order): phone by
# cohort interaction with a random intercept for speaker
age.s <- lmer(dist.s ~ phone * YoB + (1 | subj), data = df3)
age.c <- lmer(dist.c ~ phone * YoB + (1 | subj), data = df3)
age.i <- lmer(dist.i ~ phone * YoB + (1 | subj), data = df3)


###

# LD distance correlations
# df4: one row per speaker, one column per (reference sound, phone) median
# distance, named median.dist.<ref>.<phone>, plus gender and age cohort
df4 <- df3 %>%
  group_by(subj, phone) %>%
  summarize(median.dist.s = median(dist.s),
            median.dist.c = median(dist.c),
            median.dist.i = median(dist.i)) %>%
  pivot_wider(names_from = phone,
              values_from = c("median.dist.s",
                              "median.dist.c",
                              "median.dist.i"),
              names_sep = ".") %>%
  mutate(gender = factor(if_else(subj %in% male, "male", "female")),
         YoB = factor(if_else(subj %in% young, "post-1985", "pre-1985")))

# Building blocks shared by the six median-distance scatterplots. Each call
# returns fresh ggplot components so no object is shared between plots.

# gray y = x reference line, drawn underneath the points
identity_line <- function() {
  geom_abline(color = "gray", intercept = 0, slope = 1)
}

# points, both axes limited to 0-1, linear fit, one facet per age cohort,
# small axis text
corr_layers <- function() {
  list(
    geom_point(),
    xlim(0, 1),
    ylim(0, 1),
    geom_smooth(method = "lm"),
    facet_wrap(~fct_rev(YoB)),
    theme(axis.text = element_text(size = 8))
  )
}

# ZZ1 vs ZW1, for distance from s, ɕ and i; only the first plot labels the
# identity line
s.ant <- ggplot(df4, aes(median.dist.s.ZZ1, median.dist.s.ZW1)) +
  identity_line() +
  annotate(geom = "text", x = 0.65, y = 0.75,
           label = "Identity line", angle = 43, size = 4) +
  corr_layers() +
  labs(x = "Median /s/ dist., /ɿ/", y = "Median /s/ dist., /ʮ/")
c.ant <- ggplot(df4, aes(median.dist.c.ZZ1, median.dist.c.ZW1)) +
  identity_line() +
  corr_layers() +
  labs(x = "Median /ɕ/ dist., /ɿ/", y = "Median /ɕ/ dist., /ʮ/")
i.ant <- ggplot(df4, aes(median.dist.i.ZZ1, median.dist.i.ZW1)) +
  identity_line() +
  corr_layers() +
  labs(x = "Median /i/ dist., /ɿ/", y = "Median /i/ dist., /ʮ/")

# IZ1 vs YZ1, for distance from s, ɕ and i
s.post <- ggplot(df4, aes(median.dist.s.IZ1, median.dist.s.YZ1)) +
  identity_line() +
  corr_layers() +
  labs(x = "Median /s/ dist., /iʑ/", y = "Median /s/ dist., /yʑ/")
c.post <- ggplot(df4, aes(median.dist.c.IZ1, median.dist.c.YZ1)) +
  identity_line() +
  corr_layers() +
  labs(x = "Median /ɕ/ dist., /iʑ/", y = "Median /ɕ/ dist., /yʑ/")
i.post <- ggplot(df4, aes(median.dist.i.IZ1, median.dist.i.YZ1)) +
  identity_line() +
  corr_layers() +
  labs(x = "Median /i/ dist., /iʑ/", y = "Median /i/ dist., /yʑ/")
	
# Write the six scatterplots to one PNG as a 2-row grid.
png("ld-corrs-v2.png", width = 10.8, height = 4.4, units = "in", res = 600)
# print() explicitly: the figure is only drawn when the plot object is
# printed, and top-level values are not auto-printed when this file is run
# with source(), which would leave the PNG empty.
print(plot_grid(s.ant, c.ant, i.ant, s.post, c.post, i.post, nrow = 2))
dev.off()

###

# corr analysis
# df6 is a copy of df4 whose median-distance columns carry IPA names for
# better display in the correlograms.
#
# Each column is renamed from its own name (median.dist.<ref>.<phone>), not
# by position: the order in which pivot_wider() lays out the phone columns
# depends on the phone factor, whose reference level is changed by relevel()
# before df4 is built, so a fixed positional name vector attaches labels to
# the wrong columns.
# NOTE(review): ZZ1 = ɿ and ZW1 = ʮ follows the axis labels of the
# median-distance scatterplots -- confirm.
ref_ipa <- c(s = "s", c = "ɕ", i = "i")
phone_ipa <- c(IZ1 = "iʑ", YZ1 = "yʑ", ZZ1 = "ɿ", ZW1 = "ʮ")

dist_cols <- grep("^median\\.dist\\.", colnames(df4), value = TRUE)
display_names <- vapply(
  strsplit(dist_cols, ".", fixed = TRUE),
  function(parts) {
    # parts is c("median", "dist", <ref>, <phone>); [[ ]] fails loudly on a
    # code that has no IPA entry
    paste("med.dist", ref_ipa[[parts[3]]], phone_ipa[[parts[4]]], sep = ".")
  },
  character(1)
)

df6 <- df4
colnames(df6)[match(dist_cols, colnames(df6))] <- display_names

# correlation matrices per cohort, over the median-distance columns only;
# speaker 36 is left out of the younger cohort
lds.cor.old <- cor(df6[df6$YoB == "pre-1985", display_names],
                   use = "complete.obs")
lds.cor.young <- cor(df6[df6$YoB == "post-1985" & df6$subj != "36",
                         display_names],
                     use = "complete.obs")

# correlograms
# cor.mtest() results per cohort on columns 2-13 of df4; speaker 36 is left
# out of the younger cohort
old_rows <- df4$YoB == "pre-1985"
young_rows <- df4$YoB == "post-1985" & df4$subj != "36"
res1 <- cor.mtest(df4[old_rows, 2:13], use = "complete.obs")
res2 <- cor.mtest(df4[young_rows, 2:13], use = "complete.obs")

# upper-triangle correlogram with the settings shared by both cohorts;
# p is passed to corrplot() as p.mat
ld_corrplot <- function(corr, p) {
  corrplot(corr,
           type = "upper",
           order = "AOE",
           col = c("black", "white"),
           bg = "lightblue",
           tl.col = "black",
           p.mat = p,
           insig = "label_sig",
           sig.level = c(.001, .01, .05),
           pch.cex = .9,
           pch.col = "red")
}

png("ld-corrplot-old-v2.png", width = 5, height = 5, units = "in", res = 400)
ld_corrplot(lds.cor.old, res1$p)
dev.off()

png("ld-corrplot-young-v2.png", width = 5, height = 5, units = "in", res = 400)
ld_corrplot(lds.cor.young, res2$p)
dev.off()
