# Genotypic-similarity analysis for the Keijzer-4 expressions.
#
# Steps:
#   1. Export the subset of expressions with R2.keijzer4 > 0.2.
#   2. (manual) Run the external program that computes the pairwise genotypic
#      distance matrix for that subset.
#   3. Embed the distance matrix in 2-D with t-SNE and cluster it with
#      several methods (HDBSCAN, k-means, hierarchical clustering, PAM).
#   4. Plot the embedding coloured by cluster and a histogram of R² values.
#
# NOTE(review): `evalData` and `m` are not created in this script; they must
# already exist in the session (presumably loaded by an earlier script).

library(Rtsne)   # t-SNE embedding
library(dbscan)  # hdbscan()
library(cluster) # pam()
library(ggplot2) # ggplot(), ggsave(); was used without being attached

# Kept for backward compatibility: all relative paths below (and any code run
# afterwards in the same session) resolve against this directory.
setwd("C:/reps/HEAL/Publications-2018-GPTP/data")

# Rows of evalData that are good enough to be analysed.
isGoodFit <- evalData$R2.keijzer4 > 0.2

# write subset of expressions
write.csv2(
  x = evalData[isGoodFit, c(2, 4, 9)],
  file = "evalData-subset-keijzer4_0.2.csv"
)

# EXECUTE EXTERNAL PROGRAM TO PRODUCE
# genotypicSimilarity-subset-keijzer4_0.2.csv.gz

genotypicMat <- read.csv2(
  file = "genotypicSimilarity-subset-keijzer4_0.2.csv.gz",
  sep = " ",
  header = FALSE
)
genotypicDist <- as.dist(genotypicMat)

# mdsResult <- cmdscale(genotypicDist)
# plot(mdsResult)
# mapped <- tsne(genotypicDist, perplexity = 30, max_iter = 1000)

# NOTE(review): the original call passed the misspelled `perpexity = 1`.
# That name matches no Rtsne parameter, so the default perplexity (30)
# appears to have been in effect. It is spelled out here to keep the
# embedding unchanged; set it to 1 if that value was really intended.
mapped <- Rtsne(
  as.matrix(genotypicMat),
  dims = 2,
  perplexity = 30,
  theta = 0.01,
  max_iter = 1000,
  verbose = TRUE,
  is_distance = TRUE
)
plot(mapped$Y)

# --- Clustering -------------------------------------------------------------
# Each fit gets its own name. Previously all of them were assigned to
# `genotypicClusters`, so only the last one (hclust) survived and
# `genotypicClusters$cluster` was NULL when the plot data was assembled.

# HDBSCAN directly on the genotypic distances.
hdbscanDistFit <- hdbscan(x = genotypicDist, minPts = 10)

# k-means on the 2-D embedding.
kmeansClusters <- kmeans(x = mapped$Y, centers = 10)

# HDBSCAN on the 2-D embedding; the full tree is kept so it can be cut at a
# fixed height below.
hdbscanFit <- hdbscan(x = mapped$Y, minPts = 10, gen_hdbscan_tree = TRUE)
hdbscanClusters <- cutree(hdbscanFit$hc, h = 8)

# Hierarchical clustering of the genotypic distances.
genotypicClusters <- hclust(genotypicDist)
plot(genotypicClusters)
temp <- cutree(genotypicClusters, k = 3)
plot(genotypicClusters)
# (test <- identify(genotypicClusters))

# PAM on the dissimilarities. With diss = TRUE, pam() needs a dist /
# dissimilarity object, not the raw data frame returned by read.csv2().
pamClusters <- pam(
  x = genotypicDist,
  k = 2,
  diss = TRUE,
  metric = NULL,
  cluster.only = TRUE,
  trace.lev = 1,
  pamonce = 2
)

# --- Join back with original for clusters and qualities ----------------------
# mapped$Y has no column names, so data.frame() names its columns X1 and X2.
# NOTE(review): genCluster is taken from the HDBSCAN fit on the embedding,
# the only fit in this script that has a `$cluster` component that was
# previously assigned to `genotypicClusters`; confirm this is the intended one.
mapped_qualities <- data.frame(
  mapped$Y,
  m[m$q > 0.2, c(3, 4)],
  genCluster = hdbscanFit$cluster,
  hdbscanCluster = hdbscanClusters,
  evalData[isGoodFit, ]
)

ggplot(mapped_qualities, aes(x = X1, y = X2)) +
  geom_point(aes(color = hdbscanCluster)) +
  # geom_point(aes(color = R2.keijzer4)) +
  theme(legend.position = "none") +
  labs(color = "R²", x = "", y = "") +
  scale_color_distiller(type = "qual", palette = "Set1")
  # scale_color_gradient2(low = "blue", mid = "yellow", high = "red", midpoint = 0.6)
ggsave("genotypic_clusters_keijzer4_02.pdf", device = pdf, width = 8, height = 4)

# histogram of R² values
ggplot(evalData, aes(x = R2.keijzer4)) +
  theme_classic() +
  geom_histogram(binwidth = 0.01) +
  labs(x = "R² (Keijzer-4)", y = "Number of expressions") +
  scale_y_log10()
ggsave("quality_distribution_keijzer4.pdf", device = pdf)