1 | library(Rtsne) # Load package
|
---|
2 |
|
---|
3 | setwd("C:/reps/HEAL/Publications-2018-GPTP/data")
|
---|
4 | #write subset of expressions
|
---|
5 | write.csv2(x=evalData[evalData$R2.keijzer4>0.2,c(2,4,9)], file = "evalData-subset-keijzer4_0.2.csv")
|
---|
6 |
|
---|
7 |
|
---|
8 |
|
---|
9 | # EXECUTE EXTERNAL PROGRAM TO PRODUCE genotypicSimilarity-subset-keijzer4_0.4.csv.gz
|
---|
10 | genotypicMat <- read.csv2(file="genotypicSimilarity-subset-keijzer4_0.2.csv.gz", sep=" ", header = FALSE)
|
---|
11 |
|
---|
12 | genotypicDist <- as.dist(genotypicMat)
|
---|
13 |
|
---|
14 |
|
---|
15 |
|
---|
16 | #mdsResult <- cmdscale(genotypicDist)
|
---|
17 | #plot(mdsResult)
|
---|
18 |
|
---|
19 | #mapped <- tsne(genotypicDist, perplexity = 30, max_iter = 1000)
|
---|
20 |
|
---|
21 | mapped <- Rtsne(as.matrix(genotypicMat), dims = 2, perpexity=1, theta=0.01, max_iter=1000, verbose=TRUE, is_distance=TRUE)
|
---|
22 |
|
---|
23 | plot(mapped$Y)
|
---|
24 |
|
---|
25 |
|
---|
26 | # join back with original for clusters and qualities
|
---|
27 | library(dbscan)
|
---|
28 | genotypicClusters <- hdbscan(x=as.dist(genotypicMat), minPts = 10)
|
---|
29 | kmeansClusters <- kmeans(x=mapped$Y, centers = 10 )
|
---|
30 | genotypicClusters <- hdbscan(x=mapped$Y, minPts = 10, gen_hdbscan_tree = TRUE)
|
---|
31 | hdbscanClusters <- cutree(genotypicClusters$hc, h=8)
|
---|
32 |
|
---|
33 | genotypicClusters <- hclust(as.dist(genotypicMat))
|
---|
34 | plot(genotypicClusters)
|
---|
35 | temp <- cutree(genotypicClusters, k=3)
|
---|
36 | plot(genotypicClusters)
|
---|
37 | #(test <- identify(genotypicClusters))
|
---|
38 |
|
---|
39 | library(cluster)
|
---|
40 | pamClusters <- pam(x= genotypicMat, k = 2, diss=TRUE, metric=NULL, cluster.only = TRUE, trace.lev=1, pamonce=2)
|
---|
41 |
|
---|
42 | mapped_qualities <- data.frame(mapped$Y, m[m$q>0.2, c(3,4)], genCluster = genotypicClusters$cluster, evalData[evalData$R2.keijzer4>0.2, ])
|
---|
43 | ggplot(mapped_qualities, aes(x=X1, y=X2, c=genCluster)) +
|
---|
44 | geom_point(aes(color=hdbscanClusters)) +
|
---|
45 | #geom_point(aes(color=R2.keijzer4)) +
|
---|
46 | theme(legend.position = "none") +
|
---|
47 | labs(color="R²", x="", y="") +
|
---|
48 | scale_color_distiller(type="qual", palette="Set1")
|
---|
49 | #scale_color_gradient2(low="blue", mid="yellow", high="red", midpoint=0.6)
|
---|
50 | ggsave("genotypic_clusters_keijzer4_02.pdf", device=pdf, width=8, height=4)
|
---|
51 |
|
---|
52 |
|
---|
53 | #histogram of R² values
|
---|
54 | ggplot(evalData, aes(x=R2.keijzer4)) +
|
---|
55 | theme_classic() +
|
---|
56 | geom_histogram(binwidth=0.01) +
|
---|
57 | labs(x="R² (Keijzer-4)", y="Number of expressions") +
|
---|
58 | scale_y_log10()
|
---|
59 |
|
---|
60 | ggsave("quality_distribution_keijzer4.pdf", device=pdf)
|
---|