Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2886_SymRegGrammarEnumeration/ExpressionClustering_R/genotypic_similarity.r @ 16188

Last change on this file since 16188 was 16174, checked in by gkronber, 7 years ago

#2886 made several changes to clustering scripts for GPTP paper

File size: 2.2 KB
Line 
1library(Rtsne) # Load package
2
3setwd("C:/reps/HEAL/Publications-2018-GPTP/data")
4#write subset of expressions
5write.csv2(x=evalData[evalData$R2.keijzer4>0.2,c(2,4,9)], file = "evalData-subset-keijzer4_0.2.csv")
6
7
8
9# EXECUTE EXTERNAL PROGRAM TO PRODUCE genotypicSimilarity-subset-keijzer4_0.4.csv.gz
10genotypicMat <- read.csv2(file="genotypicSimilarity-subset-keijzer4_0.2.csv.gz", sep=" ", header = FALSE)
11
12genotypicDist <- as.dist(genotypicMat)
13
14
15
16#mdsResult <- cmdscale(genotypicDist)
17#plot(mdsResult)
18
19#mapped <- tsne(genotypicDist, perplexity = 30, max_iter = 1000)
20
21mapped <- Rtsne(as.matrix(genotypicMat), dims = 2, perpexity=1, theta=0.01, max_iter=1000, verbose=TRUE, is_distance=TRUE)
22
23plot(mapped$Y)
24
25
26# join back with original for clusters and qualities
27library(dbscan)
28genotypicClusters <- hdbscan(x=as.dist(genotypicMat), minPts = 10)
29kmeansClusters <- kmeans(x=mapped$Y, centers = 10 )
30genotypicClusters <- hdbscan(x=mapped$Y, minPts = 10, gen_hdbscan_tree = TRUE)
31hdbscanClusters <- cutree(genotypicClusters$hc, h=8)
32
33genotypicClusters <- hclust(as.dist(genotypicMat))
34plot(genotypicClusters)
35temp <- cutree(genotypicClusters, k=3)
36plot(genotypicClusters)
37#(test <- identify(genotypicClusters))
38
39library(cluster)
40pamClusters <- pam(x= genotypicMat, k = 2, diss=TRUE, metric=NULL, cluster.only = TRUE, trace.lev=1, pamonce=2)
41
42mapped_qualities <- data.frame(mapped$Y,  m[m$q>0.2, c(3,4)], genCluster = genotypicClusters$cluster, evalData[evalData$R2.keijzer4>0.2, ])
43ggplot(mapped_qualities, aes(x=X1, y=X2, c=genCluster)) +
44  geom_point(aes(color=hdbscanClusters)) +
45  #geom_point(aes(color=R2.keijzer4)) +
46  theme(legend.position = "none") +
47  labs(color="R²", x="", y="") +
48  scale_color_distiller(type="qual", palette="Set1")
49  #scale_color_gradient2(low="blue", mid="yellow", high="red", midpoint=0.6)
50ggsave("genotypic_clusters_keijzer4_02.pdf", device=pdf, width=8, height=4)
51
52
53#histogram of R² values
54ggplot(evalData, aes(x=R2.keijzer4)) +
55  theme_classic() +
56  geom_histogram(binwidth=0.01) +
57  labs(x="R² (Keijzer-4)", y="Number of expressions") +
58  scale_y_log10()
59
60ggsave("quality_distribution_keijzer4.pdf", device=pdf)
Note: See TracBrowser for help on using the repository browser.