- Timestamp:
- 03/15/18 10:41:20 (6 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2886_SymRegGrammarEnumeration/ExpressionClustering/Program.cs
r15840 r15842 1 1 using System; 2 2 using System.Collections.Generic; 3 using System.Drawing; 3 4 using System.IO; 4 5 using System.Linq; 6 using HeuristicLab.Analysis; 7 using HeuristicLab.Analysis.Views; 8 using System.Windows.Forms; 5 9 6 10 namespace ExpressionClustering { 7 11 class Program { 8 12 private static readonly string folder = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory); 13 private static readonly string clusterFolder = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "clusters"); 9 14 private static readonly string distinctSentencesFileName = Path.Combine(folder, @"distinctSentences.csv"); 10 15 private static readonly string allSentencesFileName = Path.Combine(folder, "allSentences.csv"); 11 16 private static readonly string outputFileName = Path.Combine(folder, "evaluations.csv.gz"); 12 private static readonly string columnDelimiter = ";";13 17 private static int N = 100; 14 18 private static double[] evalBuf = new double[N]; … … 31 35 static void Main(string[] args) { 32 36 33 TestFLANN();34 35 37 var hash2Sentences = new Dictionary<string, List<string>>(); 38 // for debugging only 39 var postfix2infix = new Dictionary<string, string>(); 36 40 37 41 … … 52 56 } 53 57 ls.Add(toks[1]); 58 postfix2infix.Add(toks[1], toks[0]); 54 59 nSentences++; 55 60 } … … 59 64 } 60 65 61 using (var writer = new StreamWriter(new System.IO.Compression.GZipStream(new FileStream(outputFileName, FileMode.OpenOrCreate), System.IO.Compression.CompressionMode.Compress))) 62 foreach (var kvp in hash2Sentences) { 63 var ls = kvp.Value; 64 var sentence = FindShortest(ls); 65 Evaluate(sentence, xs, evalBuf); 66 Add(writer, sentence, evalBuf); 67 } 68 } 69 70 private static void TestFLANN() { 71 var rand = new Random(1234); 72 var dim = 100; 73 var N = 10000; 74 var dataset = new List<double[]>(); 75 for(int i=0;i<N;i++) { 76 var x = new double[dim]; 77 for (int j = 0; j < dim; j++) x[j] = rand.NextDouble(); 78 
dataset.Add(x); 79 } 80 List<int> nnResults; 81 List<double> nnDists; 82 Flann.FindNearestNeighbours(dataset, dataset, out nnResults, out nnDists); 83 84 } 66 List<double[]> functions = new List<double[]>(); 67 List<string> sentences = new List<string>(); 68 List<double> qualities = new List<double>(); 69 70 foreach (var kvp in hash2Sentences) { 71 var ls = kvp.Value; 72 var sentence = FindShortest(ls); 73 Evaluate(sentence, xs, evalBuf); 74 if (evalBuf.Any(ei => float.IsInfinity((float)ei) || float.IsNaN((float)ei))) { 75 Console.WriteLine("skipping {0} {1}", evalBuf.Average(), sentence); 76 } else { 77 try { 78 Scale(evalBuf, ys); 79 functions.Add((double[])evalBuf.Clone()); 80 sentences.Add(sentence); 81 HeuristicLab.Problems.DataAnalysis.OnlineCalculatorError error; 82 qualities.Add(HeuristicLab.Problems.DataAnalysis.OnlinePearsonsRSquaredCalculator.Calculate(evalBuf, ys, out error)); 83 } catch (ArgumentException e) { 84 // scaling failed 85 } 86 } 87 } 88 89 List<int> clusters; 90 List<double> distances; 91 Flann.FindClusters(functions, out clusters, out distances, 100); 92 93 // output all clusters and functions 94 using (var writer = new StreamWriter(new System.IO.Compression.GZipStream(new FileStream(outputFileName, FileMode.OpenOrCreate), System.IO.Compression.CompressionMode.Compress))) { 95 for (int i = 0; i < functions.Count; i++) { 96 writer.WriteLine("{0};{1};{2};{3};{4};{5}", clusters[i], distances[i], qualities[i], sentences[i], postfix2infix[sentences[i]], string.Join(";", functions[i].Select(fi => fi.ToString()))); 97 } 98 } 99 100 var funClusters = functions.Zip(clusters, (f, c) => Tuple.Create(f, c)).GroupBy(t => t.Item2); 101 var dtView = new DataTableView(); 102 dtView.Size = new Size(800, 600); 103 104 foreach (var funCluster in funClusters) { 105 // draw the functions for each cluster into a separate png 106 var dtName = string.Format("R² {0}", Enumerable.Range(0, qualities.Count).Where(idx => clusters[idx] == funCluster.Key).Select(idx => 
qualities[idx]).Average()); 107 var dt = new DataTable(dtName, dtName); 108 var rows = new List<DataRow>(); 109 int i = 0; 110 foreach (var fun in funCluster.Select(t => t.Item1)) { 111 var name = i.ToString(); 112 var dr = new DataRow(name, name, fun); 113 rows.Add(dr); 114 i++; 115 } 116 dt.Rows.AddRange(rows); 117 dtView.Content = dt; 118 using (var bm = new Bitmap(800, 600)) { 119 dtView.DrawToBitmap(bm, new Rectangle(0, 0, 800, 600)); 120 bm.Save(Path.Combine(clusterFolder, string.Format("cluster_{0,3}.png", funCluster.Key))); 121 } 122 } 123 } 124 125 85 126 86 127 private static string FindShortest(List<string> ls) { … … 91 132 return minElem; 92 133 } 134 93 135 94 136 … … 102 144 HeuristicLab.Problems.DataAnalysis.OnlineLinearScalingParameterCalculator.Calculate(evalBuf, ys, out alpha, out beta, out error); 103 145 if (error != HeuristicLab.Problems.DataAnalysis.OnlineCalculatorError.None) { 104 alpha = 0.0; 105 beta = 1.0; 106 Console.WriteLine("WARNING: error in scaling"); 146 throw new ArgumentException(); 107 147 } 108 148 … … 110 150 evalBuf[i] = beta * evalBuf[i] + alpha; 111 151 } 112 113 114 //115 // var meanE = 0.0;116 // var meanE2 = 0.0;117 // var meanY = 0.0;118 // var meanY2 = 0.0;119 // for(int i=0;i<evalBuf.Length;i++) {120 // var deltaE = evalBuf[i] - meanE;121 // meanE += deltaE / (i+1);122 // var deltaE2 = evalBuf[i] - meanE;123 // meanE2 += deltaE * deltaE2;124 //125 //126 // var deltaY = ys[i] - meanY;127 // meanY += deltaY / (i + 1);128 // var deltaY2 = ys[i] - meanY;129 // meanY2 += deltaY * deltaY2;130 // TODO COVARIANCE131 // Linear Scaling: b = cov(y,e) / var(e); a = meanY - b*meanE;132 // }133 //134 // var varE = meanE2 / evalBuf.Length;135 // var varY = meanY2 / evalBuf.Length;136 // Console.WriteLine("{0} {1} {2} {3}", meanE, evalBuf.Average(), meanY, ys.Average());137 // Console.WriteLine("{0} {1}", varE, varY);138 //139 // var factor = varY / varE;140 // for(int i=0;i<evalBuf.Length;i++) {141 // evalBuf[i] = (evalBuf[i] - 
meanE) * factor + meanY;142 // }143 152 } 144 153 … … 158 167 var b = stack[topOfStack - 1]; 159 168 for (int i = 0; i < N; i++) { 160 a[i] += b[i]; 161 } 169 b[i] += a[i]; 170 } 171 topOfStack--; 162 172 break; 163 173 } … … 167 177 var b = stack[topOfStack - 1]; 168 178 for (int i = 0; i < N; i++) { 169 a[i] *= b[i]; 170 } 179 b[i] *= a[i]; 180 } 181 topOfStack--; 171 182 break; 172 183 } … … 231 242 } 232 243 #endregion 233 234 235 236 // add the line with it's evaluation result to a data structure for clustering237 private static void Add(StreamWriter writer, string line, double[] evalBuf) {238 var avg = evalBuf.Average();239 if (double.IsNaN(avg) || double.IsInfinity(avg)) {240 Console.WriteLine("skipping {0} {1}", evalBuf.Average(), line);241 } else {242 Scale(evalBuf, ys);243 244 writer.WriteLine(string.Join("\t", evalBuf.Select(ei => ei.ToString(System.Globalization.CultureInfo.InvariantCulture))));245 }246 }247 244 } 248 245 }
Note: See TracChangeset for help on using the changeset viewer.