- Timestamp:
- 03/15/18 10:41:20 (7 years ago)
- Location:
- branches/2886_SymRegGrammarEnumeration/ExpressionClustering
- Files:
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2886_SymRegGrammarEnumeration/ExpressionClustering/ExpressionClustering.csproj
r15840 r15842 38 38 <HintPath>..\..\..\trunk\bin\ALGLIB-3.7.0.dll</HintPath> 39 39 </Reference> 40 <Reference Include="HeuristicLab.Analysis-3.3"> 41 <HintPath>..\..\..\trunk\bin\HeuristicLab.Analysis-3.3.dll</HintPath> 42 </Reference> 43 <Reference Include="HeuristicLab.Analysis.Views-3.3"> 44 <HintPath>..\..\..\trunk\bin\HeuristicLab.Analysis.Views-3.3.dll</HintPath> 45 </Reference> 46 <Reference Include="HeuristicLab.Collections-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 47 <SpecificVersion>False</SpecificVersion> 48 <HintPath>..\..\..\trunk\bin\HeuristicLab.Collections-3.3.dll</HintPath> 49 </Reference> 40 50 <Reference Include="HeuristicLab.Common-3.3"> 41 51 <HintPath>..\..\..\trunk\bin\HeuristicLab.Common-3.3.dll</HintPath> 52 </Reference> 53 <Reference Include="HeuristicLab.Core-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec" /> 54 <Reference Include="HeuristicLab.Core.Views-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 55 <SpecificVersion>False</SpecificVersion> 56 <HintPath>..\..\..\trunk\bin\HeuristicLab.Core.Views-3.3.dll</HintPath> 57 </Reference> 58 <Reference Include="HeuristicLab.MainForm-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 59 <SpecificVersion>False</SpecificVersion> 60 <HintPath>..\..\..\trunk\bin\HeuristicLab.MainForm-3.3.dll</HintPath> 61 </Reference> 62 <Reference Include="HeuristicLab.MainForm.WindowsForms-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 63 <SpecificVersion>False</SpecificVersion> 64 <HintPath>..\..\..\trunk\bin\HeuristicLab.MainForm.WindowsForms-3.3.dll</HintPath> 42 65 </Reference> 43 66 <Reference Include="HeuristicLab.Problems.DataAnalysis-3.4"> 44 67 <HintPath>..\..\..\trunk\bin\HeuristicLab.Problems.DataAnalysis-3.4.dll</HintPath> 45 68 </Reference> 69 <Reference 
Include="HeuristicLab.Visualization.ChartControlsExtensions-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 70 <SpecificVersion>False</SpecificVersion> 71 <HintPath>..\..\..\trunk\bin\HeuristicLab.Visualization.ChartControlsExtensions-3.3.dll</HintPath> 72 </Reference> 46 73 <Reference Include="System" /> 47 74 <Reference Include="System.Core" /> 75 <Reference Include="System.Drawing" /> 76 <Reference Include="System.Windows" /> 77 <Reference Include="System.Windows.Forms" /> 48 78 <Reference Include="System.Xml.Linq" /> 49 79 <Reference Include="System.Data.DataSetExtensions" /> -
branches/2886_SymRegGrammarEnumeration/ExpressionClustering/Flann.cs
r15841 r15842 86 86 public int checks; /* how many leafs (features) to check in one search */ 87 87 public float cb_index; /* cluster boundary index. Used when searching the kmeans tree */ 88 public float eps; 88 public float eps; 89 89 90 90 /* kdtree index parameters */ … … 153 153 [DllImport("flann-1.7.1.dll")] 154 154 public static extern int flann_compute_cluster_centers(float[] dataset, int rows, int cols, int clusters, float[] result, ref FLANNParameters flann_params); 155 156 public static int FindClusters(List<double[]> dataset, out List<int> results, out List<double> distances, int nClusters) { 157 var _nRows = dataset.Count; 158 var _dists = new float[_nRows]; 159 var _result = new int[_nRows]; 160 var _dim = dataset.First().Length; 161 FLANNParameters p = DEFAULT_FLANN_PARAMETERS; 162 p.algorithm = flann_algorithm_t.FLANN_INDEX_LINEAR; 163 p.centers_init = flann_centers_init_t.FLANN_CENTERS_RANDOM; 164 p.target_precision = 0.9f; 165 p.log_level = flann_log_level_t.FLANN_LOG_INFO; 166 // copy training set 167 var _ds = new float[dataset.Count * _dim]; 168 var i = 0; 169 foreach (var e in dataset) { 170 for (int d = 0; d < _dim; d++) { 171 _ds[i++] = (float)e[d]; 172 } 173 } 174 175 flann_set_distance_type(flann_distance_t.FLANN_DIST_EUCLIDEAN, 0); 176 177 float[] centers = new float[nClusters * _dim]; 178 int actualClusters = flann_compute_cluster_centers(_ds, rows: dataset.Count, cols: _dim, clusters: nClusters, result: centers, flann_params: ref p); 179 180 var res = flann_find_nearest_neighbors(centers, actualClusters, _dim, _ds, _nRows, _result, _dists, 1, ref p); 181 182 183 distances = _dists.Select(fi => (double)fi).ToList(); 184 results = _result.ToList(); 185 return res; 186 } 155 187 156 188 public static int FindNearestNeighbours(List<double[]> dataset, List<double[]> queryset, out List<int> results, out List<double> distances, int nearestNeighbours = 3) { … … 176 208 flann_set_distance_type(flann_distance_t.FLANN_DIST_EUCLIDEAN, 0); 177 209 
178 int nClusters = 100; 179 float[] centers = new float[nClusters * _dim]; 180 flann_compute_cluster_centers(_ds, rows: dataset.Count, cols: _dim, clusters: nClusters, result: centers, flann_params: ref p); 181 182 float speedup = -1.0f; 210 // int nClusters = 100; 211 // float[] centers = new float[nClusters * _dim]; 212 // flann_compute_cluster_centers(_ds, rows: dataset.Count, cols: _dim, clusters: nClusters, result: centers, flann_params: ref p); 213 214 215 // for each point in the training set find the nearest cluster 216 217 // float speedup = -1.0f; 183 218 // _ds must be a rows × cols matrix stored in row-major order (one feature on each row) 184 219 //var index = flann_build_index(_ds, rows: dataset.Count, cols: _dim, speedup: ref speedup, flann_params: ref p); 185 220 186 221 187 222 // copy testset 188 223 var _testset = new float[_tRows * _dim]; 189 224 i = 0; 190 for (int d = 0; d < _dim; d++) {191 foreach (var e in queryset) {225 for (int d = 0; d < _dim; d++) { 226 foreach (var e in queryset) { 192 227 _testset[i++] = (float)e[d]; 193 228 } -
branches/2886_SymRegGrammarEnumeration/ExpressionClustering/Program.cs
r15840 r15842 1 1 using System; 2 2 using System.Collections.Generic; 3 using System.Drawing; 3 4 using System.IO; 4 5 using System.Linq; 6 using HeuristicLab.Analysis; 7 using HeuristicLab.Analysis.Views; 8 using System.Windows.Forms; 5 9 6 10 namespace ExpressionClustering { 7 11 class Program { 8 12 private static readonly string folder = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory); 13 private static readonly string clusterFolder = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "clusters"); 9 14 private static readonly string distinctSentencesFileName = Path.Combine(folder, @"distinctSentences.csv"); 10 15 private static readonly string allSentencesFileName = Path.Combine(folder, "allSentences.csv"); 11 16 private static readonly string outputFileName = Path.Combine(folder, "evaluations.csv.gz"); 12 private static readonly string columnDelimiter = ";";13 17 private static int N = 100; 14 18 private static double[] evalBuf = new double[N]; … … 31 35 static void Main(string[] args) { 32 36 33 TestFLANN();34 35 37 var hash2Sentences = new Dictionary<string, List<string>>(); 38 // for debugging only 39 var postfix2infix = new Dictionary<string, string>(); 36 40 37 41 … … 52 56 } 53 57 ls.Add(toks[1]); 58 postfix2infix.Add(toks[1], toks[0]); 54 59 nSentences++; 55 60 } … … 59 64 } 60 65 61 using (var writer = new StreamWriter(new System.IO.Compression.GZipStream(new FileStream(outputFileName, FileMode.OpenOrCreate), System.IO.Compression.CompressionMode.Compress))) 62 foreach (var kvp in hash2Sentences) { 63 var ls = kvp.Value; 64 var sentence = FindShortest(ls); 65 Evaluate(sentence, xs, evalBuf); 66 Add(writer, sentence, evalBuf); 67 } 68 } 69 70 private static void TestFLANN() { 71 var rand = new Random(1234); 72 var dim = 100; 73 var N = 10000; 74 var dataset = new List<double[]>(); 75 for(int i=0;i<N;i++) { 76 var x = new double[dim]; 77 for (int j = 0; j < dim; j++) x[j] = rand.NextDouble(); 78 
dataset.Add(x); 79 } 80 List<int> nnResults; 81 List<double> nnDists; 82 Flann.FindNearestNeighbours(dataset, dataset, out nnResults, out nnDists); 83 84 } 66 List<double[]> functions = new List<double[]>(); 67 List<string> sentences = new List<string>(); 68 List<double> qualities = new List<double>(); 69 70 foreach (var kvp in hash2Sentences) { 71 var ls = kvp.Value; 72 var sentence = FindShortest(ls); 73 Evaluate(sentence, xs, evalBuf); 74 if (evalBuf.Any(ei => float.IsInfinity((float)ei) || float.IsNaN((float)ei))) { 75 Console.WriteLine("skipping {0} {1}", evalBuf.Average(), sentence); 76 } else { 77 try { 78 Scale(evalBuf, ys); 79 functions.Add((double[])evalBuf.Clone()); 80 sentences.Add(sentence); 81 HeuristicLab.Problems.DataAnalysis.OnlineCalculatorError error; 82 qualities.Add(HeuristicLab.Problems.DataAnalysis.OnlinePearsonsRSquaredCalculator.Calculate(evalBuf, ys, out error)); 83 } catch (ArgumentException e) { 84 // scaling failed 85 } 86 } 87 } 88 89 List<int> clusters; 90 List<double> distances; 91 Flann.FindClusters(functions, out clusters, out distances, 100); 92 93 // output all clusters and functions 94 using (var writer = new StreamWriter(new System.IO.Compression.GZipStream(new FileStream(outputFileName, FileMode.OpenOrCreate), System.IO.Compression.CompressionMode.Compress))) { 95 for (int i = 0; i < functions.Count; i++) { 96 writer.WriteLine("{0};{1};{2};{3};{4};{5}", clusters[i], distances[i], qualities[i], sentences[i], postfix2infix[sentences[i]], string.Join(";", functions[i].Select(fi => fi.ToString()))); 97 } 98 } 99 100 var funClusters = functions.Zip(clusters, (f, c) => Tuple.Create(f, c)).GroupBy(t => t.Item2); 101 var dtView = new DataTableView(); 102 dtView.Size = new Size(800, 600); 103 104 foreach (var funCluster in funClusters) { 105 // draw the functions for each cluster into a separate png 106 var dtName = string.Format("R² {0}", Enumerable.Range(0, qualities.Count).Where(idx => clusters[idx] == funCluster.Key).Select(idx => 
qualities[idx]).Average()); 107 var dt = new DataTable(dtName, dtName); 108 var rows = new List<DataRow>(); 109 int i = 0; 110 foreach (var fun in funCluster.Select(t => t.Item1)) { 111 var name = i.ToString(); 112 var dr = new DataRow(name, name, fun); 113 rows.Add(dr); 114 i++; 115 } 116 dt.Rows.AddRange(rows); 117 dtView.Content = dt; 118 using (var bm = new Bitmap(800, 600)) { 119 dtView.DrawToBitmap(bm, new Rectangle(0, 0, 800, 600)); 120 bm.Save(Path.Combine(clusterFolder, string.Format("cluster_{0,3}.png", funCluster.Key))); 121 } 122 } 123 } 124 125 85 126 86 127 private static string FindShortest(List<string> ls) { … … 91 132 return minElem; 92 133 } 134 93 135 94 136 … … 102 144 HeuristicLab.Problems.DataAnalysis.OnlineLinearScalingParameterCalculator.Calculate(evalBuf, ys, out alpha, out beta, out error); 103 145 if (error != HeuristicLab.Problems.DataAnalysis.OnlineCalculatorError.None) { 104 alpha = 0.0; 105 beta = 1.0; 106 Console.WriteLine("WARNING: error in scaling"); 146 throw new ArgumentException(); 107 147 } 108 148 … … 110 150 evalBuf[i] = beta * evalBuf[i] + alpha; 111 151 } 112 113 114 //115 // var meanE = 0.0;116 // var meanE2 = 0.0;117 // var meanY = 0.0;118 // var meanY2 = 0.0;119 // for(int i=0;i<evalBuf.Length;i++) {120 // var deltaE = evalBuf[i] - meanE;121 // meanE += deltaE / (i+1);122 // var deltaE2 = evalBuf[i] - meanE;123 // meanE2 += deltaE * deltaE2;124 //125 //126 // var deltaY = ys[i] - meanY;127 // meanY += deltaY / (i + 1);128 // var deltaY2 = ys[i] - meanY;129 // meanY2 += deltaY * deltaY2;130 // TODO COVARIANCE131 // Linear Scaling: b = cov(y,e) / var(e); a = meanY - b*meanE;132 // }133 //134 // var varE = meanE2 / evalBuf.Length;135 // var varY = meanY2 / evalBuf.Length;136 // Console.WriteLine("{0} {1} {2} {3}", meanE, evalBuf.Average(), meanY, ys.Average());137 // Console.WriteLine("{0} {1}", varE, varY);138 //139 // var factor = varY / varE;140 // for(int i=0;i<evalBuf.Length;i++) {141 // evalBuf[i] = (evalBuf[i] - 
meanE) * factor + meanY;142 // }143 152 } 144 153 … … 158 167 var b = stack[topOfStack - 1]; 159 168 for (int i = 0; i < N; i++) { 160 a[i] += b[i]; 161 } 169 b[i] += a[i]; 170 } 171 topOfStack--; 162 172 break; 163 173 } … … 167 177 var b = stack[topOfStack - 1]; 168 178 for (int i = 0; i < N; i++) { 169 a[i] *= b[i]; 170 } 179 b[i] *= a[i]; 180 } 181 topOfStack--; 171 182 break; 172 183 } … … 231 242 } 232 243 #endregion 233 234 235 236 // add the line with it's evaluation result to a data structure for clustering237 private static void Add(StreamWriter writer, string line, double[] evalBuf) {238 var avg = evalBuf.Average();239 if (double.IsNaN(avg) || double.IsInfinity(avg)) {240 Console.WriteLine("skipping {0} {1}", evalBuf.Average(), line);241 } else {242 Scale(evalBuf, ys);243 244 writer.WriteLine(string.Join("\t", evalBuf.Select(ei => ei.ToString(System.Globalization.CultureInfo.InvariantCulture))));245 }246 }247 244 } 248 245 }
Note: See TracChangeset for help on using the changeset viewer.