Changeset 15903
- Timestamp: 04/13/18 16:48:56 (7 years ago)
- Location: branches/2886_SymRegGrammarEnumeration
- Files: 3 added, 2 edited
Legend:
- Unmodified
- Added
- Removed
branches/2886_SymRegGrammarEnumeration/ExpressionClustering/ExpressionClustering.csproj
r15842 r15903 56 56 <HintPath>..\..\..\trunk\bin\HeuristicLab.Core.Views-3.3.dll</HintPath> 57 57 </Reference> 58 <Reference Include="HeuristicLab.Data-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec" /> 59 <Reference Include="HeuristicLab.Encodings.SymbolicExpressionTreeEncoding-3.4"> 60 <HintPath>..\..\..\trunk\bin\HeuristicLab.Encodings.SymbolicExpressionTreeEncoding-3.4.dll</HintPath> 61 </Reference> 58 62 <Reference Include="HeuristicLab.MainForm-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 59 63 <SpecificVersion>False</SpecificVersion> … … 66 70 <Reference Include="HeuristicLab.Problems.DataAnalysis-3.4"> 67 71 <HintPath>..\..\..\trunk\bin\HeuristicLab.Problems.DataAnalysis-3.4.dll</HintPath> 72 </Reference> 73 <Reference Include="HeuristicLab.Problems.DataAnalysis.Symbolic-3.4"> 74 <HintPath>..\..\..\trunk\bin\HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.dll</HintPath> 68 75 </Reference> 69 76 <Reference Include="HeuristicLab.Visualization.ChartControlsExtensions-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> -
branches/2886_SymRegGrammarEnumeration/ExpressionClustering/Program.cs
r15842 r15903 1 1 using System; 2 using System.Collections; 2 3 using System.Collections.Generic; 3 4 using System.Drawing; … … 6 7 using HeuristicLab.Analysis; 7 8 using HeuristicLab.Analysis.Views; 8 using System.Windows.Forms; 9 9 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; 10 using HeuristicLab.Problems.DataAnalysis; 11 using HeuristicLab.Problems.DataAnalysis.Symbolic; 12 13 // Evaluates sentences on randomly generated data 10 14 namespace ExpressionClustering { 11 15 class Program { 12 16 private static readonly string folder = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory); 13 17 private static readonly string clusterFolder = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "clusters"); 14 private static readonly string distinctSentencesFileName = Path.Combine(folder, @"distinctSentences .csv");15 private static readonly string allSentencesFileName = Path.Combine(folder, "allSentences .csv");16 private static readonly string outputFileName = Path.Combine(folder, "evaluations .csv.gz");18 private static readonly string distinctSentencesFileName = Path.Combine(folder, @"distinctSentences_2018-04-13_09-52_TreeSize-10.csv"); 19 private static readonly string allSentencesFileName = Path.Combine(folder, "allSentences_2018-04-13_09-52_TreeSize-10.csv"); 20 private static readonly string outputFileName = Path.Combine(folder, "evaluations_2018-04-13_09-52_TreeSize-10.csv.gz"); 17 21 private static int N = 100; 18 22 private static double[] evalBuf = new double[N]; … … 22 26 private static double max = +5.0; 23 27 private static double[] xs = Enumerable.Range(1, N).Select(xi => ((double)xi / N) * (max - min) + min).ToArray(); // input 24 private static double[] ys = xs.Select(xi => 1.0 / (1 + Math.Pow(xi, -4))).ToArray(); // target (necessary for scaling and clustering 28 29 private static double[] ys_pagie = xs.Select(xi => 1.0 / (1 + Math.Pow(xi, -4))).ToArray(); // a potential target (not 
used for search) 30 31 // x³ * exp(-x) * cos(x) * sin(x) * (sin(x)² * cos(x) - 1) 32 // for keijzer x should be in scale 0 - 10 inclusive 33 private static double[] ys_keijzer4 = xs 34 .Select(xi => xi + 10.0) // scale 35 .Select(xi => xi * xi * xi + Math.Exp(-xi) * Math.Cos(xi) * Math.Sin(xi) * (Math.Sin(xi) * Math.Sin(xi) * Math.Cos(xi) - 1)) 36 .ToArray(); 25 37 26 38 … … 35 47 static void Main(string[] args) { 36 48 37 var hash2Sentences = new Dictionary<string, List<string>>(); 38 // for debugging only 49 var hash2Postfix = new Dictionary<string, List<string>>(); 39 50 var postfix2infix = new Dictionary<string, string>(); 40 41 42 51 43 52 // read all sentences and determine shortest sentences … … 49 58 var line = reader.ReadLine(); 50 59 var toks = line.Split(';'); 51 var hash = toks[2]; 52 List<string> ls; 53 if (!hash2Sentences.TryGetValue(hash, out ls)) { 54 ls = new List<string>(1); 55 hash2Sentences.Add(hash, ls); 60 var hash = toks[0]; 61 var length = toks[1]; 62 var postfix = toks[2]; 63 var infix = toks[3]; 64 List<string> alternativesList; 65 if (!hash2Postfix.TryGetValue(hash, out alternativesList)) { 66 alternativesList = new List<string>(1); 67 hash2Postfix.Add(hash, alternativesList); 56 68 } 57 ls.Add(toks[1]);58 postfix2infix.Add( toks[1], toks[0]);69 alternativesList.Add(postfix); 70 postfix2infix.Add(postfix, infix); 59 71 nSentences++; 60 72 } 61 73 62 Console.WriteLine("{0} {1}", nSentences, hash2 Sentences.Count);74 Console.WriteLine("{0} {1}", nSentences, hash2Postfix.Count); 63 75 //Evaluate(toks[1], xs, evalBuf); 64 76 } … … 66 78 List<double[]> functions = new List<double[]>(); 67 79 List<string> sentences = new List<string>(); 68 List<double> qualities = new List<double>(); 69 70 foreach (var kvp in hash2Sentences) { 80 List<double[]> qualities = new List<double[]>(); // we might have multiple target functions to which we might compare 81 82 var ds = new Dataset(new string[] { "X" }, new IList[] { xs }); 83 foreach (var kvp in 
hash2Postfix) { 71 84 var ls = kvp.Value; 72 85 var sentence = FindShortest(ls); 73 Evaluate(sentence, xs, evalBuf); 74 if (evalBuf.Any(ei => float.IsInfinity((float)ei) || float.IsNaN((float)ei))) { 86 //EvaluatePostfix(sentence, xs, evalBuf); 87 evalBuf = EvaluateInfix(postfix2infix[sentence], ds).ToArray(); 88 if (evalBuf.Any(ei => double.IsInfinity(ei) || double.IsNaN(ei))) { 75 89 Console.WriteLine("skipping {0} {1}", evalBuf.Average(), sentence); 76 90 } else { 77 91 try { 78 Scale(evalBuf , ys);92 Scale(evalBuf); 79 93 functions.Add((double[])evalBuf.Clone()); 80 94 sentences.Add(sentence); 81 HeuristicLab.Problems.DataAnalysis.OnlineCalculatorError error; 82 qualities.Add(HeuristicLab.Problems.DataAnalysis.OnlinePearsonsRSquaredCalculator.Calculate(evalBuf, ys, out error)); 95 OnlineCalculatorError error; 96 var r2_pagie = OnlinePearsonsRSquaredCalculator.Calculate(evalBuf, ys_pagie, out error); 97 if (error != OnlineCalculatorError.None) r2_pagie = 0.0; 98 var r2_keijzer4 = OnlinePearsonsRSquaredCalculator.Calculate(evalBuf, ys_keijzer4, out error); 99 if (error != OnlineCalculatorError.None) r2_keijzer4 = 0.0; 100 qualities.Add(new double[] { r2_pagie, r2_keijzer4}); 83 101 } catch (ArgumentException e) { 84 102 // scaling failed … … 87 105 } 88 106 107 89 108 List<int> clusters; 90 109 List<double> distances; 91 Flann.FindClusters(functions, out clusters, out distances, 100); 92 110 // DEACTIVATED FOR NOW -> USE LARGEVIS in R instead 111 // Flann.FindClusters(functions, out clusters, out distances, 100); 112 clusters = functions.Select(_ => 0).ToList(); 113 distances = functions.Select(_ => 0.0).ToList(); 114 // 93 115 // output all clusters and functions 94 116 using (var writer = new StreamWriter(new System.IO.Compression.GZipStream(new FileStream(outputFileName, FileMode.OpenOrCreate), System.IO.Compression.CompressionMode.Compress))) { 95 117 for (int i = 0; i < functions.Count; i++) { 96 writer.WriteLine("{0};{1};{2};{3};{4};{5}", clusters[i], 
distances[i], qualities[i], sentences[i], postfix2infix[sentences[i]], string.Join(";", functions[i].Select(fi => fi.ToString())));118 writer.WriteLine("{0};{1};{2};{3};{4};{5}", clusters[i], distances[i], string.Join(";", qualities[i]), sentences[i], postfix2infix[sentences[i]], string.Join(";", functions[i].Select(fi => fi.ToString()))); 97 119 } 98 120 } 99 100 var funClusters = functions.Zip(clusters, (f, c) => Tuple.Create(f, c)).GroupBy(t => t.Item2); 101 var dtView = new DataTableView(); 102 dtView.Size = new Size(800, 600); 103 104 foreach (var funCluster in funClusters) { 105 // draw the functions for each cluster into a separate png 106 var dtName = string.Format("R² {0}", Enumerable.Range(0, qualities.Count).Where(idx => clusters[idx] == funCluster.Key).Select(idx => qualities[idx]).Average()); 107 var dt = new DataTable(dtName, dtName); 108 var rows = new List<DataRow>(); 109 int i = 0; 110 foreach (var fun in funCluster.Select(t => t.Item1)) { 111 var name = i.ToString(); 112 var dr = new DataRow(name, name, fun); 113 rows.Add(dr); 114 i++; 115 } 116 dt.Rows.AddRange(rows); 117 dtView.Content = dt; 118 using (var bm = new Bitmap(800, 600)) { 119 dtView.DrawToBitmap(bm, new Rectangle(0, 0, 800, 600)); 120 bm.Save(Path.Combine(clusterFolder, string.Format("cluster_{0,3}.png", funCluster.Key))); 121 } 122 } 121 // 122 // var funClusters = functions.Zip(clusters, (f, c) => Tuple.Create(f, c)).GroupBy(t => t.Item2); 123 // var dtView = new DataTableView(); 124 // dtView.Size = new Size(800, 600); 125 // 126 // foreach (var funCluster in funClusters) { 127 // // draw the functions for each cluster into a separate png 128 // // var dtName = string.Format("R² {0}", Enumerable.Range(0, qualities.Count).Where(idx => clusters[idx] == funCluster.Key).Select(idx => qualities[idx]).Average()); 129 // var dtName = "Cluster"; 130 // var dt = new DataTable(dtName, dtName); 131 // var rows = new List<DataRow>(); 132 // int i = 0; 133 // foreach (var fun in 
funCluster.Select(t => t.Item1)) { 134 // var name = i.ToString(); 135 // var dr = new DataRow(name, name, fun); 136 // rows.Add(dr); 137 // i++; 138 // } 139 // dt.Rows.AddRange(rows); 140 // dtView.Content = dt; 141 // using (var bm = new Bitmap(800, 600)) { 142 // dtView.DrawToBitmap(bm, new Rectangle(0, 0, 800, 600)); 143 // bm.Save(Path.Combine(clusterFolder, string.Format("cluster_{0,3}.png", funCluster.Key))); 144 // } 145 // } 123 146 } 124 147 … … 136 159 137 160 #region evaluation 138 // linear scaling 161 162 // scaling to zero-mean unit variance 163 private static void Scale(double[] evalBuf) { 164 double mean; 165 double variance; 166 var max = evalBuf.Max(); 167 for (int i = 0; i < evalBuf.Length; i++) { 168 evalBuf[i] /= max; 169 } 170 171 OnlineCalculatorError error, varError; 172 OnlineMeanAndVarianceCalculator.Calculate(evalBuf, out mean, out variance, out error, out varError); 173 if(error!=OnlineCalculatorError.None || varError != OnlineCalculatorError.None) { 174 throw new ArgumentException("Cannot scale vector"); 175 } 176 177 for (int i = 0; i < evalBuf.Length; i++) { 178 evalBuf[i] = 1.0 / variance * evalBuf[i] + mean; 179 } 180 } 181 182 // linear scaling to match target 139 183 private static void Scale(double[] evalBuf, double[] ys) { 140 184 double alpha; … … 152 196 } 153 197 198 // evaluates infix expressions (using the infix parser) 199 private static IEnumerable<double> EvaluateInfix(string infixExpr, Dataset ds) { 200 var parser = new HeuristicLab.Problems.DataAnalysis.Symbolic.InfixExpressionParser(); 201 var tree = parser.Parse(infixExpr); 202 var interpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter(); 203 return interpreter.GetSymbolicExpressionTreeValues(tree, ds, Enumerable.Range(0, ds.Rows)); 204 } 205 206 /* 154 207 // evaluates postfix expressions (only for a very specific format) 155 private static void Evaluate (string postfixExpr, double[] xs, double[] evalBuf) {208 private static void 
EvaluatePostfix(string postfixExpr, double[] xs, double[] evalBuf) { 156 209 int topOfStack = -1; 157 210 Evaluate(postfixExpr, 0, xs, ref topOfStack); … … 159 212 } 160 213 214 161 215 private static void Evaluate(string postfixExpr, int exprPos, double[] xs, ref int topOfStack) { 162 216 while (exprPos < postfixExpr.Length) { … … 189 243 } 190 244 case 'c': { 191 // cos 192 exprPos += 4; 193 var a = stack[topOfStack]; 194 for (int i = 0; i < N; i++) { 195 a[i] = Math.Cos(a[i]); 196 } 197 break; 245 if (postfixExpr[exprPos + 1] == 'o') { 246 // cos 247 exprPos += 4; 248 var a = stack[topOfStack]; 249 for (int i = 0; i < N; i++) { 250 a[i] = Math.Cos(a[i]); 251 } 252 break; 253 } else { 254 exprPos += 2; 255 // put 1 onto top of stack // BUG! 256 topOfStack++; 257 var a = stack[topOfStack]; 258 for (int i = 0; i < N; i++) a[i] = 1.0; 259 break; 260 } 198 261 } 199 262 case 's': { … … 241 304 } 242 305 } 306 */ 243 307 #endregion 244 308 }
Note: See TracChangeset for help on using the changeset viewer.