Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
03/15/18 10:41:20 (6 years ago)
Author:
gkronber
Message:

#2886: added clustering of functions and output of clusters, fixed bug in evaluation

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/2886_SymRegGrammarEnumeration/ExpressionClustering/Program.cs

    r15840 r15842  
    11using System;
    22using System.Collections.Generic;
     3using System.Drawing;
    34using System.IO;
    45using System.Linq;
     6using HeuristicLab.Analysis;
     7using HeuristicLab.Analysis.Views;
     8using System.Windows.Forms;
    59
    610namespace ExpressionClustering {
    711  class Program {
    812    private static readonly string folder = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
     13    private static readonly string clusterFolder = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "clusters");
    914    private static readonly string distinctSentencesFileName = Path.Combine(folder, @"distinctSentences.csv");
    1015    private static readonly string allSentencesFileName = Path.Combine(folder, "allSentences.csv");
    1116    private static readonly string outputFileName = Path.Combine(folder, "evaluations.csv.gz");
    12     private static readonly string columnDelimiter = ";";
    1317    private static int N = 100;
    1418    private static double[] evalBuf = new double[N];
     
    3135    static void Main(string[] args) {
    3236
    33       TestFLANN();
    34 
    3537      var hash2Sentences = new Dictionary<string, List<string>>();
     38      // for debugging only
     39      var postfix2infix = new Dictionary<string, string>();
    3640
    3741
     
    5256          }
    5357          ls.Add(toks[1]);
     58          postfix2infix.Add(toks[1], toks[0]);
    5459          nSentences++;
    5560        }
     
    5964      }
    6065
    61       using (var writer = new StreamWriter(new System.IO.Compression.GZipStream(new FileStream(outputFileName, FileMode.OpenOrCreate), System.IO.Compression.CompressionMode.Compress)))
    62         foreach (var kvp in hash2Sentences) {
    63           var ls = kvp.Value;
    64           var sentence = FindShortest(ls);
    65           Evaluate(sentence, xs, evalBuf);
    66           Add(writer, sentence, evalBuf);
    67         }
    68     }
    69 
    70     private static void TestFLANN() {
    71       var rand = new Random(1234);
    72       var dim = 100;
    73       var N = 10000;
    74       var dataset = new List<double[]>();
    75       for(int i=0;i<N;i++) {
    76         var x = new double[dim];
    77         for (int j = 0; j < dim; j++) x[j] = rand.NextDouble();
    78         dataset.Add(x);
    79       }
    80       List<int> nnResults;
    81       List<double> nnDists;
    82       Flann.FindNearestNeighbours(dataset, dataset, out nnResults, out nnDists);
    83 
    84     }
     66      List<double[]> functions = new List<double[]>();
     67      List<string> sentences = new List<string>();
     68      List<double> qualities = new List<double>();
     69
     70      foreach (var kvp in hash2Sentences) {
     71        var ls = kvp.Value;
     72        var sentence = FindShortest(ls);
     73        Evaluate(sentence, xs, evalBuf);
     74        if (evalBuf.Any(ei => float.IsInfinity((float)ei) || float.IsNaN((float)ei))) {
     75          Console.WriteLine("skipping {0} {1}", evalBuf.Average(), sentence);
     76        } else {
     77          try {
     78            Scale(evalBuf, ys);
     79            functions.Add((double[])evalBuf.Clone());
     80            sentences.Add(sentence);
     81            HeuristicLab.Problems.DataAnalysis.OnlineCalculatorError error;
     82            qualities.Add(HeuristicLab.Problems.DataAnalysis.OnlinePearsonsRSquaredCalculator.Calculate(evalBuf, ys, out error));
     83          } catch (ArgumentException e) {
     84            // scaling failed
     85          }
     86        }
     87      }
     88
     89      List<int> clusters;
     90      List<double> distances;
     91      Flann.FindClusters(functions, out clusters, out distances, 100);
     92     
     93      // output all clusters and functions
     94      using (var writer = new StreamWriter(new System.IO.Compression.GZipStream(new FileStream(outputFileName, FileMode.OpenOrCreate), System.IO.Compression.CompressionMode.Compress))) {
     95        for (int i = 0; i < functions.Count; i++) {
     96          writer.WriteLine("{0};{1};{2};{3};{4};{5}", clusters[i], distances[i], qualities[i], sentences[i], postfix2infix[sentences[i]], string.Join(";", functions[i].Select(fi => fi.ToString())));
     97        }
     98      }
     99
     100      var funClusters = functions.Zip(clusters, (f, c) => Tuple.Create(f, c)).GroupBy(t => t.Item2);
     101      var dtView = new DataTableView();
     102      dtView.Size = new Size(800, 600);
     103
     104      foreach (var funCluster in funClusters) {
     105        // draw the functions for each cluster into a separate png
     106        var dtName = string.Format("R² {0}", Enumerable.Range(0, qualities.Count).Where(idx => clusters[idx] == funCluster.Key).Select(idx => qualities[idx]).Average());
     107        var dt = new DataTable(dtName, dtName);
     108        var rows = new List<DataRow>();
     109        int i = 0;
     110        foreach (var fun in funCluster.Select(t => t.Item1)) {
     111          var name = i.ToString();
     112          var dr = new DataRow(name, name, fun);
     113          rows.Add(dr);
     114          i++;
     115        }
     116        dt.Rows.AddRange(rows);
     117        dtView.Content = dt;
     118        using (var bm = new Bitmap(800, 600)) {
     119          dtView.DrawToBitmap(bm, new Rectangle(0, 0, 800, 600));
     120          bm.Save(Path.Combine(clusterFolder, string.Format("cluster_{0,3}.png", funCluster.Key)));
     121        }
     122      }
     123    }
     124
     125
    85126
    86127    private static string FindShortest(List<string> ls) {
     
    91132      return minElem;
    92133    }
     134
    93135
    94136
     
    102144      HeuristicLab.Problems.DataAnalysis.OnlineLinearScalingParameterCalculator.Calculate(evalBuf, ys, out alpha, out beta, out error);
    103145      if (error != HeuristicLab.Problems.DataAnalysis.OnlineCalculatorError.None) {
    104         alpha = 0.0;
    105         beta = 1.0;
    106         Console.WriteLine("WARNING: error in scaling");
     146        throw new ArgumentException();
    107147      }
    108148
     
    110150        evalBuf[i] = beta * evalBuf[i] + alpha;
    111151      }
    112 
    113 
    114       //
    115       //   var meanE = 0.0;
    116       //   var meanE2 = 0.0;
    117       //   var meanY = 0.0;
    118       //   var meanY2 = 0.0;
    119       //   for(int i=0;i<evalBuf.Length;i++) {
    120       //     var deltaE = evalBuf[i] - meanE;
    121       //     meanE +=  deltaE / (i+1);
    122       //     var deltaE2 = evalBuf[i] - meanE;
    123       //     meanE2 += deltaE * deltaE2;
    124       //
    125       //
    126       //     var deltaY = ys[i] - meanY;
    127       //     meanY += deltaY / (i + 1);
    128       //     var deltaY2 = ys[i] - meanY;
    129       //     meanY2 += deltaY * deltaY2;
    130       //     TODO COVARIANCE
    131       //     Linear Scaling: b = cov(y,e) / var(e); a = meanY - b*meanE;
    132       //   }
    133       //
    134       //   var varE = meanE2 / evalBuf.Length;
    135       //   var varY = meanY2 / evalBuf.Length;
    136       //   Console.WriteLine("{0} {1} {2} {3}", meanE, evalBuf.Average(), meanY, ys.Average());
    137       //   Console.WriteLine("{0} {1}", varE, varY);
    138       //
    139       //   var factor = varY / varE;
    140       //   for(int i=0;i<evalBuf.Length;i++) {
    141       //     evalBuf[i] = (evalBuf[i] - meanE) * factor + meanY;
    142       //   }
    143152    }
    144153
     
    158167              var b = stack[topOfStack - 1];
    159168              for (int i = 0; i < N; i++) {
    160                 a[i] += b[i];
    161               }
     169                b[i] += a[i];
     170              }
     171              topOfStack--;
    162172              break;
    163173            }
     
    167177              var b = stack[topOfStack - 1];
    168178              for (int i = 0; i < N; i++) {
    169                 a[i] *= b[i];
    170               }
     179                b[i] *= a[i];
     180              }
     181              topOfStack--;
    171182              break;
    172183            }
     
    231242    }
    232243    #endregion
    233 
    234 
    235 
    236     // add the line with its evaluation result to a data structure for clustering
    237     private static void Add(StreamWriter writer, string line, double[] evalBuf) {
    238       var avg = evalBuf.Average();
    239       if (double.IsNaN(avg) || double.IsInfinity(avg)) {
    240         Console.WriteLine("skipping {0} {1}", evalBuf.Average(), line);
    241       } else {
    242         Scale(evalBuf, ys);
    243 
    244         writer.WriteLine(string.Join("\t", evalBuf.Select(ei => ei.ToString(System.Globalization.CultureInfo.InvariantCulture))));
    245       }
    246     }
    247244  }
    248245}
Note: See TracChangeset for help on using the changeset viewer.