Changeset 15903


Ignore:
Timestamp:
04/13/18 16:48:56 (4 years ago)
Author:
gkronber
Message:

#2886 worked on cluster analysis / visualization for GPTP

Location:
branches/2886_SymRegGrammarEnumeration
Files:
3 added
2 edited

Legend:

Unmodified
Added
Removed
  • branches/2886_SymRegGrammarEnumeration/ExpressionClustering/ExpressionClustering.csproj

    r15842 r15903  
    5656      <HintPath>..\..\..\trunk\bin\HeuristicLab.Core.Views-3.3.dll</HintPath>
    5757    </Reference>
     58    <Reference Include="HeuristicLab.Data-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec" />
     59    <Reference Include="HeuristicLab.Encodings.SymbolicExpressionTreeEncoding-3.4">
     60      <HintPath>..\..\..\trunk\bin\HeuristicLab.Encodings.SymbolicExpressionTreeEncoding-3.4.dll</HintPath>
     61    </Reference>
    5862    <Reference Include="HeuristicLab.MainForm-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    5963      <SpecificVersion>False</SpecificVersion>
     
    6670    <Reference Include="HeuristicLab.Problems.DataAnalysis-3.4">
    6771      <HintPath>..\..\..\trunk\bin\HeuristicLab.Problems.DataAnalysis-3.4.dll</HintPath>
     72    </Reference>
     73    <Reference Include="HeuristicLab.Problems.DataAnalysis.Symbolic-3.4">
     74      <HintPath>..\..\..\trunk\bin\HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.dll</HintPath>
    6875    </Reference>
    6976    <Reference Include="HeuristicLab.Visualization.ChartControlsExtensions-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
  • branches/2886_SymRegGrammarEnumeration/ExpressionClustering/Program.cs

    r15842 r15903  
    11using System;
     2using System.Collections;
    23using System.Collections.Generic;
    34using System.Drawing;
     
    67using HeuristicLab.Analysis;
    78using HeuristicLab.Analysis.Views;
    8 using System.Windows.Forms;
    9 
     9using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
     10using HeuristicLab.Problems.DataAnalysis;
     11using HeuristicLab.Problems.DataAnalysis.Symbolic;
     12
     13// Evaluates sentences on randomly generated data
    1014namespace ExpressionClustering {
    1115  class Program {
    1216    private static readonly string folder = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
    1317    private static readonly string clusterFolder = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "clusters");
    14     private static readonly string distinctSentencesFileName = Path.Combine(folder, @"distinctSentences.csv");
    15     private static readonly string allSentencesFileName = Path.Combine(folder, "allSentences.csv");
    16     private static readonly string outputFileName = Path.Combine(folder, "evaluations.csv.gz");
     18    private static readonly string distinctSentencesFileName = Path.Combine(folder, @"distinctSentences_2018-04-13_09-52_TreeSize-10.csv");
     19    private static readonly string allSentencesFileName = Path.Combine(folder, "allSentences_2018-04-13_09-52_TreeSize-10.csv");
     20    private static readonly string outputFileName = Path.Combine(folder, "evaluations_2018-04-13_09-52_TreeSize-10.csv.gz");
    1721    private static int N = 100;
    1822    private static double[] evalBuf = new double[N];
     
    2226    private static double max = +5.0;
    2327    private static double[] xs = Enumerable.Range(1, N).Select(xi => ((double)xi / N) * (max - min) + min).ToArray(); // input
    24     private static double[] ys = xs.Select(xi => 1.0 / (1 + Math.Pow(xi, -4))).ToArray(); // target (necessary for scaling and clustering
     28
     29    private static double[] ys_pagie = xs.Select(xi => 1.0 / (1 + Math.Pow(xi, -4))).ToArray(); // a potential target (not used for search)
     30
     31    // x³  * exp(-x) * cos(x) * sin(x) * (sin(x)² * cos(x) - 1)
     32    // for keijzer x should be in scale 0 - 10 inclusive
     33    private static double[] ys_keijzer4 = xs
     34      .Select(xi => xi + 10.0) // scale
     35      .Select(xi => xi * xi * xi + Math.Exp(-xi) * Math.Cos(xi) * Math.Sin(xi) * (Math.Sin(xi) * Math.Sin(xi) * Math.Cos(xi) - 1))
     36      .ToArray();
    2537
    2638
     
    3547    static void Main(string[] args) {
    3648
    37       var hash2Sentences = new Dictionary<string, List<string>>();
    38       // for debugging only
     49      var hash2Postfix = new Dictionary<string, List<string>>();
    3950      var postfix2infix = new Dictionary<string, string>();
    40 
    41 
    4251
    4352      // read all sentences and determine shortest sentences
     
    4958          var line = reader.ReadLine();
    5059          var toks = line.Split(';');
    51           var hash = toks[2];
    52           List<string> ls;
    53           if (!hash2Sentences.TryGetValue(hash, out ls)) {
    54             ls = new List<string>(1);
    55             hash2Sentences.Add(hash, ls);
     60          var hash = toks[0];
     61          var length = toks[1];
     62          var postfix = toks[2];
     63          var infix = toks[3];
     64          List<string> alternativesList;
     65          if (!hash2Postfix.TryGetValue(hash, out alternativesList)) {
     66            alternativesList = new List<string>(1);
     67            hash2Postfix.Add(hash, alternativesList);
    5668          }
    57           ls.Add(toks[1]);
    58           postfix2infix.Add(toks[1], toks[0]);
     69          alternativesList.Add(postfix);
     70          postfix2infix.Add(postfix, infix);
    5971          nSentences++;
    6072        }
    6173
    62         Console.WriteLine("{0} {1}", nSentences, hash2Sentences.Count);
     74        Console.WriteLine("{0} {1}", nSentences, hash2Postfix.Count);
    6375        //Evaluate(toks[1], xs, evalBuf);
    6476      }
     
    6678      List<double[]> functions = new List<double[]>();
    6779      List<string> sentences = new List<string>();
    68       List<double> qualities = new List<double>();
    69 
    70       foreach (var kvp in hash2Sentences) {
     80      List<double[]> qualities = new List<double[]>(); // we might have multiple target functions to which we might compare
     81
     82      var ds = new Dataset(new string[] { "X" }, new IList[] { xs });
     83      foreach (var kvp in hash2Postfix) {
    7184        var ls = kvp.Value;
    7285        var sentence = FindShortest(ls);
    73         Evaluate(sentence, xs, evalBuf);
    74         if (evalBuf.Any(ei => float.IsInfinity((float)ei) || float.IsNaN((float)ei))) {
     86        //EvaluatePostfix(sentence, xs, evalBuf);
     87        evalBuf = EvaluateInfix(postfix2infix[sentence], ds).ToArray();
     88        if (evalBuf.Any(ei => double.IsInfinity(ei) || double.IsNaN(ei))) {
    7589          Console.WriteLine("skipping {0} {1}", evalBuf.Average(), sentence);
    7690        } else {
    7791          try {
    78             Scale(evalBuf, ys);
     92            Scale(evalBuf);
    7993            functions.Add((double[])evalBuf.Clone());
    8094            sentences.Add(sentence);
    81             HeuristicLab.Problems.DataAnalysis.OnlineCalculatorError error;
    82             qualities.Add(HeuristicLab.Problems.DataAnalysis.OnlinePearsonsRSquaredCalculator.Calculate(evalBuf, ys, out error));
     95            OnlineCalculatorError error;
     96            var r2_pagie = OnlinePearsonsRSquaredCalculator.Calculate(evalBuf, ys_pagie, out error);
     97            if (error != OnlineCalculatorError.None) r2_pagie = 0.0;
     98            var r2_keijzer4 = OnlinePearsonsRSquaredCalculator.Calculate(evalBuf, ys_keijzer4, out error);
     99            if (error != OnlineCalculatorError.None) r2_keijzer4 = 0.0;
     100            qualities.Add(new double[] { r2_pagie, r2_keijzer4});
    83101          } catch (ArgumentException e) {
    84102            // scaling failed
     
    87105      }
    88106
     107
    89108      List<int> clusters;
    90109      List<double> distances;
    91       Flann.FindClusters(functions, out clusters, out distances, 100);
    92      
     110      // DEACTIVATED FOR NOW -> USE LARGEVIS in R instead
     111      // Flann.FindClusters(functions, out clusters, out distances, 100);
     112      clusters = functions.Select(_ => 0).ToList();
     113      distances = functions.Select(_ => 0.0).ToList();
     114      //
    93115      // output all clusters and functions
    94116      using (var writer = new StreamWriter(new System.IO.Compression.GZipStream(new FileStream(outputFileName, FileMode.OpenOrCreate), System.IO.Compression.CompressionMode.Compress))) {
    95117        for (int i = 0; i < functions.Count; i++) {
    96           writer.WriteLine("{0};{1};{2};{3};{4};{5}", clusters[i], distances[i], qualities[i], sentences[i], postfix2infix[sentences[i]], string.Join(";", functions[i].Select(fi => fi.ToString())));
     118          writer.WriteLine("{0};{1};{2};{3};{4};{5}", clusters[i], distances[i], string.Join(";", qualities[i]), sentences[i], postfix2infix[sentences[i]], string.Join(";", functions[i].Select(fi => fi.ToString())));
    97119        }
    98120      }
    99 
    100       var funClusters = functions.Zip(clusters, (f, c) => Tuple.Create(f, c)).GroupBy(t => t.Item2);
    101       var dtView = new DataTableView();
    102       dtView.Size = new Size(800, 600);
    103 
    104       foreach (var funCluster in funClusters) {
    105         // draw the functions for each cluster into a separate png
    106         var dtName = string.Format("R² {0}", Enumerable.Range(0, qualities.Count).Where(idx => clusters[idx] == funCluster.Key).Select(idx => qualities[idx]).Average());
    107         var dt = new DataTable(dtName, dtName);
    108         var rows = new List<DataRow>();
    109         int i = 0;
    110         foreach (var fun in funCluster.Select(t => t.Item1)) {
    111           var name = i.ToString();
    112           var dr = new DataRow(name, name, fun);
    113           rows.Add(dr);
    114           i++;
    115         }
    116         dt.Rows.AddRange(rows);
    117         dtView.Content = dt;
    118         using (var bm = new Bitmap(800, 600)) {
    119           dtView.DrawToBitmap(bm, new Rectangle(0, 0, 800, 600));
    120           bm.Save(Path.Combine(clusterFolder, string.Format("cluster_{0,3}.png", funCluster.Key)));
    121         }
    122       }
     121      //
     122      // var funClusters = functions.Zip(clusters, (f, c) => Tuple.Create(f, c)).GroupBy(t => t.Item2);
     123      // var dtView = new DataTableView();
     124      // dtView.Size = new Size(800, 600);
     125      //
     126      // foreach (var funCluster in funClusters) {
     127      //   // draw the functions for each cluster into a separate png
     128      //   // var dtName = string.Format("R² {0}", Enumerable.Range(0, qualities.Count).Where(idx => clusters[idx] == funCluster.Key).Select(idx => qualities[idx]).Average());
     129      //   var dtName = "Cluster";
     130      //   var dt = new DataTable(dtName, dtName);
     131      //   var rows = new List<DataRow>();
     132      //   int i = 0;
     133      //   foreach (var fun in funCluster.Select(t => t.Item1)) {
     134      //     var name = i.ToString();
     135      //     var dr = new DataRow(name, name, fun);
     136      //     rows.Add(dr);
     137      //     i++;
     138      //   }
     139      //   dt.Rows.AddRange(rows);
     140      //   dtView.Content = dt;
     141      //   using (var bm = new Bitmap(800, 600)) {
     142      //     dtView.DrawToBitmap(bm, new Rectangle(0, 0, 800, 600));
     143      //     bm.Save(Path.Combine(clusterFolder, string.Format("cluster_{0,3}.png", funCluster.Key)));
     144      //   }
     145      // }
    123146    }
    124147
     
    136159
    137160    #region evaluation
    138     // linear scaling
     161
     162    // scaling to zero-mean unit variance
     163    private static void Scale(double[] evalBuf) {
     164      double mean;
     165      double variance;
     166      var max = evalBuf.Max();
     167      for (int i = 0; i < evalBuf.Length; i++) {
     168        evalBuf[i] /= max;
     169      }
     170
     171      OnlineCalculatorError error, varError;
     172      OnlineMeanAndVarianceCalculator.Calculate(evalBuf, out mean, out variance, out error, out varError);
     173      if(error!=OnlineCalculatorError.None || varError != OnlineCalculatorError.None) {
     174        throw new ArgumentException("Cannot scale vector");
     175      }
     176
     177      for (int i = 0; i < evalBuf.Length; i++) {
     178        evalBuf[i] = 1.0 / variance * evalBuf[i] + mean;
     179      }
     180    }
     181
     182    // linear scaling to match target
    139183    private static void Scale(double[] evalBuf, double[] ys) {
    140184      double alpha;
     
    152196    }
    153197
     198    // evaluates infix expressions (using the infix parser)
     199    private static IEnumerable<double> EvaluateInfix(string infixExpr, Dataset ds) {
     200      var parser = new HeuristicLab.Problems.DataAnalysis.Symbolic.InfixExpressionParser();
     201      var tree = parser.Parse(infixExpr);
     202      var interpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter();
     203      return interpreter.GetSymbolicExpressionTreeValues(tree, ds, Enumerable.Range(0, ds.Rows));
     204    }                                                 
     205
     206    /*
    154207    // evaluates postfix expressions (only for a very specific format)
    155     private static void Evaluate(string postfixExpr, double[] xs, double[] evalBuf) {
     208    private static void EvaluatePostfix(string postfixExpr, double[] xs, double[] evalBuf) {
    156209      int topOfStack = -1;
    157210      Evaluate(postfixExpr, 0, xs, ref topOfStack);
     
    159212    }
    160213
     214   
    161215    private static void Evaluate(string postfixExpr, int exprPos, double[] xs, ref int topOfStack) {
    162216      while (exprPos < postfixExpr.Length) {
     
    189243            }
    190244          case 'c': {
    191               // cos
    192               exprPos += 4;
    193               var a = stack[topOfStack];
    194               for (int i = 0; i < N; i++) {
    195                 a[i] = Math.Cos(a[i]);
    196               }
    197               break;
     245              if (postfixExpr[exprPos + 1] == 'o') {
     246                // cos
     247                exprPos += 4;
     248                var a = stack[topOfStack];
     249                for (int i = 0; i < N; i++) {
     250                  a[i] = Math.Cos(a[i]);
     251                }
     252                break;
     253              } else {
     254                exprPos += 2;
     255                // put 1 onto top of stack     // BUG!
     256                topOfStack++;
     257                var a = stack[topOfStack];
     258                for (int i = 0; i < N; i++) a[i] = 1.0;
     259                break;
     260              }
    198261            }
    199262          case 's': {
     
    241304      }
    242305    }
     306    */
    243307    #endregion
    244308  }
Note: See TracChangeset for help on using the changeset viewer.