Changeset 15723


Ignore:
Timestamp:
02/06/18 13:18:31 (19 months ago)
Author:
lkammere
Message:

#2886: Add simple data analysis tests and further information about the algorithm run.

Location:
branches/2886_SymRegGrammarEnumeration
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs

    r15722 r15723  
    5959
    6060
    61       #region Production ruless
     61      #region Production rules
     62      // order of production is important, since they are accessed via index
     63      // in memoization.
    6264      StartSymbol = Expr;
    6365
     
    8991    }
    9092
     93    /*
     94    #region Memoize subtrees
     95
     96    public void MemoizeSubtrees(SymbolString sentence) {
     97      Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
     98
     99      // Parse root symbol "+"
     100      MemoizeSubtreeExpression(parseStack);
     101    }
     102
     103    private SymbolString MemoizeSubtreeExpression(Stack<TerminalSymbol> parseStack) {
     104      SymbolString subtree = new SymbolString();
     105
     106      if (ReferenceEquals(parseStack.Peek(), Addition)) {
     107        subtree.Add(parseStack.Pop());
     108        subtree.InsertRange(0, MemoizeSubtreeExpression(parseStack));
     109        subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack));
     110
     111        Expr.Alternatives[0].GeneratedSentences.Add(subtree);
     112      } else {
     113        subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack));
     114
     115        Expr.Alternatives[1].GeneratedSentences.Add(subtree);
     116      }
     117
     118      return subtree;
     119    }
     120
     121    private SymbolString MemoizeSubtreeTerm(Stack<TerminalSymbol> parseStack) {
     122      SymbolString subtree = new SymbolString();
     123
     124      if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
     125        subtree.Add(parseStack.Pop());
     126        subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack));
     127        subtree.InsertRange(0, MemoizeSubtreeFactor(parseStack));
     128
     129        Term.Alternatives[0].GeneratedSentences.Add(subtree);
     130      } else {
     131        subtree.InsertRange(0, MemoizeSubtreeFactor(parseStack));
     132
     133        Term.Alternatives[1].GeneratedSentences.Add(subtree);
     134      }
     135
     136      return subtree;
     137    }
     138
     139    private SymbolString MemoizeSubtreeFactor(Stack<TerminalSymbol> parseStack) {
     140      SymbolString subtree = new SymbolString(MemoizeSubtreeVar(parseStack));
     141
     142      Factor.Alternatives[0].GeneratedSentences.Add(subtree);
     143      return subtree;
     144    }
     145
     146    private SymbolString MemoizeSubtreeVar(Stack<TerminalSymbol> parseStack) {
     147      SymbolString subtree = new SymbolString(parseStack.Pop().ToEnumerable());
     148
     149      // ... not really
     150      //Var.Alternatives[0].GeneratedSentences.Add(subtree);
     151      return subtree;
     152    }
     153
     154
     155    #endregion
     156    */
     157   
     158    #region Hashing
    91159    public int CalcHashCode(SymbolString sentence) {
    92160      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
     
    99167    }
    100168
    101     #region Hashing
    102169    private int[] GetSubtreeHashes(TerminalSymbol currentSymbol, Stack<TerminalSymbol> parseStack) {
    103170      List<int> childHashes = null;
     
    166233      return childHashes.ToArray();
    167234    }
    168 
    169235
    170236    private int AggregateHashes(IEnumerable<int> hashes) {
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs

    r15722 r15723  
    1 using System;
    2 using System.Collections.Generic;
    3 using System.Collections.ObjectModel;
    4 using System.Diagnostics;
     1using System.Collections.Generic;
    52using System.Linq;
    63using System.Threading;
     
    2320  public class GrammarEnumerationAlgorithm : FixedDataAnalysisAlgorithm<IRegressionProblem> {
    2421    private readonly string BestTrainingSolution = "Best solution (training)";
     22    private readonly string BestTrainingSolutionString = "Best solution string (training)";
    2523    private readonly string BestTrainingSolutionQuality = "Best solution quality (training)";
    2624    private readonly string BestTestSolution = "Best solution (test)";
     25    private readonly string BestTestSolutionString = "Best solution string (test)";
    2726    private readonly string BestTestSolutionQuality = "Best solution quality (test)";
    2827
    2928    private readonly string MaxTreeSizeParameterName = "Max. Tree Nodes";
    3029    private readonly string GuiUpdateIntervalParameterName = "GUI Update Interval";
     30    private readonly string UseMemoizationParameterName = "Use Memoization?";
    3131
    3232
    3333    #region properties
    34     public IValueParameter<IntValue> MaxTreeSizeParameter {
     34    protected IValueParameter<IntValue> MaxTreeSizeParameter {
    3535      get { return (IValueParameter<IntValue>)Parameters[MaxTreeSizeParameterName]; }
    3636    }
    37 
    3837    public int MaxTreeSize {
    3938      get { return MaxTreeSizeParameter.Value.Value; }
     39      set { MaxTreeSizeParameter.Value.Value = value; }
    4040    }
    4141
    42     public IValueParameter<IntValue> GuiUpdateIntervalParameter {
    43       get { return (IValueParameter<IntValue>)Parameters[MaxTreeSizeParameterName]; }
     42    protected IValueParameter<IntValue> GuiUpdateIntervalParameter {
     43      get { return (IValueParameter<IntValue>)Parameters[GuiUpdateIntervalParameterName]; }
     44    }
     45    public int GuiUpdateInterval {
     46      get { return GuiUpdateIntervalParameter.Value.Value; }
     47      set { GuiUpdateIntervalParameter.Value.Value = value; }
    4448    }
    4549
    46     public int GuiUpdateInterval {
    47       get { return GuiUpdateIntervalParameter.Value.Value; }
     50    protected IValueParameter<BoolValue> UseMemoizationParameter {
     51      get { return (IValueParameter<BoolValue>)Parameters[UseMemoizationParameterName]; }
     52    }
     53    public bool UseMemoization {
     54      get { return UseMemoizationParameter.Value.Value; }
     55      set { UseMemoizationParameter.Value.Value = value; }
    4856    }
    4957
     
    5967
    6068    public GrammarEnumerationAlgorithm() {
    61       Problem = new RegressionProblem();
    6269
    63       Parameters.Add(new ValueParameter<IntValue>(MaxTreeSizeParameterName, "The number of clusters.", new IntValue(4)));
     70      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234);
     71      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));
     72
     73      Problem = new RegressionProblem() {
     74        ProblemData = regProblem
     75      };
     76
     77      Parameters.Add(new ValueParameter<IntValue>(MaxTreeSizeParameterName, "The number of clusters.", new IntValue(6)));
    6478      Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(4000)));
     79      Parameters.Add(new ValueParameter<BoolValue>(UseMemoizationParameterName, "Should already subtrees be reused within a run.", new BoolValue(true)));
    6580    }
    6681
     
    7287      List<SymbolString> allGenerated = new List<SymbolString>();
    7388      List<SymbolString> distinctGenerated = new List<SymbolString>();
     89
     90      int expansions = 0;
     91
    7492      HashSet<int> evaluatedHashes = new HashSet<int>();
    7593
     
    87105          allGenerated.Add(currSymbolString);
    88106
    89           if (evaluatedHashes.Add(grammar.CalcHashCode(currSymbolString))) {
    90             EvaluateSentence(currSymbolString);
    91             distinctGenerated.Add(currSymbolString);
    92           }
     107          //if (evaluatedHashes.Add(grammar.CalcHashCode(currSymbolString))) {
     108          EvaluateSentence(currSymbolString);
     109          //distinctGenerated.Add(currSymbolString);
     110          //}
    93111
    94           UpdateView(allGenerated, distinctGenerated);
     112          UpdateView(allGenerated, distinctGenerated, expansions);
    95113
    96114        } else {
     
    111129      }
    112130
     131      UpdateView(allGenerated, distinctGenerated, expansions, force: true);
     132
    113133      StringArray sentences = new StringArray(allGenerated.Select(r => r.ToString()).ToArray());
    114134      Results.Add(new Result("All generated sentences", sentences));
     
    118138
    119139
    120     private void UpdateView(List<SymbolString> allGenerated, List<SymbolString> distinctGenerated) {
     140    private void UpdateView(List<SymbolString> allGenerated, List<SymbolString> distinctGenerated, int expansions, bool force = false) {
    121141      int generatedSolutions = allGenerated.Count;
    122142      int distinctSolutions = distinctGenerated.Count;
    123143
    124       if (generatedSolutions % GuiUpdateInterval == 0) {
     144      if (force || generatedSolutions % GuiUpdateInterval == 0) {
    125145        Results.AddOrUpdateResult("Generated Solutions", new IntValue(generatedSolutions));
    126         Results.Add(new Result("Distinct Solutions", new IntValue(distinctSolutions)));
     146        Results.AddOrUpdateResult("Distinct Solutions", new IntValue(distinctSolutions));
    127147
    128148        DoubleValue averageTreeLength = new DoubleValue(allGenerated.Select(r => r.Count).Average());
    129         Results.Add(new Result("Average Tree Length of Solutions", averageTreeLength));
     149        Results.AddOrUpdateResult("Average Tree Length of Solutions", averageTreeLength);
     150
     151        IntValue expansionsValue = new IntValue(expansions);
     152        Results.AddOrUpdateResult("Expansions", expansionsValue);
    130153      }
    131154    }
     
    146169
    147170        Results.Add(new Result(BestTrainingSolution, newSolution));
     171        Results.Add(new Result(BestTrainingSolutionString, new StringValue(symbolString.ToString()).AsReadOnly()));
    148172        Results.Add(new Result(BestTrainingSolutionQuality, new DoubleValue(newSolution.TrainingRSquared).AsReadOnly()));
    149173        Results.Add(new Result(BestTestSolution, newSolution));
     174        Results.Add(new Result(BestTestSolutionString, new StringValue(symbolString.ToString()).AsReadOnly()));
    150175        Results.Add(new Result(BestTestSolutionQuality, new DoubleValue(newSolution.TestRSquared).AsReadOnly()));
    151176
     
    154179        if (currBestTrainingSolution.TrainingRSquared < newSolution.TrainingRSquared) {
    155180          currBestTrainingSolutionResult.Value = newSolution;
     181          Results.AddOrUpdateResult(BestTrainingSolutionString, new StringValue(symbolString.ToString()));
    156182          Results.AddOrUpdateResult(BestTrainingSolutionQuality, new DoubleValue(newSolution.TrainingRSquared).AsReadOnly());
    157183        }
     
    160186        if (currBestTestSolution.TestRSquared < newSolution.TestRSquared) {
    161187          currBestTestSolutionResult.Value = newSolution;
     188          Results.AddOrUpdateResult(BestTestSolutionString, new StringValue(symbolString.ToString()));
    162189          Results.AddOrUpdateResult(BestTestSolutionQuality, new DoubleValue(newSolution.TestRSquared).AsReadOnly());
    163190        }
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Sentence.cs

    r15712 r15723  
    77namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration {
    88  public class SymbolString : List<Symbol> {
     9
     10    public SymbolString() { }
    911
    1012    public SymbolString(IEnumerable<Symbol> symbols) : base(symbols) { }
     
    2022    }
    2123
    22     public override string ToString()
    23     {
     24    public override string ToString() {
    2425      return string.Join(" ", this);
    2526    }
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Symbol.cs

    r15714 r15723  
    11using System.Collections.Generic;
    22using System.Linq;
     3using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
    34
    45namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
  • branches/2886_SymRegGrammarEnumeration/Test/GrammarEnumerationTest.cs

    r15714 r15723  
    11using System;
    2 using System.Collections.Generic;
    32using System.Linq;
    43using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration;
     
    109
    1110namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
    12   //[TestClass()]
     11  [TestClass()]
    1312  public class MctsSymbolicRegressionTest {
    1413    private const int Seed = 1234;
    1514    private IRandom rand;
    1615
    17     private const double SuccessThreshold = 0.99999;
     16    private const double SuccessThreshold = 0.9999999;
     17
     18    private GrammarEnumerationAlgorithm alg;
     19    private RegressionProblem problem;
    1820
    1921    [TestInitialize]
     
    2123      Console.Write("init called... ");
    2224      rand = new FastRandom(Seed);
    23     }
     25
     26      alg = new GrammarEnumerationAlgorithm();
     27      problem = new RegressionProblem();
     28      alg.Problem = problem;
     29      alg.GuiUpdateInterval = int.MaxValue;
     30    }
     31
     32
     33    private void TestGrammarEnumeration(IRegressionProblemData problemData) {
     34      alg.Problem.ProblemData = problemData;
     35
     36      // Run
     37      alg.Start();
     38
     39      // Evaluate results
     40      var eps = 1.0 - SuccessThreshold;
     41
     42      // Check if algorithm terminated correctly
     43      Assert.IsTrue(alg.Results.ContainsKey("Best solution quality (training)"), "No training solution returned!");
     44      Assert.IsTrue(alg.Results.ContainsKey("Best solution quality (test)"), "No test solution returned!");
     45
     46      // Check results
     47      Assert.AreEqual(1.0, ((DoubleValue)alg.Results["Best solution quality (training)"].Value).Value, eps, "Training quality too low!");
     48      Assert.AreEqual(1.0, ((DoubleValue)alg.Results["Best solution quality (test)"].Value).Value, eps, "Test quality too low!");
     49
     50      // Check overfitting
     51      Assert.AreEqual(alg.Results["Best solution string (training)"].Value.ToString(),
     52                      alg.Results["Best solution string (test)"].Value.ToString());
     53    }
     54
    2455
    2556    #region test structure search (no constants)
    2657    [TestMethod]
    27     [TestCategory("Algorithms.DataAnalysis")]
    28     [TestProperty("Time", "short")]
     58    [TestProperty("Goal", "structure search")]
    2959    public void MctsSymbReg_NoConstants_Nguyen1() {
    3060      // x³ + x² + x
     61      alg.MaxTreeSize = 15;
    3162      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(Seed);
    3263      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F1 ")));
    3364      TestGrammarEnumeration(regProblem);
    34     }
    35     [TestMethod]
    36     [TestCategory("Algorithms.DataAnalysis")]
    37     [TestProperty("Time", "short")]
     65
     66      Console.WriteLine("Nguyen1: " + alg.Results["Best solution string (training)"].Value);
     67    }
     68
     69    [TestMethod]
     70    [TestProperty("Goal", "structure search")]
    3871    public void MctsSymbReg_NoConstants_Nguyen2() {
    3972      // x^4 + x³ + x² + x
     73      alg.MaxTreeSize = 20;
    4074      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(Seed);
    4175      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F2 ")));
    4276      TestGrammarEnumeration(regProblem);
    43     }
    44     [TestMethod]
    45     [TestCategory("Algorithms.DataAnalysis")]
    46     [TestProperty("Time", "short")]
     77
     78      Console.WriteLine("Nguyen2: " + alg.Results["Best solution string (training)"].Value);
     79    }
     80
     81    /*  NEXT UP
     82    [TestMethod]
     83    [TestProperty("Goal", "structure search")]
    4784    public void MctsSymbReg_NoConstants_Nguyen3() {
    4885      // x^5 + x^4 + x³ + x² + x
     86      alg.MaxTreeSize = 25;
    4987      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(Seed);
    5088      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F3 ")));
     
    5290    }
    5391    [TestMethod]
    54     [TestCategory("Algorithms.DataAnalysis")]
    55     [TestProperty("Time", "short")]
     92    [TestProperty("Goal", "structure search")]
    5693    public void MctsSymbReg_NoConstants_Nguyen4() {
     94
    5795      // x^6 + x^5 + x^4 + x³ + x² + x
     96      alg.MaxTreeSize = 30;
    5897      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(Seed);
    5998      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F4 ")));
    6099      TestGrammarEnumeration(regProblem);
    61     }
    62 
     100    } */
     101
     102
     103    #endregion
     104
     105    #region TODO
     106
     107#if false
    63108    [TestMethod]
    64109    [TestCategory("Algorithms.DataAnalysis")]
     
    357402      TestGrammarEnumeration(problemData);
    358403    }
    359     #endregion
    360404
    361405    #region restricted structure but including numeric constants
     
    557601    #endregion
    558602
    559     private void TestGrammarEnumeration(IRegressionProblemData problemData)
    560     {
    561       // Configure algorithm and problem
    562       var alg = new GrammarEnumerationAlgorithm();
    563       var problem = new RegressionProblem();
    564       alg.Problem = problem;
    565       problem.ProblemData = problemData;
    566      
    567       // Run
    568       alg.Start();
    569 
    570       // Evaluate results
    571       var eps = 1.0 - SuccessThreshold;
    572 
    573       // Check if algorithm terminated correctly
    574       Assert.IsTrue(alg.Results.ContainsKey("Best solution quality (training)"), "No training solution returned!");
    575       Assert.IsTrue(alg.Results.ContainsKey("Best solution quality (test)"), "No test solution returned!");
    576 
    577       // Check resultss
    578       Assert.AreEqual(1.0, ((DoubleValue)alg.Results["Best solution quality (training)"].Value).Value, eps, "Training quality too low!");
    579       Assert.AreEqual(1.0, ((DoubleValue)alg.Results["Best solution quality (test)"].Value).Value, eps, "Test quality too low!");
    580     }
     603#endif
     604    #endregion
    581605  }
    582606}
Note: See TracChangeset for help on using the changeset viewer.