Changeset 15883


Ignore:
Timestamp:
04/04/18 16:23:55 (3 years ago)
Author:
lkammere
Message:

#2886: Priorize phrases whose (fully expanded) terms result in high R².

Location:
branches/2886_SymRegGrammarEnumeration
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Analysis/RSquaredEvaluator.cs

    r15861 r15883  
    1313namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
    1414  public class RSquaredEvaluator : Item, IGrammarEnumerationAnalyzer {
    15     private readonly string BestTrainingQualityResultName = "Best R² (Training)";
    16     private readonly string BestTrainingModelResultName = "Best model (Training)";
    17     private readonly string BestTrainingSolutionResultName = "Best solution (Training)";
     15    public static readonly string BestTrainingQualityResultName = "Best R² (Training)";
     16    public static readonly string BestTrainingModelResultName = "Best model (Training)";
     17    public static readonly string BestTrainingSolutionResultName = "Best solution (Training)";
    1818
    19     private readonly ISymbolicDataAnalysisExpressionTreeInterpreter expressionTreeLinearInterpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter();
     19    private static readonly ISymbolicDataAnalysisExpressionTreeInterpreter expressionTreeLinearInterpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter();
    2020
    2121    public bool OptimizeConstants { get; set; }
     
    7373      Debug.Assert(SymbolicRegressionConstantOptimizationEvaluator.CanOptimizeConstants(tree));
    7474
    75       // TODO: Initialize constant values randomly
    76       // TODO: Restarts
     75      double r2 = Evaluate(problemData, tree, OptimizeConstants);
    7776
    78       double r2;
     77      var bestR2Result = (DoubleValue)algorithm.Results[BestTrainingQualityResultName].Value;
     78      bool better = r2 > bestR2Result.Value;
     79      bool equallyGood = r2.IsAlmost(bestR2Result.Value);
     80      bool shorter = false;
    7981
    80       SymbolicRegressionModel model = new SymbolicRegressionModel(
     82      if (!better && equallyGood) {
     83        shorter = algorithm.BestTrainingSentence != null &&
     84          algorithm.Grammar.GetComplexity(algorithm.BestTrainingSentence) > algorithm.Grammar.GetComplexity(symbolString);
     85      }
     86      if (better || (equallyGood && shorter)) {
     87        bestR2Result.Value = r2;
     88
     89        SymbolicRegressionModel model = new SymbolicRegressionModel(
    8190          problemData.TargetVariable,
    8291          tree,
    8392          expressionTreeLinearInterpreter);
    8493
    85       if (OptimizeConstants) {
     94        algorithm.Results.AddOrUpdateResult(BestTrainingModelResultName, model);
     95
     96        algorithm.BestTrainingSentence = symbolString;
     97      }
     98    }
     99
     100    public static double Evaluate(IRegressionProblemData problemData, SymbolicExpressionTree tree, bool optimizeConstants = true) {
     101      double r2;
     102
     103      // TODO: Initialize constant values randomly
     104      // TODO: Restarts
     105      if (optimizeConstants) {
    86106        r2 = SymbolicRegressionConstantOptimizationEvaluator.OptimizeConstants(expressionTreeLinearInterpreter,
    87107          tree,
     
    100120        }
    101121
    102 
    103122      } else {
    104123        var target = problemData.TargetVariableTrainingValues;
     124
     125        SymbolicRegressionModel model = new SymbolicRegressionModel(
     126          problemData.TargetVariable,
     127          tree,
     128          expressionTreeLinearInterpreter);
     129
    105130        var estVals = model.GetEstimatedValues(problemData.Dataset, problemData.TrainingIndices);
    106131        OnlineCalculatorError error;
     
    109134      }
    110135
    111       var bestR2Result = (DoubleValue)algorithm.Results[BestTrainingQualityResultName].Value;
    112       bool better = r2 > bestR2Result.Value;
    113       bool equallyGood = r2.IsAlmost(bestR2Result.Value);
    114       bool shorter = false;
    115 
    116       if (!better && equallyGood) {
    117         shorter = algorithm.BestTrainingSentence != null &&
    118           algorithm.Grammar.GetComplexity(algorithm.BestTrainingSentence) > algorithm.Grammar.GetComplexity(symbolString);
    119       }
    120       if (better || (equallyGood && shorter)) {
    121         bestR2Result.Value = r2;
    122         algorithm.Results.AddOrUpdateResult(BestTrainingModelResultName, model);
    123 
    124         algorithm.BestTrainingSentence = symbolString;
    125       }
     136      return r2;
    126137    }
    127138  }
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs

    r15861 r15883  
    44using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
    55using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
     6using HeuristicLab.Problems.DataAnalysis;
    67using HeuristicLab.Problems.DataAnalysis.Symbolic;
    78
     
    174175    }
    175176
     177    public double EvaluatePhrase(SymbolString s, IRegressionProblemData problemData, bool optimizeConstants) {
     178      // Create sentences without Expression symbols.
     179      Symbol[] sEval = new Symbol[s.Count()];
     180
     181      for (int i = 0; i < sEval.Length; i++) {
     182        Symbol currSym = s[i];
     183        if (currSym is NonterminalSymbol && currSym != Expr)
     184          return 0.0;
     185
     186        if (currSym == Expr) {
     187          sEval[i] = Const;
     188        } else {
     189          sEval[i] = currSym;
     190        }
     191      }
     192
     193      SymbolicExpressionTree tree = ParseSymbolicExpressionTree(new SymbolString(sEval));
     194      double r2 = RSquaredEvaluator.Evaluate(problemData, tree, optimizeConstants);
     195
     196      return r2;
     197    }
     198
    176199    #region Parse to SymbolicExpressionTree
    177200
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs

    r15861 r15883  
    153153      #endregion
    154154
    155       OpenPhrases.Store(phrase0Hash, phrase0);
     155      OpenPhrases.Store(phrase0Hash, 0.0, phrase0);
    156156      while (OpenPhrases.Count > 0) {
    157157        if (cancellationToken.IsCancellationRequested) break;
     
    185185              OnSentenceGenerated(fetchedPhrase.Hash, fetchedPhrase.SymbolString, phraseHash, newPhrase, expandedSymbol, appliedProductions[i]);
    186186
     187              // Is the best solution found? (only if RSquaredEvaluator is activated)
     188              if (Results.ContainsKey(RSquaredEvaluator.BestTrainingQualityResultName) && ((DoubleValue)Results[RSquaredEvaluator.BestTrainingQualityResultName].Value).Value == 1.0) {
     189                UpdateView(force: true);
     190                return;
     191              }
     192
    187193              if (!DistinctSentencesComplexity.ContainsKey(phraseHash) || DistinctSentencesComplexity[phraseHash] > newPhraseComplexity) {
    188194                if (DistinctSentencesComplexity.ContainsKey(phraseHash)) OverwrittenSentencesCount++; // for analysis only
     
    194200
    195201            } else if (!OpenPhrases.Contains(phraseHash) && !ArchivedPhrases.Contains(phraseHash)) {
    196               OpenPhrases.Store(phraseHash, newPhrase);
     202              double phrasePriority = GetPriority(newPhrase);
     203              OpenPhrases.Store(phraseHash, phrasePriority, newPhrase);
    197204            }
    198205          }
     
    200207      }
    201208      UpdateView(force: true);
     209    }
     210
     211    protected double GetPriority(SymbolString phrase) {
     212      return 1.0 - Grammar.EvaluatePhrase(phrase, Problem.ProblemData, OptimizeConstants);
    202213    }
    203214
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/SearchDataStructure.cs

    r15828 r15883  
    1616
    1717  public enum StorageType {
    18     Stack, Queue, RandomList
     18    PriorityQueue, Stack, Queue, RandomList
    1919  }
    2020
     
    2222
    2323    private Dictionary<int, SymbolString> storedValues; // Store hash-references and associated, actual values
    24     private Action<int> storeInternal; // Store hash-references
     24    private Action<int, double> storeInternal; // Store hash-references
    2525    private Func<int> fetchInternal; // Fetch hash-reference
    2626
     
    2929
    3030      switch (storageType) {
     31        case StorageType.PriorityQueue:
     32          InitPriorityQueue();
     33          break;
    3134        case StorageType.Stack:
    3235          InitStack();
     
    4447    #region SearchStrategies
    4548
     49    private void InitPriorityQueue() {
     50      PriorityQueue<double, int> queue = new PriorityQueue<double, int>(double.MaxValue, double.MinValue, (int)Math.Pow(2.0, 20.0) / 4);
     51      storeInternal = (hash, prio) => queue.Insert(prio, hash);
     52      fetchInternal = () => {
     53        int ret = queue.PeekMinValue();
     54        queue.RemoveMin();
     55        return ret;
     56      };
     57    }
     58
    4659    private void InitStack() {
    4760      Stack<int> stack = new Stack<int>();
    4861
    49       storeInternal = hash => stack.Push(hash);
     62      storeInternal = (hash, prio) => stack.Push(hash);
    5063      fetchInternal = () => stack.Pop();
    5164    }
     
    5467      Queue<int> queue = new Queue<int>();
    5568
    56       storeInternal = hash => queue.Enqueue(hash);
     69      storeInternal = (hash, prio) => queue.Enqueue(hash);
    5770      fetchInternal = () => queue.Dequeue();
    5871    }
     
    6275      System.Random rand = new System.Random(999);
    6376
    64       storeInternal = hash => list.Add(hash);
     77      storeInternal = (hash, prio) => list.Add(hash);
    6578      fetchInternal = () => {
    6679        int indexOfHash = rand.Next(list.Count);
    6780        int result = list[indexOfHash];
    68         list.RemoveAt(indexOfHash);  // TODO: beware this is O(n), at some point in time we should fix this
     81        list.RemoveAt(indexOfHash);  // TODO: beware this is O(n), at some point in time we should fix this. Maybe change to priority queue with random key.
    6982        return result;
    7083      };
     
    8295    }
    8396
    84     public void Store(int hash, SymbolString s) {
    85       storeInternal(hash);
     97    public void Store(int hash, double priority, SymbolString s) {
     98      storeInternal(hash, priority);
    8699      storedValues[hash] = s;
    87100    }
  • branches/2886_SymRegGrammarEnumeration/Test/GrammarEnumerationTest.cs

    r15861 r15883  
    3434        alg.Analyzers.SetItemCheckedState(grammarEnumerationAnalyzer, grammarEnumerationAnalyzer is RSquaredEvaluator);
    3535      }
     36      alg.SearchDataStructure = StorageType.PriorityQueue;
    3637    }
    3738
     
    250251
    251252    [TestMethod]
    252     [TestProperty("Goal", "structure search + const op")]
     253    [TestProperty("Goal", "sinnus const op")]
    253254    public void Constants_Keijzer3() {
    254255      // 0.3*x*sin(2*pi*x)
Note: See TracChangeset for help on using the changeset viewer.