Changeset 15910


Ignore:
Timestamp:
04/17/18 16:59:57 (16 months ago)
Author:
lkammere
Message:

#2886: Fix length parameter when prioritizing phrases, add weighting parameter to control exploration/exploitation during search, and fix copy constructors in Analyzers

Location:
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Analysis/RSquaredEvaluator.cs

    r15883 r15910  
    2323    public RSquaredEvaluator() { }
    2424
    25     protected RSquaredEvaluator(RSquaredEvaluator original, Cloner cloner) {
     25    protected RSquaredEvaluator(RSquaredEvaluator original, Cloner cloner) : base(original, cloner) {
    2626      this.OptimizeConstants = original.OptimizeConstants;
    2727    }
     
    5252    private void OnStarted(object sender, EventArgs eventArgs) {
    5353      GrammarEnumerationAlgorithm algorithm = (GrammarEnumerationAlgorithm)sender;
    54       algorithm.Results.Add(new Result(BestTrainingQualityResultName, new DoubleValue(-1.0)));
    5554
    5655      algorithm.BestTrainingSentence = null;
     
    7574      double r2 = Evaluate(problemData, tree, OptimizeConstants);
    7675
    77       var bestR2Result = (DoubleValue)algorithm.Results[BestTrainingQualityResultName].Value;
    78       bool better = r2 > bestR2Result.Value;
    79       bool equallyGood = r2.IsAlmost(bestR2Result.Value);
     76      double bestR2 = 0.0;
     77      if (algorithm.Results.ContainsKey(BestTrainingQualityResultName))
     78        bestR2 = ((DoubleValue)algorithm.Results[BestTrainingQualityResultName].Value).Value;
     79      bool better = r2 > bestR2;
     80      bool equallyGood = r2.IsAlmost(bestR2);
    8081      bool shorter = false;
    8182
     
    8586      }
    8687      if (better || (equallyGood && shorter)) {
    87         bestR2Result.Value = r2;
     88        algorithm.Results.AddOrUpdateResult(BestTrainingQualityResultName, new DoubleValue(r2));
    8889
    8990        SymbolicRegressionModel model = new SymbolicRegressionModel(
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Analysis/SearchGraphVisualizer.cs

    r15832 r15910  
    1414    public SearchGraphVisualizer() { }
    1515
    16     protected SearchGraphVisualizer(SearchGraphVisualizer original, Cloner cloner) { }
     16    protected SearchGraphVisualizer(SearchGraphVisualizer original, Cloner cloner) : base(original, cloner) { }
    1717
    1818    public override IDeepCloneable Clone(Cloner cloner) {
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Analysis/SentenceLogger.cs

    r15860 r15910  
    2020    public SentenceLogger() { }
    2121
    22     protected SentenceLogger(SentenceLogger original, Cloner cloner) { }
     22    protected SentenceLogger(SentenceLogger original, Cloner cloner) : base(original, cloner) { }
    2323
    2424    public override IDeepCloneable Clone(Cloner cloner) {
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs

    r15907 r15910  
    22using System.Collections.Generic;
    33using System.Linq;
    4 using System.Security.Cryptography;
    54using System.Threading;
    65using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
     
    3231
    3332    private readonly string OptimizeConstantsParameterName = "Optimize Constants";
     33    private readonly string ErrorWeightParameterName = "Error Weight";
    3434    private readonly string SearchDataStructureParameterName = "Search Data Structure";
    3535    private readonly string MaxComplexityParameterName = "Max. Complexity";
     
    5353      get { return MaxComplexityParameter.Value.Value; }
    5454      set { MaxComplexityParameter.Value.Value = value; }
     55    }
     56
     57    protected IValueParameter<DoubleValue> ErrorWeightParameter {
     58      get { return (IValueParameter<DoubleValue>)Parameters[ErrorWeightParameterName]; }
     59    }
     60    public double ErrorWeight {
     61      get { return ErrorWeightParameter.Value.Value; }
     62      set { ErrorWeightParameter.Value.Value = value; }
    5563    }
    5664
     
    105113    public GrammarEnumerationAlgorithm() {
    106114      Problem = new RegressionProblem() {
    107         ProblemData = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenFunctionNine(seed: 1234).GenerateRegressionData()
     115        ProblemData = new HeuristicLab.Problems.Instances.DataAnalysis.PolyTen(seed: 1234).GenerateRegressionData()
    108116      };
    109117
    110       Parameters.Add(new ValueParameter<BoolValue>(OptimizeConstantsParameterName, "Run constant optimization in sentence evaluation.", new BoolValue(true)));
    111       Parameters.Add(new ValueParameter<IntValue>(MaxComplexityParameterName, "The maximum number of variable symbols in a sentence.", new IntValue(5)));
    112       Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(100000)));
    113       Parameters.Add(new ValueParameter<EnumValue<StorageType>>(SearchDataStructureParameterName, new EnumValue<StorageType>(StorageType.Stack)));
     118      Parameters.Add(new ValueParameter<BoolValue>(OptimizeConstantsParameterName, "Run constant optimization in sentence evaluation.", new BoolValue(false)));
     119      Parameters.Add(new ValueParameter<DoubleValue>(ErrorWeightParameterName, "Defines, how much weight is put on a phrase's r² value when priorizing phrases during search.", new DoubleValue(0.8)));
     120      Parameters.Add(new ValueParameter<IntValue>(MaxComplexityParameterName, "The maximum number of variable symbols in a sentence.", new IntValue(12)));
     121      Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(5000)));
     122      Parameters.Add(new ValueParameter<EnumValue<StorageType>>(SearchDataStructureParameterName, new EnumValue<StorageType>(StorageType.PriorityQueue)));
    114123
    115124      var availableAnalyzers = new IGrammarEnumerationAnalyzer[] {
     
    125134        Analyzers.SetItemCheckedState(analyzer, false);
    126135      }
    127       Analyzers.CheckedItemsChanged += AnalyzersOnCheckedItemsChanged;
    128136      Analyzers.SetItemCheckedState(Analyzers.First(analyzer => analyzer is RSquaredEvaluator), true);
    129       Analyzers.SetItemCheckedState(Analyzers.First(analyzer => analyzer is SentenceLogger), true);
    130     }
    131 
    132     public GrammarEnumerationAlgorithm(GrammarEnumerationAlgorithm original, Cloner cloner) : base(original, cloner) { }
     137      //Analyzers.SetItemCheckedState(Analyzers.First(analyzer => analyzer is SentenceLogger), true);
     138    }
     139
     140    public GrammarEnumerationAlgorithm(GrammarEnumerationAlgorithm original, Cloner cloner) : base(original, cloner) {
     141
     142
     143    }
    133144    #endregion
    134145
     
    136147      #region init
    137148      InitResults();
     149
     150      foreach (IGrammarEnumerationAnalyzer grammarEnumerationAnalyzer in Analyzers) {
     151        if (Analyzers.ItemChecked(grammarEnumerationAnalyzer)) {
     152          grammarEnumerationAnalyzer.Register(this);
     153        } else {
     154          grammarEnumerationAnalyzer.Deregister(this);
     155        }
     156      }
    138157
    139158      Analyzers.OfType<RSquaredEvaluator>().First().OptimizeConstants = OptimizeConstants;
     
    152171      var phrase0Hash = Grammar.Hasher.CalcHashCode(phrase0);
    153172      #endregion
     173
     174      int maxSentenceLength = GetMaxSentenceLength();
    154175
    155176      OpenPhrases.Store(phrase0Hash, 0.0, phrase0);
     
    203224
    204225            } else if (!OpenPhrases.Contains(phraseHash) && !ArchivedPhrases.Contains(phraseHash)) {
    205               double phrasePriority = GetPriority(newPhrase);
     226              double phrasePriority = GetPriority(newPhrase, maxSentenceLength);
    206227              OpenPhrases.Store(phraseHash, phrasePriority, newPhrase);
    207228            }
     
    212233    }
    213234
    214     protected double GetPriority(SymbolString phrase) {
    215       double complexity = (double)Grammar.GetComplexity(phrase);
    216 
    217       double length = phrase.Count();
    218       double relLength = (length - 2) / (MaxComplexity * 7);
     235    protected double GetPriority(SymbolString phrase, int maxSentenceLength) {
     236      double relLength = (double)phrase.Count() / maxSentenceLength;
     237
    219238      double r2 = Grammar.EvaluatePhrase(phrase, Problem.ProblemData, OptimizeConstants);
    220239      double error = 1.0 - r2;
    221240
    222       double variables = 0;
    223       for (int i = 0; i < phrase.Count(); i++) {
    224         if (phrase[i] is VariableTerminalSymbol) variables++;
    225       }
    226 
    227       double variableRatio = 1.0 - variables / complexity;
    228 
    229       return 1.5*relLength + error;
     241      return relLength + ErrorWeight * error;
     242    }
     243
     244    private int GetMaxSentenceLength() {
     245      SymbolString s = new SymbolString(Grammar.StartSymbol);
     246
     247      while (Grammar.GetComplexity(s) <= MaxComplexity) {
     248        int expandedSymbolIndex = s.NextNonterminalIndex();
     249        NonterminalSymbol expandedSymbol = (NonterminalSymbol)s[expandedSymbolIndex];
     250
     251        var productions = Grammar.Productions[expandedSymbol];
     252        var longestProduction = productions // Find production with most terminal symbols to expand as much as possible...
     253          .OrderBy(CountTerminals)          // but with lowest complexity/nonterminal count to keep complexity low.                                                                                     
     254          .ThenByDescending(CountNonTerminals)
     255          .First();
     256
     257        s = s.DerivePhrase(expandedSymbolIndex, longestProduction);
     258      }
     259
     260      return s.Count();
     261    }
     262
     263    private int CountTerminals(Production p) {
     264      return p.Count(s => s is TerminalSymbol);
     265    }
     266
     267    private int CountNonTerminals(Production p) {
     268      return p.Count(s => s is NonterminalSymbol);
    230269    }
    231270
     
    263302
    264303    #region events
    265     private void AnalyzersOnCheckedItemsChanged(object sender, CollectionItemsChangedEventArgs<IGrammarEnumerationAnalyzer> collectionItemsChangedEventArgs) {
    266       foreach (IGrammarEnumerationAnalyzer grammarEnumerationAnalyzer in collectionItemsChangedEventArgs.Items) {
    267         if (Analyzers.ItemChecked(grammarEnumerationAnalyzer)) {
    268           grammarEnumerationAnalyzer.Register(this);
    269         } else {
    270           grammarEnumerationAnalyzer.Deregister(this);
    271         }
    272       }
    273     }
    274 
    275304    public event EventHandler<PhraseEventArgs> PhraseFetched;
    276305    private void OnPhraseFetched(int hash, SymbolString symbolString) {
Note: See TracChangeset for help on using the changeset viewer.