Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs @ 15726

Last change on this file since 15726 was 15726, checked in by lkammere, 6 years ago

#2886: Overwrite long sentences when a shorter one with same hash was found.

File size: 9.8 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Threading;
5using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
6using HeuristicLab.Common;
7using HeuristicLab.Core;
8using HeuristicLab.Data;
9using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
10using HeuristicLab.Optimization;
11using HeuristicLab.Parameters;
12using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
13using HeuristicLab.Problems.DataAnalysis;
14using HeuristicLab.Problems.DataAnalysis.Symbolic;
15using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
16
17namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
18  [Item("Grammar Enumeration Symbolic Regression", "Iterates all possible model structures for a fixed grammar.")]
19  [StorableClass]
20  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 250)]
21  public class GrammarEnumerationAlgorithm : FixedDataAnalysisAlgorithm<IRegressionProblem> {
22    private readonly string BestTrainingSolution = "Best solution (training)";
23    private readonly string BestTrainingSolutionQuality = "Best solution quality (training)";
24    private readonly string BestTestSolution = "Best solution (test)";
25    private readonly string BestTestSolutionQuality = "Best solution quality (test)";
26
27    private readonly string MaxTreeSizeParameterName = "Max. Tree Nodes";
28    private readonly string GuiUpdateIntervalParameterName = "GUI Update Interval";
29    private readonly string UseMemoizationParameterName = "Use Memoization?";
30
31    #region properties
32    protected IValueParameter<IntValue> MaxTreeSizeParameter {
33      get { return (IValueParameter<IntValue>)Parameters[MaxTreeSizeParameterName]; }
34    }
35    public int MaxTreeSize {
36      get { return MaxTreeSizeParameter.Value.Value; }
37      set { MaxTreeSizeParameter.Value.Value = value; }
38    }
39
40    protected IValueParameter<IntValue> GuiUpdateIntervalParameter {
41      get { return (IValueParameter<IntValue>)Parameters[GuiUpdateIntervalParameterName]; }
42    }
43    public int GuiUpdateInterval {
44      get { return GuiUpdateIntervalParameter.Value.Value; }
45      set { GuiUpdateIntervalParameter.Value.Value = value; }
46    }
47
48    protected IValueParameter<BoolValue> UseMemoizationParameter {
49      get { return (IValueParameter<BoolValue>)Parameters[UseMemoizationParameterName]; }
50    }
51    public bool UseMemoization {
52      get { return UseMemoizationParameter.Value.Value; }
53      set { UseMemoizationParameter.Value.Value = value; }
54    }
55
56    public SymbolString BestTrainingSentence;
57    public SymbolString BestTestSentence;
58
59    public List<Tuple<SymbolString, int>> DistinctGenerated;
60    public List<Tuple<SymbolString, int>> AllGenerated;
61    #endregion
62
63    public Grammar Grammar;
64
65
66    #region ctors
67    public override IDeepCloneable Clone(Cloner cloner) {
68      return new GrammarEnumerationAlgorithm(this, cloner);
69    }
70
71    public GrammarEnumerationAlgorithm() {
72
73      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234);
74      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));
75
76      Problem = new RegressionProblem() {
77        ProblemData = regProblem
78      };
79
80      Parameters.Add(new ValueParameter<IntValue>(MaxTreeSizeParameterName, "The number of clusters.", new IntValue(6)));
81      Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(4000)));
82      Parameters.Add(new ValueParameter<BoolValue>(UseMemoizationParameterName, "Should already subtrees be reused within a run.", new BoolValue(true)));
83    }
84
85    private GrammarEnumerationAlgorithm(GrammarEnumerationAlgorithm original, Cloner cloner) : base(original, cloner) { }
86    #endregion
87
88
89    protected override void Run(CancellationToken cancellationToken) {
90      BestTrainingSentence = null;
91      BestTrainingSentence = null;
92      AllGenerated = new List<Tuple<SymbolString, int>>();
93      DistinctGenerated = new List<Tuple<SymbolString, int>>();
94
95      Dictionary<int, SymbolString> evaluatedHashes = new Dictionary<int, SymbolString>();
96
97      Grammar = new Grammar(Problem.ProblemData.AllowedInputVariables.ToArray());
98
99      Stack<SymbolString> remainingTrees = new Stack<SymbolString>();
100      remainingTrees.Push(new SymbolString(new[] { Grammar.StartSymbol }));
101
102      while (remainingTrees.Any()) {
103        if (cancellationToken.IsCancellationRequested) break;
104
105        SymbolString currSymbolString = remainingTrees.Pop();
106
107        if (currSymbolString.IsSentence()) {
108          AllGenerated.Add(new Tuple<SymbolString, int>(currSymbolString, Grammar.CalcHashCode(currSymbolString)));
109
110          int currSymbolStringHash = Grammar.CalcHashCode(currSymbolString);
111          if (!evaluatedHashes.ContainsKey(currSymbolStringHash)
112              || evaluatedHashes[currSymbolStringHash].Count > currSymbolString.Count) {
113            evaluatedHashes[currSymbolStringHash] = currSymbolString;
114
115            DistinctGenerated.Add(new Tuple<SymbolString, int>(currSymbolString, Grammar.CalcHashCode(currSymbolString)));
116            EvaluateSentence(currSymbolString);
117          }
118          UpdateView(AllGenerated, DistinctGenerated);
119
120        } else {
121          // expand next nonterminal symbols
122          int nonterminalSymbolIndex = currSymbolString.FindIndex(s => s is NonterminalSymbol);
123          NonterminalSymbol expandedSymbol = currSymbolString[nonterminalSymbolIndex] as NonterminalSymbol;
124
125          foreach (Production productionAlternative in expandedSymbol.Alternatives) {
126            SymbolString newSentence = new SymbolString(currSymbolString);
127            newSentence.RemoveAt(nonterminalSymbolIndex);
128            newSentence.InsertRange(nonterminalSymbolIndex, productionAlternative);
129
130            if (newSentence.Count <= MaxTreeSize) {
131              remainingTrees.Push(newSentence);
132            }
133          }
134        }
135      }
136
137      UpdateView(AllGenerated, DistinctGenerated, force: true);
138
139      string[,] sentences = new string[AllGenerated.Count, 3];
140      for (int i = 0; i < AllGenerated.Count; i++) {
141        sentences[i, 0] = AllGenerated[i].Item1.ToString();
142        sentences[i, 1] = Grammar.PostfixToInfixParser(AllGenerated[i].Item1).ToString();
143        sentences[i, 2] = AllGenerated[i].Item2.ToString();
144      }
145      Results.Add(new Result("All generated sentences", new StringMatrix(sentences)));
146
147      string[,] distinctSentences = new string[DistinctGenerated.Count, 3];
148      for (int i = 0; i < DistinctGenerated.Count; i++) {
149        distinctSentences[i, 0] = DistinctGenerated[i].Item1.ToString();
150        distinctSentences[i, 1] = Grammar.PostfixToInfixParser(DistinctGenerated[i].Item1).ToString();
151        distinctSentences[i, 2] = DistinctGenerated[i].Item2.ToString();
152      }
153      Results.Add(new Result("Distinct generated sentences", new StringMatrix(distinctSentences)));
154    }
155
156
157    private void UpdateView(List<Tuple<SymbolString, int>> allGenerated,
158        List<Tuple<SymbolString, int>> distinctGenerated, bool force = false) {
159      int generatedSolutions = allGenerated.Count;
160      int distinctSolutions = distinctGenerated.Count;
161
162      if (force || generatedSolutions % GuiUpdateInterval == 0) {
163        Results.AddOrUpdateResult("Generated Solutions", new IntValue(generatedSolutions));
164        Results.AddOrUpdateResult("Distinct Solutions", new IntValue(distinctSolutions));
165
166        DoubleValue averageTreeLength = new DoubleValue(allGenerated.Select(r => r.Item1.Count).Average());
167        Results.AddOrUpdateResult("Average Tree Length of Solutions", averageTreeLength);
168      }
169    }
170
171    private void EvaluateSentence(SymbolString symbolString) {
172      SymbolicExpressionTree tree = Grammar.ParseSymbolicExpressionTree(symbolString);
173      SymbolicRegressionModel model = new SymbolicRegressionModel(
174        Problem.ProblemData.TargetVariable,
175        tree,
176        new SymbolicDataAnalysisExpressionTreeLinearInterpreter());
177
178      IRegressionSolution newSolution = model.CreateRegressionSolution(Problem.ProblemData);
179
180      IResult currBestTrainingSolutionResult;
181      IResult currBestTestSolutionResult;
182      if (!Results.TryGetValue(BestTrainingSolution, out currBestTrainingSolutionResult)
183           || !Results.TryGetValue(BestTestSolution, out currBestTestSolutionResult)) {
184
185        BestTrainingSentence = symbolString;
186        Results.Add(new Result(BestTrainingSolution, newSolution));
187        Results.Add(new Result(BestTrainingSolutionQuality, new DoubleValue(newSolution.TrainingRSquared).AsReadOnly()));
188
189        BestTestSentence = symbolString;
190        Results.Add(new Result(BestTestSolution, newSolution));
191        Results.Add(new Result(BestTestSolutionQuality, new DoubleValue(newSolution.TestRSquared).AsReadOnly()));
192
193      } else {
194        IRegressionSolution currBestTrainingSolution = (IRegressionSolution)currBestTrainingSolutionResult.Value;
195        if (currBestTrainingSolution.TrainingRSquared <= newSolution.TrainingRSquared) {
196          BestTrainingSentence = symbolString;
197          currBestTrainingSolutionResult.Value = newSolution;
198          Results.AddOrUpdateResult(BestTrainingSolutionQuality, new DoubleValue(newSolution.TrainingRSquared).AsReadOnly());
199        }
200
201        IRegressionSolution currBestTestSolution = (IRegressionSolution)currBestTestSolutionResult.Value;
202        if (currBestTestSolution.TestRSquared <= newSolution.TestRSquared) {
203          BestTestSentence = symbolString;
204          currBestTestSolutionResult.Value = newSolution;
205          Results.AddOrUpdateResult(BestTestSolutionQuality, new DoubleValue(newSolution.TestRSquared).AsReadOnly());
206        }
207      }
208    }
209  }
210}
Note: See TracBrowser for help on using the repository browser.