Free cookie consent management tool by TermsFeed Policy Generator

source: branches/GP-MoveOperators/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/ReplaceBranchMultiMoveGenerator.cs @ 8286

Last change on this file since 8286 was 8286, checked in by gkronber, 10 years ago

#1847 experimental changes to try to get TS to work

File size: 16.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Operators;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33
34namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
35  [Item("ReplaceBranchMultiMoveGenerator", "")]
36  [StorableClass]
37  public class ReplaceBranchMultiMoveGenerator : SingleSuccessorOperator, IStochasticOperator, ISymbolicExpressionTreeMoveOperator, IMultiMoveGenerator,
38    ISymbolicDataAnalysisInterpreterOperator, ISymbolicExpressionTreeGrammarBasedOperator, ISymbolicExpressionTreeSizeConstraintOperator {
39    public ILookupParameter<IRandom> RandomParameter {
40      get { return (ILookupParameter<IRandom>)Parameters["Random"]; }
41    }
42    public IValueLookupParameter<IntValue> SampleSizeParameter {
43      get { return (IValueLookupParameter<IntValue>)Parameters["SampleSize"]; }
44    }
45    public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> SymbolicDataAnalysisTreeInterpreterParameter {
46      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters["Interpreter"]; }
47    }
48
49    public IValueLookupParameter<ISymbolicExpressionGrammar> SymbolicExpressionTreeGrammarParameter {
50      get { return (IValueLookupParameter<ISymbolicExpressionGrammar>)Parameters["Grammar"]; }
51    }
52    public ILookupParameter<IRegressionProblemData> ProblemDataParameter {
53      get { return (ILookupParameter<IRegressionProblemData>)Parameters["ProblemData"]; }
54    }
55    public IntValue SampleSize {
56      get { return SampleSizeParameter.Value; }
57      set { SampleSizeParameter.Value = value; }
58    }
59    public ILookupParameter<ISymbolicExpressionTree> SymbolicExpressionTreeParameter {
60      get { return (ILookupParameter<ISymbolicExpressionTree>)Parameters["SymbolicExpressionTree"]; }
61    }
62    public ILookupParameter<ReplaceBranchMove> ReplaceBranchMoveParameter {
63      get { return (LookupParameter<ReplaceBranchMove>)Parameters["ReplaceBranchMove"]; }
64    }
65
66    public IValueParameter<IntValue> ReplacementBranchesPoolSize {
67      get { return (IValueParameter<IntValue>)Parameters["ReplacementBranchesPoolSize"]; }
68    }
69
70    public IValueParameter<IntValue> MaxReplacementBranchLength {
71      get { return (IValueParameter<IntValue>)Parameters["MaxReplacementBranchLength"]; }
72    }
73
74    public IValueParameter<IntValue> MaxReplacementBranchDepth {
75      get { return (IValueParameter<IntValue>)Parameters["MaxReplacementBranchDepth"]; }
76    }
77
78    protected ScopeParameter CurrentScopeParameter {
79      get { return (ScopeParameter)Parameters["CurrentScope"]; }
80    }
81
82    public IValueLookupParameter<IntValue> MaximumSymbolicExpressionTreeDepthParameter {
83      get { return (IValueLookupParameter<IntValue>)Parameters["MaximumSymbolicExpressionTreeDepth"]; }
84    }
85
86    public IValueLookupParameter<IntValue> MaximumSymbolicExpressionTreeLengthParameter {
87      get { return (IValueLookupParameter<IntValue>)Parameters["MaximumSymbolicExpressionTreeLength"]; }
88    }
89
90    public IValueLookupParameter<IntValue> NeighbourhoodSizeParameter {
91      get { return (IValueLookupParameter<IntValue>)Parameters["NeighbourhoodSize"]; }
92    }
93    public IValueLookupParameter<BoolValue> SemanticParameter {
94      get { return (IValueLookupParameter<BoolValue>)Parameters["Semantic"]; }
95    }
96
97    private IList<ISymbolicExpressionTree> fragments;
98    private IList<double[]> fragmentOutput;
99
100    [StorableConstructor]
101    protected ReplaceBranchMultiMoveGenerator(bool deserializing) : base(deserializing) { }
102    protected ReplaceBranchMultiMoveGenerator(ReplaceBranchMultiMoveGenerator original, Cloner cloner) : base(original, cloner) { }
103    public ReplaceBranchMultiMoveGenerator()
104      : base() {
105      Parameters.Add(new LookupParameter<IRandom>("Random", "The random number generator."));
106      Parameters.Add(new ValueLookupParameter<IntValue>("SampleSize", "The number of moves to generate."));
107
108      Parameters.Add(new LookupParameter<ISymbolicExpressionTree>("SymbolicExpressionTree", "The symbolic expression tree for which moves should be generated."));
109      Parameters.Add(new LookupParameter<ReplaceBranchMove>("ReplaceBranchMove", "The moves that should be generated in subscopes."));
110      Parameters.Add(new ScopeParameter("CurrentScope", "The current scope where the moves should be added as subscopes."));
111      Parameters.Add(new ValueLookupParameter<ISymbolicExpressionGrammar>("Grammar"));
112      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>("Interpreter"));
113      Parameters.Add(new LookupParameter<IRegressionProblemData>("ProblemData"));
114      Parameters.Add(new ValueParameter<IntValue>("ReplacementBranchesPoolSize", new IntValue(10000)));
115      Parameters.Add(new ValueParameter<IntValue>("MaxReplacementBranchLength", new IntValue(8)));
116      Parameters.Add(new ValueParameter<IntValue>("MaxReplacementBranchDepth", new IntValue(4)));
117      Parameters.Add(new ValueLookupParameter<IntValue>("MaximumSymbolicExpressionTreeDepth"));
118      Parameters.Add(new ValueLookupParameter<IntValue>("MaximumSymbolicExpressionTreeLength"));
119      Parameters.Add(new ValueLookupParameter<IntValue>("NeighbourhoodSize", new IntValue(5)));
120      Parameters.Add(new ValueLookupParameter<BoolValue>("Semantic", new BoolValue()));
121    }
122
123
124    [StorableHook(HookType.AfterDeserialization)]
125    private void AfterDeserialization() {
126      if (!Parameters.ContainsKey("MaximumSymbolicExpressionTreeDepth")) {
127        Parameters.Add(new ValueLookupParameter<IntValue>("MaximumSymbolicExpressionTreeDepth"));
128        Parameters.Add(new ValueLookupParameter<IntValue>("MaximumSymbolicExpressionTreeLength"));
129      }
130      if (!Parameters.ContainsKey("NeighbourhoodSize")) {
131        Parameters.Add(new ValueLookupParameter<IntValue>("NeighbourhoodSize", new IntValue(5)));
132      }
133
134    }
135
136
137    public override IDeepCloneable Clone(Cloner cloner) {
138      return new ReplaceBranchMultiMoveGenerator(this, cloner);
139    }
140
141    public override void ClearState() {
142      fragments = null;
143      fragmentOutput = null;
144      base.ClearState();
145    }
146    public override void InitializeState() {
147      fragments = null;
148      fragmentOutput = null;
149      base.InitializeState();
150    }
151
152    public override IOperation Apply() {
153      var random = RandomParameter.ActualValue;
154      if (fragments == null || fragmentOutput == null) {
155        InitializeOperator();
156      }
157
158      var tree = SymbolicExpressionTreeParameter.ActualValue;
159
160      string moveParameterName = ReplaceBranchMoveParameter.ActualName;
161      var moveScopes = new List<Scope>();
162      int n = SampleSizeParameter.ActualValue.Value;
163
164      var moves = GenerateMoves(tree, random, n);
165
166      foreach (var m in moves) {
167        var moveScope = new Scope(moveScopes.Count.ToString());
168        moveScope.Variables.Add(new HeuristicLab.Core.Variable(moveParameterName, m));
169        moveScopes.Add(moveScope);
170      }
171      CurrentScopeParameter.ActualValue.SubScopes.AddRange(moveScopes);
172      return base.Apply();
173    }
174
175    public IEnumerable<ReplaceBranchMove> GenerateMoves(ISymbolicExpressionTree tree, IRandom random, int n) {
176      int maxDepth = MaximumSymbolicExpressionTreeDepthParameter.ActualValue.Value;
177      int maxLength = MaximumSymbolicExpressionTreeLengthParameter.ActualValue.Value;
178      var possibleInternalChildren = (from parent in tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix()
179                                      from i in Enumerable.Range(0, parent.SubtreeCount)
180                                      let currentChild = parent.GetSubtree(i)
181                                      where currentChild.SubtreeCount > 0
182                                      where tree.Root.GetBranchLevel(currentChild) <= maxDepth + 2
183                                      where tree.Length - currentChild.GetLength() < maxLength
184                                      select new CutPoint(parent, i)).ToArray();
185
186      var possibleLeaveChildren = (from parent in tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix()
187                                   from i in Enumerable.Range(0, parent.SubtreeCount)
188                                   let currentChild = parent.GetSubtree(i)
189                                   where currentChild.SubtreeCount == 0
190                                   where tree.Root.GetBranchLevel(currentChild) <= maxDepth + 2
191                                   where tree.Length - 1 < maxLength
192                                   select new CutPoint(parent, i)).ToArray();
193
194
195      var root = (new ProgramRootSymbol()).CreateTreeNode();
196      var start = (new StartSymbol()).CreateTreeNode();
197      root.AddSubtree(start);
198      var t = new SymbolicExpressionTree(root);
199      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
200      var ds = ProblemDataParameter.ActualValue.Dataset;
201      var rows = ProblemDataParameter.ActualValue.TrainingIndices;
202
203      bool semantic = SemanticParameter.ActualValue.Value;
204
205      // select a random replacement point
206      CutPoint[] possibleChildren;
207      if (random.NextDouble() < 0.9)
208        possibleChildren = possibleInternalChildren;
209      else possibleChildren = possibleLeaveChildren;
210      var selected = possibleChildren[random.Next(possibleChildren.Length)];
211      // evaluate
212      start.AddSubtree(selected.Parent.GetSubtree(selected.ChildIndex));
213      var output = interpreter.GetSymbolicExpressionTreeValues(t, ds, rows).ToArray();
214      start.RemoveSubtree(0);
215
216      if (semantic) {
217        return FindMostSimilarFragments(tree, maxLength, maxDepth, selected, random, n, output);
218      } else {
219        return FindRandomFragments(tree, maxLength, maxDepth, selected, random, n, output);
220      }
221    }
222
223    private IEnumerable<ReplaceBranchMove> FindRandomFragments(ISymbolicExpressionTree tree, int maxLength, int maxDepth, CutPoint selected,
224  IRandom random, int maxNeighbours, double[] output) {
225      var selectedFragments = new List<Tuple<ISymbolicExpressionTree, double[]>>(maxNeighbours);
226      int treeLength = tree.Length;
227      int removedFragementLength = selected.Parent.GetSubtree(selected.ChildIndex).GetLength();
228      int parentBranchLevel = tree.Root.GetBranchLevel(selected.Parent);
229      int iterations = 0;
230      int maxIterations = maxNeighbours + 100;
231      // select random fragments
232      while (selectedFragments.Count < maxNeighbours && iterations++ < maxIterations) {
233        int r = random.Next(fragments.Count);
234        var selectedFragment = fragments[r];
235        var selectedFragmentOutput = fragmentOutput[r];
236        // if the branch is allowed in the selected point
237        if (treeLength - removedFragementLength + selectedFragment.Length <= maxLength + 4 &&
238          parentBranchLevel + selectedFragment.Depth - 2 <= maxDepth + 2 &&
239          tree.Root.Grammar.IsAllowedChildSymbol(selected.Parent.Symbol, selectedFragment.Root.GetSubtree(0).GetSubtree(0).Symbol, selected.ChildIndex)) {
240          selectedFragments.Add(Tuple.Create(selectedFragment, selectedFragmentOutput));
241        }
242      }
243      // yield moves (we need to add linear scaling parameters for the inserted tree)
244      return selectedFragments
245        .Select(pair => new ReplaceBranchMove(tree, selected.Parent, selected.ChildIndex, pair.Item1.Root.GetSubtree(0).GetSubtree(0), output, pair.Item2));
246    }
247
248    private IEnumerable<ReplaceBranchMove> FindMostSimilarFragments(ISymbolicExpressionTree tree, int maxLength, int maxDepth, CutPoint selected,
249      IRandom random, int maxNeighbours, double[] output) {
250      var bestTrees = new SortedList<double, List<Tuple<ISymbolicExpressionTree, double[]>>>(fragments.Count);
251      int treeLength = tree.Length;
252      int removedFragementLength = selected.Parent.GetSubtree(selected.ChildIndex).GetLength();
253      int parentBranchLevel = tree.Root.GetBranchLevel(selected.Parent);
254      // iterate over the whole pool of branches for replacement
255      for (int i = 0; i < fragments.Count; i++) {
256        // if the branch is allowed in the selected point
257        if (treeLength - removedFragementLength + fragments[i].Length <= maxLength + 4 &&
258          parentBranchLevel + fragments[i].Depth - 2 <= maxDepth + 2 &&
259          tree.Root.Grammar.IsAllowedChildSymbol(selected.Parent.Symbol, fragments[i].Root.GetSubtree(0).GetSubtree(0).Symbol, selected.ChildIndex)) {
260          OnlineCalculatorError error;
261          // calculate the similarity
262          double similarity = OnlinePearsonsRSquaredCalculator.Calculate(output, fragmentOutput[i], out error);
263          similarity = Math.Round(similarity, 5);
264          if (error != OnlineCalculatorError.None) similarity = 0.0;
265          // if we found a new bestSimilarity then keep the replacement branch in a sorted list (keep maximally the n best for this replacement point)
266          if (similarity < 1 && ((bestTrees.Count < maxNeighbours) || similarity > bestTrees.ElementAt(0).Key)) {
267            if (!bestTrees.ContainsKey(similarity)) {
268              var l = new List<Tuple<ISymbolicExpressionTree, double[]>>();
269              bestTrees.Add(similarity, l);
270            }
271            bestTrees[similarity].Add(Tuple.Create(fragments[i], fragmentOutput[i]));
272            if (bestTrees.Count > maxNeighbours) bestTrees.RemoveAt(0);
273          }
274        }
275      }
276      int c = 0;
277      // yield moves (we need to add linear scaling parameters for the inserted tree)
278      while (c < maxNeighbours) {
279        var l = bestTrees.ElementAt(c % bestTrees.Count).Value;
280        var pair = l[random.Next(l.Count)];
281        yield return
282          new ReplaceBranchMove(tree, selected.Parent, selected.ChildIndex, pair.Item1.Root.GetSubtree(0).GetSubtree(0),
283                                output, pair.Item2);
284        c++;
285      }
286    }
287
288    private void InitializeOperator() {
289      // init locally and set only at the end in case of exceptions
290      var trees = new List<ISymbolicExpressionTree>();
291      var treeOutput = new List<double[]>();
292      var random = RandomParameter.ActualValue;
293      var g = SymbolicExpressionTreeGrammarParameter.ActualValue;
294      var constSym = g.Symbols.Single(s => s is Constant);
295      // temporarily disable constants
296      double oldConstFreq = constSym.InitialFrequency;
297      constSym.InitialFrequency = 0.0;
298      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
299      var ds = ProblemDataParameter.ActualValue.Dataset;
300      var rows = ProblemDataParameter.ActualValue.TrainingIndices;
301      // create pool of random branches for replacement (no constants)
302      // and evaluate the output
303      // only keep fragments if the output does not contain invalid values
304      var n = ReplacementBranchesPoolSize.Value.Value;
305      while (trees.Count < n) {
306        var t = ProbabilisticTreeCreator.Create(random, g, MaxReplacementBranchLength.Value.Value, MaxReplacementBranchDepth.Value.Value);
307        var output = interpreter.GetSymbolicExpressionTreeValues(t, ds, rows);
308        if (!output.Any(x => double.IsInfinity(x) || double.IsNaN(x))) {
309          trees.Add(t);
310          treeOutput.Add(output.ToArray());
311        }
312      }
313      // enable constants again
314      constSym.InitialFrequency = oldConstFreq;
315      this.fragments = trees;
316      this.fragmentOutput = treeOutput;
317    }
318  }
319}
Note: See TracBrowser for help on using the repository browser.