Free cookie consent management tool by TermsFeed Policy Generator

source: branches/GP-MoveOperators/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/ReplaceBranchMultiMoveGenerator.cs @ 9573

Last change on this file since 9573 was 8292, checked in by gkronber, 12 years ago

#1847: worked on move operators. version of the final EMSS submission

File size: 17.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Operators;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33
34namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
35  [Item("ReplaceBranchMultiMoveGenerator", "")]
36  [StorableClass]
37  public class ReplaceBranchMultiMoveGenerator : SingleSuccessorOperator, IStochasticOperator, ISymbolicExpressionTreeMoveOperator, IMultiMoveGenerator,
38    ISymbolicDataAnalysisInterpreterOperator, ISymbolicExpressionTreeGrammarBasedOperator, ISymbolicExpressionTreeSizeConstraintOperator {
39    public ILookupParameter<IRandom> RandomParameter {
40      get { return (ILookupParameter<IRandom>)Parameters["Random"]; }
41    }
42    public IValueLookupParameter<IntValue> SampleSizeParameter {
43      get { return (IValueLookupParameter<IntValue>)Parameters["SampleSize"]; }
44    }
45    public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> SymbolicDataAnalysisTreeInterpreterParameter {
46      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters["Interpreter"]; }
47    }
48
49    public IValueLookupParameter<ISymbolicExpressionGrammar> SymbolicExpressionTreeGrammarParameter {
50      get { return (IValueLookupParameter<ISymbolicExpressionGrammar>)Parameters["Grammar"]; }
51    }
52    public ILookupParameter<IRegressionProblemData> ProblemDataParameter {
53      get { return (ILookupParameter<IRegressionProblemData>)Parameters["ProblemData"]; }
54    }
55    public IntValue SampleSize {
56      get { return SampleSizeParameter.Value; }
57      set { SampleSizeParameter.Value = value; }
58    }
59    public ILookupParameter<ISymbolicExpressionTree> SymbolicExpressionTreeParameter {
60      get { return (ILookupParameter<ISymbolicExpressionTree>)Parameters["SymbolicExpressionTree"]; }
61    }
62    public ILookupParameter<ReplaceBranchMove> ReplaceBranchMoveParameter {
63      get { return (LookupParameter<ReplaceBranchMove>)Parameters["ReplaceBranchMove"]; }
64    }
65
66    public IValueParameter<IntValue> ReplacementBranchesPoolSize {
67      get { return (IValueParameter<IntValue>)Parameters["ReplacementBranchesPoolSize"]; }
68    }
69
70    public IValueParameter<IntValue> MaxReplacementBranchLength {
71      get { return (IValueParameter<IntValue>)Parameters["MaxReplacementBranchLength"]; }
72    }
73
74    public IValueParameter<IntValue> MaxReplacementBranchDepth {
75      get { return (IValueParameter<IntValue>)Parameters["MaxReplacementBranchDepth"]; }
76    }
77
78    protected ScopeParameter CurrentScopeParameter {
79      get { return (ScopeParameter)Parameters["CurrentScope"]; }
80    }
81
82    public IValueLookupParameter<IntValue> MaximumSymbolicExpressionTreeDepthParameter {
83      get { return (IValueLookupParameter<IntValue>)Parameters["MaximumSymbolicExpressionTreeDepth"]; }
84    }
85
86    public IValueLookupParameter<IntValue> MaximumSymbolicExpressionTreeLengthParameter {
87      get { return (IValueLookupParameter<IntValue>)Parameters["MaximumSymbolicExpressionTreeLength"]; }
88    }
89
90    public IValueLookupParameter<IntValue> NeighbourhoodSizeParameter {
91      get { return (IValueLookupParameter<IntValue>)Parameters["NeighbourhoodSize"]; }
92    }
93    public IValueLookupParameter<BoolValue> SemanticParameter {
94      get { return (IValueLookupParameter<BoolValue>)Parameters["Semantic"]; }
95    }
96    public ILookupParameter<ItemList<ISymbolicExpressionTree>> FragmentsParameter {
97      get { return (ILookupParameter<ItemList<ISymbolicExpressionTree>>)Parameters["Fragments"]; }
98    }
99    public ILookupParameter<ItemList<DoubleArray>> FragmentOutputsParameter {
100      get { return (ILookupParameter<ItemList<DoubleArray>>)Parameters["FragmentOutputs"]; }
101    }
102
103    [StorableConstructor]
104    protected ReplaceBranchMultiMoveGenerator(bool deserializing) : base(deserializing) { }
105    protected ReplaceBranchMultiMoveGenerator(ReplaceBranchMultiMoveGenerator original, Cloner cloner) : base(original, cloner) { }
106    public ReplaceBranchMultiMoveGenerator()
107      : base() {
108      Parameters.Add(new LookupParameter<IRandom>("Random", "The random number generator."));
109      Parameters.Add(new ValueLookupParameter<IntValue>("SampleSize", "The number of moves to generate."));
110
111      Parameters.Add(new LookupParameter<ISymbolicExpressionTree>("SymbolicExpressionTree", "The symbolic expression tree for which moves should be generated."));
112      Parameters.Add(new LookupParameter<ReplaceBranchMove>("ReplaceBranchMove", "The moves that should be generated in subscopes."));
113      Parameters.Add(new ScopeParameter("CurrentScope", "The current scope where the moves should be added as subscopes."));
114      Parameters.Add(new ValueLookupParameter<ISymbolicExpressionGrammar>("Grammar"));
115      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>("Interpreter"));
116      Parameters.Add(new LookupParameter<IRegressionProblemData>("ProblemData"));
117      Parameters.Add(new ValueParameter<IntValue>("ReplacementBranchesPoolSize", new IntValue(10000)));
118      Parameters.Add(new ValueParameter<IntValue>("MaxReplacementBranchLength", new IntValue(8)));
119      Parameters.Add(new ValueParameter<IntValue>("MaxReplacementBranchDepth", new IntValue(4)));
120      Parameters.Add(new ValueLookupParameter<IntValue>("MaximumSymbolicExpressionTreeDepth"));
121      Parameters.Add(new ValueLookupParameter<IntValue>("MaximumSymbolicExpressionTreeLength"));
122      Parameters.Add(new ValueLookupParameter<IntValue>("NeighbourhoodSize", new IntValue(5)));
123      Parameters.Add(new ValueLookupParameter<BoolValue>("Semantic", new BoolValue()));
124      Parameters.Add(new LookupParameter<ItemList<ISymbolicExpressionTree>>("Fragments"));
125      Parameters.Add(new LookupParameter<ItemList<DoubleArray>>("FragmentOutputs"));
126    }
127
128
129    [StorableHook(HookType.AfterDeserialization)]
130    private void AfterDeserialization() {
131      if (!Parameters.ContainsKey("MaximumSymbolicExpressionTreeDepth")) {
132        Parameters.Add(new ValueLookupParameter<IntValue>("MaximumSymbolicExpressionTreeDepth"));
133        Parameters.Add(new ValueLookupParameter<IntValue>("MaximumSymbolicExpressionTreeLength"));
134      }
135      if (!Parameters.ContainsKey("NeighbourhoodSize")) {
136        Parameters.Add(new ValueLookupParameter<IntValue>("NeighbourhoodSize", new IntValue(5)));
137      }
138
139    }
140
141
142    public override IDeepCloneable Clone(Cloner cloner) {
143      return new ReplaceBranchMultiMoveGenerator(this, cloner);
144    }
145
146    public override IOperation Apply() {
147      var random = RandomParameter.ActualValue;
148      if (FragmentsParameter.ActualValue == null || FragmentOutputsParameter.ActualValue == null) {
149        InitializeOperator();
150      }
151
152      var tree = SymbolicExpressionTreeParameter.ActualValue;
153
154      string moveParameterName = ReplaceBranchMoveParameter.ActualName;
155      var moveScopes = new List<Scope>();
156      int n = SampleSizeParameter.ActualValue.Value;
157
158      var moves = GenerateMoves(tree, random, n);
159
160      foreach (var m in moves) {
161        var moveScope = new Scope(moveScopes.Count.ToString());
162        moveScope.Variables.Add(new HeuristicLab.Core.Variable(moveParameterName, m));
163        moveScopes.Add(moveScope);
164      }
165      CurrentScopeParameter.ActualValue.SubScopes.AddRange(moveScopes);
166      return base.Apply();
167    }
168
169    public IEnumerable<ReplaceBranchMove> GenerateMoves(ISymbolicExpressionTree tree, IRandom random, int n) {
170      int maxDepth = MaximumSymbolicExpressionTreeDepthParameter.ActualValue.Value;
171      int maxLength = MaximumSymbolicExpressionTreeLengthParameter.ActualValue.Value;
172      var possibleInternalChildren = (from parent in tree.Root.GetSubtree(0).IterateNodesPrefix()
173                                      from i in Enumerable.Range(0, parent.SubtreeCount)
174                                      let currentChild = parent.GetSubtree(i)
175                                      where currentChild.SubtreeCount > 0
176                                      where tree.Root.GetBranchLevel(currentChild) < maxDepth + 2
177                                      where tree.Length - currentChild.GetLength() < maxLength
178                                      select new CutPoint(parent, i)).ToArray();
179
180      var possibleLeaveChildren = (from parent in tree.Root.GetSubtree(0).IterateNodesPrefix()
181                                   from i in Enumerable.Range(0, parent.SubtreeCount)
182                                   let currentChild = parent.GetSubtree(i)
183                                   where currentChild.SubtreeCount == 0
184                                   where tree.Root.GetBranchLevel(currentChild) < maxDepth + 2
185                                   where tree.Length - 1 < maxLength
186                                   select new CutPoint(parent, i)).ToArray();
187      if (possibleInternalChildren.Length == 0) possibleInternalChildren = possibleLeaveChildren;
188      if (possibleLeaveChildren.Length == 0) possibleLeaveChildren = possibleInternalChildren;
189
190      var root = (new ProgramRootSymbol()).CreateTreeNode();
191      var start = (new StartSymbol()).CreateTreeNode();
192      root.AddSubtree(start);
193      var t = new SymbolicExpressionTree(root);
194      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
195      var ds = ProblemDataParameter.ActualValue.Dataset;
196      var rows = ProblemDataParameter.ActualValue.TrainingIndices;
197
198      bool semantic = SemanticParameter.ActualValue.Value;
199      int maxNeighbours = NeighbourhoodSizeParameter.ActualValue.Value;
200      var count = 0;
201      while (count < n) {
202        // select a random replacement point
203        CutPoint[] possibleChildren;
204        if (random.NextDouble() < 0.9)
205          possibleChildren = possibleInternalChildren;
206        else possibleChildren = possibleLeaveChildren;
207        var selected = possibleChildren[random.Next(possibleChildren.Length)];
208        // evaluate
209        start.AddSubtree(selected.Parent.GetSubtree(selected.ChildIndex));
210        var output = interpreter.GetSymbolicExpressionTreeValues(t, ds, rows).ToArray();
211        start.RemoveSubtree(0);
212
213        if (semantic) {
214          foreach (var m in FindMostSimilarFragments(tree, maxLength, maxDepth, selected, random, maxNeighbours, output)) {
215            yield return m;
216            count++;
217          }
218        } else {
219          foreach (var m in FindRandomFragments(tree, maxLength, maxDepth, selected, random, maxNeighbours, output)) {
220            yield return m;
221            count++;
222          }
223        }
224      }
225    }
226
227    private IEnumerable<ReplaceBranchMove> FindRandomFragments(ISymbolicExpressionTree tree, int maxLength, int maxDepth, CutPoint selected,
228  IRandom random, int maxNeighbours, double[] output) {
229      var selectedFragments = new List<int>(maxNeighbours);
230      int treeLength = tree.Length;
231      int removedFragementLength = selected.Parent.GetSubtree(selected.ChildIndex).GetLength();
232      int parentBranchLevel = tree.Root.GetBranchLevel(selected.Parent);
233      int iterations = 0;
234      int maxIterations = maxNeighbours + 100;
235      var fragments = FragmentsParameter.ActualValue;
236      var fragmentOutput = FragmentOutputsParameter.ActualValue;
237      // select random fragments
238      while (selectedFragments.Count < maxNeighbours && iterations++ < maxIterations) {
239        int r = random.Next(fragments.Count);
240        var selectedFragment = fragments[r];
241        var selectedFragmentOutput = fragmentOutput[r];
242        // if the branch is allowed in the selected point
243        if (treeLength - removedFragementLength + selectedFragment.Length <= maxLength + 4 &&
244          parentBranchLevel + selectedFragment.Depth - 2 <= maxDepth + 2 &&
245          tree.Root.Grammar.IsAllowedChildSymbol(selected.Parent.Symbol, selectedFragment.Root.GetSubtree(0).GetSubtree(0).Symbol, selected.ChildIndex)) {
246          selectedFragments.Add(r);
247        }
248      }
249      // yield moves (we need to add linear scaling parameters for the inserted tree)
250      return selectedFragments
251        .Select(i => new ReplaceBranchMove(tree, selected.Parent, selected.ChildIndex, fragments[i].Root.GetSubtree(0).GetSubtree(0), output, fragmentOutput[i].ToArray(), i));
252    }
253
254    private IEnumerable<ReplaceBranchMove> FindMostSimilarFragments(ISymbolicExpressionTree tree, int maxLength, int maxDepth, CutPoint selected,
255      IRandom random, int maxNeighbours, double[] output) {
256      var fragments = FragmentsParameter.ActualValue;
257      var fragmentOutput = FragmentOutputsParameter.ActualValue;
258      var bestTrees = new SortedList<double, List<int>>(fragments.Count);
259      int treeLength = tree.Length;
260      int removedFragementLength = selected.Parent.GetSubtree(selected.ChildIndex).GetLength();
261      int parentBranchLevel = tree.Root.GetBranchLevel(selected.Parent);
262      // iterate over the whole pool of branches for replacement
263      for (int i = 0; i < fragments.Count; i++) {
264        // if the branch is allowed in the selected point
265        if (treeLength - removedFragementLength + fragments[i].Length <= maxLength + 4 &&
266          parentBranchLevel + fragments[i].Depth - 2 <= maxDepth + 2 &&
267          tree.Root.Grammar.IsAllowedChildSymbol(selected.Parent.Symbol, fragments[i].Root.GetSubtree(0).GetSubtree(0).Symbol, selected.ChildIndex)) {
268          OnlineCalculatorError error;
269          // calculate the similarity
270          double similarity = OnlinePearsonsRSquaredCalculator.Calculate(output, fragmentOutput[i], out error);
271          similarity = Math.Round(similarity, 5);
272          if (error != OnlineCalculatorError.None) similarity = 0.0;
273          // if we found a new bestSimilarity then keep the replacement branch in a sorted list (keep maximally the n best for this replacement point)
274          if (similarity < 1 && ((bestTrees.Count < maxNeighbours) || similarity > bestTrees.ElementAt(0).Key)) {
275            if (!bestTrees.ContainsKey(similarity)) {
276              var l = new List<int>();
277              bestTrees.Add(similarity, l);
278            }
279            bestTrees[similarity].Add(i);
280            if (bestTrees.Count > maxNeighbours) bestTrees.RemoveAt(0);
281          }
282        }
283      }
284      int c = 0;
285      // yield moves (we need to add linear scaling parameters for the inserted tree)
286      while (c < maxNeighbours) {
287        var l = bestTrees.ElementAt(c % bestTrees.Count).Value;
288        var index = l[random.Next(l.Count)];
289        yield return
290          new ReplaceBranchMove(tree, selected.Parent, selected.ChildIndex, fragments[index].Root.GetSubtree(0).GetSubtree(0),
291                                output, fragmentOutput[index].ToArray(), index);
292        c++;
293      }
294    }
295
296    private void InitializeOperator() {
297      // init locally and set only at the end in case of exceptions
298      var trees = new List<ISymbolicExpressionTree>();
299      var treeOutput = new List<double[]>();
300      var random = RandomParameter.ActualValue;
301      var g = SymbolicExpressionTreeGrammarParameter.ActualValue;
302      var constSym = g.Symbols.Single(s => s is Constant);
303      // temporarily disable constants
304      double oldConstFreq = constSym.InitialFrequency;
305      constSym.InitialFrequency = 0.0;
306      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
307      var ds = ProblemDataParameter.ActualValue.Dataset;
308      var rows = ProblemDataParameter.ActualValue.TrainingIndices;
309      // create pool of random branches for replacement (no constants)
310      // and evaluate the output
311      // only keep fragments if the output does not contain invalid values
312      var n = ReplacementBranchesPoolSize.Value.Value;
313      while (trees.Count < n) {
314        var t = ProbabilisticTreeCreator.Create(random, g, MaxReplacementBranchLength.Value.Value, MaxReplacementBranchDepth.Value.Value);
315        var output = interpreter.GetSymbolicExpressionTreeValues(t, ds, rows);
316        if (!output.Any(x => double.IsInfinity(x) || double.IsNaN(x))) {
317          trees.Add(t);
318          treeOutput.Add(output.ToArray());
319        }
320      }
321      // enable constants again
322      constSym.InitialFrequency = oldConstFreq;
323      // set parameters
324      FragmentsParameter.ActualValue = new ItemList<ISymbolicExpressionTree>(trees);
325      FragmentOutputsParameter.ActualValue = new ItemList<DoubleArray>(treeOutput.Select(a => new DoubleArray(a)));
326    }
327  }
328}
Note: See TracBrowser for help on using the repository browser.