Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis Refactoring/HeuristicLab.Encodings.SymbolicExpressionTreeEncoding/3.4/Creators/ProbabilisticTreeCreator.cs @ 5618

Last change on this file since 5618 was 5618, checked in by mkommend, 12 years ago

#1418: Added symbolic data analysis problems.

File size: 18.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Text;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Parameters;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31
32namespace HeuristicLab.Encodings.SymbolicExpressionTreeEncoding {
33  [StorableClass]
34  [Item("ProbabilisticTreeCreator", "An operator that creates new symbolic expression trees with uniformly distributed length")]
35  public class ProbabilisticTreeCreator : SymbolicExpressionTreeCreator,
36    ISymbolicExpressionTreeSizeConstraintOperator, ISymbolicExpressionTreeGrammarBasedOperator, ISymbolicExpressionTreeArchitectureAlteringOperator {
37    private const int MAX_TRIES = 100;
38    private const string MaximumSymbolicExpressionTreeLengthParameterName = "MaximumSymbolicExpressionTreeLength";
39    private const string MaximumSymbolicExpressionTreeDepthParameterName = "MaximumSymbolicExpressionTreeDepth";
40    private const string MaximumFunctionDefinitionsParameterName = "MaximumFunctionDefinitions";
41    private const string MaximumFunctionArgumentsParameterName = "MaximumFunctionArguments";
42    private const string SymbolicExpressionTreeGrammarParameterName = "SymbolicExpressionTreeGrammar";
43    #region Parameter Properties
44    public IValueLookupParameter<IntValue> MaximumSymbolicExpressionTreeLengthParameter {
45      get { return (IValueLookupParameter<IntValue>)Parameters[MaximumSymbolicExpressionTreeLengthParameterName]; }
46    }
47    public IValueLookupParameter<IntValue> MaximumSymbolicExpressionTreeDepthParameter {
48      get { return (IValueLookupParameter<IntValue>)Parameters[MaximumSymbolicExpressionTreeDepthParameterName]; }
49    }
50    public IValueLookupParameter<IntValue> MaximumFunctionDefinitionsParameter {
51      get { return (IValueLookupParameter<IntValue>)Parameters[MaximumFunctionDefinitionsParameterName]; }
52    }
53    public IValueLookupParameter<IntValue> MaximumFunctionArgumentsParameter {
54      get { return (IValueLookupParameter<IntValue>)Parameters[MaximumFunctionArgumentsParameterName]; }
55    }
56    public IValueLookupParameter<ISymbolicExpressionTreeGrammar> SymbolicExpressionTreeGrammarParameter {
57      get { return (IValueLookupParameter<ISymbolicExpressionTreeGrammar>)Parameters[SymbolicExpressionTreeGrammarParameterName]; }
58    }
59    #endregion
60    #region Properties
61    public IntValue MaximumSymbolicExpressionTreeLength {
62      get { return MaximumSymbolicExpressionTreeLengthParameter.ActualValue; }
63    }
64    public IntValue MaximumSymbolicExpressionTreeDepth {
65      get { return MaximumSymbolicExpressionTreeDepthParameter.ActualValue; }
66    }
67    public IntValue MaximumFunctionDefinitions {
68      get { return MaximumFunctionDefinitionsParameter.ActualValue; }
69    }
70    public IntValue MaximumFunctionArguments {
71      get { return MaximumFunctionArgumentsParameter.ActualValue; }
72    }
73    public ISymbolicExpressionTreeGrammar SymbolicExpressionTreeGrammar {
74      get { return SymbolicExpressionTreeGrammarParameter.ActualValue; }
75    }
76    #endregion
77
78    [StorableConstructor]
79    protected ProbabilisticTreeCreator(bool deserializing) : base(deserializing) { }
80    protected ProbabilisticTreeCreator(ProbabilisticTreeCreator original, Cloner cloner) : base(original, cloner) { }
81    public ProbabilisticTreeCreator()
82      : base() {
83      Parameters.Add(new ValueLookupParameter<IntValue>(MaximumSymbolicExpressionTreeLengthParameterName, "The maximal length (number of nodes) of the symbolic expression tree."));
84      Parameters.Add(new ValueLookupParameter<IntValue>(MaximumSymbolicExpressionTreeDepthParameterName, "The maximal depth of the symbolic expression tree (a tree with one node has depth = 0)."));
85      Parameters.Add(new ValueLookupParameter<IntValue>(MaximumFunctionDefinitionsParameterName, "The maximum allowed number of automatically defined functions."));
86      Parameters.Add(new ValueLookupParameter<IntValue>(MaximumFunctionArgumentsParameterName, "The maximum allowed number of arguments of automatically defined functions."));
87      Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeGrammar>(SymbolicExpressionTreeGrammarParameterName, "The tree grammar that defines the correct syntax of symbolic expression trees that should be created."));
88    }
89
90    public override IDeepCloneable Clone(Cloner cloner) {
91      return new ProbabilisticTreeCreator(this, cloner);
92    }
93
94    protected override ISymbolicExpressionTree Create(IRandom random) {
95      return Create(random, SymbolicExpressionTreeGrammar, MaximumSymbolicExpressionTreeLength.Value, MaximumSymbolicExpressionTreeDepth.Value,
96        MaximumFunctionDefinitions.Value, MaximumFunctionArguments.Value);
97    }
98
99    public static ISymbolicExpressionTree Create(IRandom random, ISymbolicExpressionTreeGrammar grammar,
100      int maxTreeLength, int maxTreeDepth,
101      int maxFunctionDefinitions, int maxFunctionArguments
102      ) {
103      SymbolicExpressionTree tree = new SymbolicExpressionTree();
104      var rootNode = (SymbolicExpressionTreeTopLevelNode)grammar.StartSymbol.CreateTreeNode();
105      if (rootNode.HasLocalParameters) rootNode.ResetLocalParameters(random);
106      rootNode.SetGrammar(new SymbolicExpressionTreeGrammar(grammar));
107      tree.Root = PTC2(random, rootNode, maxTreeLength, maxTreeDepth, maxFunctionDefinitions, maxFunctionArguments);
108      return tree;
109    }
110
111    private class TreeExtensionPoint {
112      public ISymbolicExpressionTreeNode Parent { get; set; }
113      public int ChildIndex { get; set; }
114      public int ExtensionPointDepth { get; set; }
115    }
116
117    public static ISymbolicExpressionTreeNode PTC2(IRandom random, ISymbolicExpressionTreeNode seedNode,
118      int maxLength, int maxDepth, int maxFunctionDefinitions, int maxFunctionArguments) {
119      // tree length is limited by the grammar and by the explicit size constraints
120      int allowedMinLength = seedNode.Grammar.GetMinExpressionLength(seedNode.Symbol);
121      int allowedMaxLength = Math.Min(maxLength, seedNode.Grammar.GetMaxExpressionLength(seedNode.Symbol));
122      int tries = 0;
123      while (tries++ < MAX_TRIES) {
124        // select a target tree length uniformly in the possible range (as determined by explicit limits and limits of the grammar)
125        int targetTreeLength;
126        targetTreeLength = random.Next(allowedMinLength, allowedMaxLength + 1);
127        if (targetTreeLength <= 1 || maxDepth <= 1) return seedNode;
128
129        bool success = CreateFullTreeFromSeed(random, seedNode, seedNode.Grammar, targetTreeLength, maxDepth, maxFunctionDefinitions, maxFunctionArguments);
130
131        // if successful => check constraints and return the tree if everything looks ok       
132        if (success && seedNode.GetLength() <= maxLength && seedNode.GetDepth() <= maxDepth) {
133          return seedNode;
134        } else {
135          // clean seedNode
136          while (seedNode.SubTrees.Count() > 0) seedNode.RemoveSubTree(0);
137        }
138        // try a different length MAX_TRIES times
139      }
140      throw new ArgumentException("Couldn't create a random valid tree.");
141    }
142
143    private static bool CreateFullTreeFromSeed(IRandom random, ISymbolicExpressionTreeNode root, ISymbolicExpressionTreeGrammar globalGrammar,
144      int targetLength, int maxDepth, int maxFunctionDefinitions, int maxFunctionArguments) {
145      try {
146        TryCreateFullTreeFromSeed(random, root, globalGrammar, targetLength, maxDepth, maxFunctionDefinitions, maxFunctionArguments);
147        return true;
148      }
149      catch (ArgumentException) { return false; }
150    }
151
152    private static void TryCreateFullTreeFromSeed(IRandom random, ISymbolicExpressionTreeNode root, ISymbolicExpressionTreeGrammar globalGrammar,
153      int targetLength, int maxDepth, int maxFunctionDefinitions, int maxFunctionArguments) {
154      List<TreeExtensionPoint> extensionPoints = new List<TreeExtensionPoint>();
155      int currentLength = 1;
156      int totalListMinLength = globalGrammar.GetMinExpressionLength(root.Symbol) - 1;
157      int actualArity = SampleArity(random, root, targetLength);
158      for (int i = 0; i < actualArity; i++) {
159        // insert a dummy sub-tree and add the pending extension to the list
160        var dummy = new SymbolicExpressionTreeNode();
161        root.AddSubTree(dummy);
162        extensionPoints.Add(new TreeExtensionPoint { Parent = root, ChildIndex = i, ExtensionPointDepth = 0 });
163      }
164      // while there are pending extension points and we have not reached the limit of adding new extension points
165      while (extensionPoints.Count > 0 && totalListMinLength + currentLength < targetLength) {
166        int randomIndex = random.Next(extensionPoints.Count);
167        TreeExtensionPoint nextExtension = extensionPoints[randomIndex];
168        extensionPoints.RemoveAt(randomIndex);
169        ISymbolicExpressionTreeNode parent = nextExtension.Parent;
170        int argumentIndex = nextExtension.ChildIndex;
171        int extensionDepth = nextExtension.ExtensionPointDepth;
172        if (extensionDepth + parent.Grammar.GetMinExpressionDepth(parent.Symbol) >= maxDepth) {
173          ReplaceWithMinimalTree(random, root, parent, argumentIndex, maxFunctionDefinitions, maxFunctionArguments);
174        } else {
175          var allowedSymbols = (from s in parent.Grammar.Symbols
176                                where parent.Grammar.IsAllowedChild(parent.Symbol, s, argumentIndex)
177                                where parent.Grammar.GetMinExpressionDepth(s) + extensionDepth - 1 < maxDepth
178                                where parent.Grammar.GetMaxExpressionLength(s) > targetLength - totalListMinLength - currentLength
179                                select s)
180                               .ToList();
181          var weights = allowedSymbols.Select(x => x.InitialFrequency).ToList();
182          var selectedSymbol = allowedSymbols.SelectRandom(weights, random);
183          ISymbolicExpressionTreeNode newTree = selectedSymbol.CreateTreeNode();
184          if (newTree.HasLocalParameters) newTree.ResetLocalParameters(random);
185          parent.RemoveSubTree(argumentIndex);
186          parent.InsertSubTree(argumentIndex, newTree);
187
188          InitializeNewTreeNode(random, root, newTree, maxFunctionDefinitions, maxFunctionArguments);
189
190          currentLength++;
191          totalListMinLength--;
192
193          actualArity = SampleArity(random, newTree, targetLength - currentLength);
194          for (int i = 0; i < actualArity; i++) {
195            // insert a dummy sub-tree and add the pending extension to the list
196            var dummy = new SymbolicExpressionTreeNode();
197            newTree.AddSubTree(dummy);
198            extensionPoints.Add(new TreeExtensionPoint { Parent = newTree, ChildIndex = i, ExtensionPointDepth = extensionDepth + 1 });
199          }
200          totalListMinLength += newTree.Grammar.GetMinExpressionLength(newTree.Symbol);
201        }
202      }
203      // fill all pending extension points
204      while (extensionPoints.Count > 0) {
205        int randomIndex = random.Next(extensionPoints.Count);
206        TreeExtensionPoint nextExtension = extensionPoints[randomIndex];
207        extensionPoints.RemoveAt(randomIndex);
208        ISymbolicExpressionTreeNode parent = nextExtension.Parent;
209        int a = nextExtension.ChildIndex;
210        int d = nextExtension.ExtensionPointDepth;
211        ReplaceWithMinimalTree(random, root, parent, a, maxFunctionDefinitions, maxFunctionArguments);
212      }
213    }
214
215    private static void ReplaceWithMinimalTree(IRandom random, ISymbolicExpressionTreeNode root, ISymbolicExpressionTreeNode parent,
216      int argumentIndex, int maxFunctionDefinitions, int maxFunctionArguments) {
217      // determine possible symbols that will lead to the smallest possible tree
218      var possibleSymbols = (from s in parent.Grammar.GetAllowedSymbols(parent.Symbol, argumentIndex)
219                             group s by parent.Grammar.GetMinExpressionLength(s) into g
220                             orderby g.Key
221                             select g).First().ToList();
222      var weights = possibleSymbols.Select(x => x.InitialFrequency).ToList();
223      var selectedSymbol = possibleSymbols.SelectRandom(weights, random);
224      var tree = selectedSymbol.CreateTreeNode();
225      if (tree.HasLocalParameters) tree.ResetLocalParameters(random);
226      parent.RemoveSubTree(argumentIndex);
227      parent.InsertSubTree(argumentIndex, tree);
228      InitializeNewTreeNode(random, root, tree, maxFunctionDefinitions, maxFunctionArguments);
229      for (int i = 0; i < tree.Grammar.GetMinSubtreeCount(tree.Symbol); i++) {
230        // insert a dummy sub-tree and add the pending extension to the list
231        var dummy = new SymbolicExpressionTreeNode();
232        tree.AddSubTree(dummy);
233        // replace the just inserted dummy by recursive application
234        ReplaceWithMinimalTree(random, root, tree, i, maxFunctionDefinitions, maxFunctionArguments);
235      }
236    }
237
238    private static void InitializeNewTreeNode(IRandom random, ISymbolicExpressionTreeNode root, ISymbolicExpressionTreeNode newTree, int maxFunctionDefinitions, int maxFunctionArguments) {
239      // NB it is assumed that defuns are only allowed as children of root and nowhere else
240      // also assumes that newTree is already attached to root somewhere
241      if (IsTopLevelBranch(root, newTree)) {
242        ((SymbolicExpressionTreeTopLevelNode)newTree).SetGrammar((ISymbolicExpressionTreeGrammar)root.Grammar.Clone());
243
244        // allow invokes of existing ADFs with higher index
245        int argIndex = root.IndexOfSubTree(newTree);
246        for (int i = argIndex + 1; i < root.SubTrees.Count(); i++) {
247          var otherDefunNode = root.GetSubTree(i) as DefunTreeNode;
248          if (otherDefunNode != null) {
249            GrammarModifier.AddDynamicSymbol(newTree.Grammar, newTree.Symbol, otherDefunNode.FunctionName, otherDefunNode.NumberOfArguments);
250          }
251        }
252      }
253      if (newTree.Symbol is Defun) {
254        var defunTree = newTree as DefunTreeNode;
255        string formatString = new StringBuilder().Append('0', (int)Math.Log10(maxFunctionDefinitions * 10 - 1)).ToString(); // >= 100 functions => ###
256        var allowedNames = from index in Enumerable.Range(0, maxFunctionDefinitions)
257                           select "ADF" + index.ToString(formatString);
258        var takenNames = (from node in root.IterateNodesPrefix().OfType<DefunTreeNode>()
259                          select node.FunctionName).Distinct();
260        var remainingNames = allowedNames.Except(takenNames).ToList();
261        string functionName = remainingNames[random.Next(remainingNames.Count)];
262        // set name and number of arguments of the ADF
263        int nArgs = random.Next(maxFunctionArguments);
264        defunTree.FunctionName = functionName;
265        defunTree.NumberOfArguments = nArgs;
266        if (nArgs > 0) {
267          GrammarModifier.AddDynamicArguments(defunTree.Grammar, defunTree.Symbol, Enumerable.Range(0, nArgs));
268        }
269        // in existing branches with smaller index allow invoke of current function
270        int argIndex = root.IndexOfSubTree(newTree);
271        for (int i = 0; i < argIndex; i++) {
272          // if not dummy node
273          if (root.GetSubTree(i).Symbol != null) {
274            var existingBranch = root.GetSubTree(i);
275            GrammarModifier.AddDynamicSymbol(existingBranch.Grammar, existingBranch.Symbol, functionName, nArgs);
276          }
277        }
278      }
279    }
280
281    private static bool IsTopLevelBranch(ISymbolicExpressionTreeNode root, ISymbolicExpressionTreeNode branch) {
282      return branch is SymbolicExpressionTreeTopLevelNode;
283    }
284
285    private static int SampleArity(IRandom random, ISymbolicExpressionTreeNode node, int targetLength) {
286      // select actualArity randomly with the constraint that the sub-trees in the minimal arity can become large enough
287      int minArity = node.Grammar.GetMinSubtreeCount(node.Symbol);
288      int maxArity = node.Grammar.GetMaxSubtreeCount(node.Symbol);
289      if (maxArity > targetLength) {
290        maxArity = targetLength;
291      }
292      // the min number of sub-trees has to be set to a value that is large enough so that the largest possible tree is at least tree length
293      // if 1..3 trees are possible and the largest possible first sub-tree is smaller larger than the target length then minArity should be at least 2
294      long aggregatedLongestExpressionLength = 0;
295      for (int i = 0; i < maxArity; i++) {
296        aggregatedLongestExpressionLength += (from s in node.Grammar.GetAllowedSymbols(node.Symbol, i)
297                                              select node.Grammar.GetMaxExpressionLength(s)).Max();
298        if (aggregatedLongestExpressionLength < targetLength) minArity = i;
299        else break;
300      }
301
302      // the max number of sub-trees has to be set to a value that is small enough so that the smallest possible tree is at most tree length
303      // if 1..3 trees are possible and the smallest possible first sub-tree is already larger than the target length then maxArity should be at most 0
304      long aggregatedShortestExpressionLength = 0;
305      for (int i = 0; i < maxArity; i++) {
306        aggregatedShortestExpressionLength += (from s in node.Grammar.GetAllowedSymbols(node.Symbol, i)
307                                               select node.Grammar.GetMinExpressionLength(s)).Min();
308        if (aggregatedShortestExpressionLength > targetLength) {
309          maxArity = i;
310          break;
311        }
312      }
313      if (minArity > maxArity) throw new ArgumentException();
314      return random.Next(minArity, maxArity + 1);
315    }
316  }
317}
Note: See TracBrowser for help on using the repository browser.