Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.StructureIdentification/TreeGardener.cs @ 214

Last change on this file since 214 was 202, checked in by gkronber, 16 years ago

bug fixes in struct-id operators

File size: 21.2 KB
RevLine 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Text;
25using HeuristicLab.Core;
26using HeuristicLab.Constraints;
27using System.Diagnostics;
28using HeuristicLab.Data;
29using System.Linq;
30using HeuristicLab.Random;
31using HeuristicLab.Operators;
32using HeuristicLab.Selection;
[155]33using HeuristicLab.Functions;
34using System.Collections;
[2]35
36namespace HeuristicLab.StructureIdentification {
37  internal class TreeGardener {
// Random number source used for every random decision made by this class.
private readonly IRandom random;
// Operator library whose operator group is partitioned by the constructor.
private readonly GPOperatorLibrary funLibrary;
// Library functions whose maximal arity is greater than zero.
private readonly List<IFunction> functions;

// Library functions whose maximal arity is zero.
private readonly List<IFunction> terminals;
/// <summary>
/// Gets the library functions that have a maximal arity of zero.
/// The internal list itself is returned, not a copy.
/// </summary>
internal IList<IFunction> Terminals {
  get { return terminals; }
}

// Every function of the library, terminals and non-terminals alike.
private readonly List<IFunction> allFunctions;
/// <summary>
/// Gets all functions of the library (terminals and non-terminals).
/// The internal list itself is returned, not a copy.
/// </summary>
internal IList<IFunction> AllFunctions {
  get { return allFunctions; }
}
51
[180]52    #region constructors
/// <summary>
/// Creates a gardener that works on the functions of the given operator library.
/// Every operator of the library's group is added to the list of all functions and,
/// depending on its maximal arity, either to the terminals (maximal arity 0) or to the functions.
/// </summary>
/// <param name="random">Random number generator used for all random decisions.</param>
/// <param name="funLibrary">Library that supplies the available functions.</param>
internal TreeGardener(IRandom random, GPOperatorLibrary funLibrary) {
  this.random = random;
  this.funLibrary = funLibrary;
  allFunctions = new List<IFunction>();
  terminals = new List<IFunction>();
  functions = new List<IFunction>();
  // partition the library by the arity constraints of each function
  foreach(IFunction candidate in funLibrary.Group.Operators) {
    int minArity, maxArity;
    GetMinMaxArity(candidate, out minArity, out maxArity);
    List<IFunction> target = maxArity == 0 ? terminals : functions;
    target.Add(candidate);
    allFunctions.Add(candidate);
  }
}
[180]72    #endregion
[2]73
74    #region random initialization
/// <summary>
/// Creates a random balanced tree that respects a maximal size and height.
/// When max-height or max-size is 1 the result is a single random terminal.
/// Otherwise the root is chosen among all functions (terminals included) whose minimal
/// tree height and size fit into the limits, so the result is either a terminal (size 1)
/// or a tree with a function in its root (at least height 2).
/// </summary>
/// <param name="maxTreeSize">Maximal size of the tree (number of nodes).</param>
/// <param name="maxTreeHeight">Maximal height of the tree.</param>
/// <returns>The root node of the new tree.</returns>
internal IFunctionTree CreateBalancedRandomTree(int maxTreeSize, int maxTreeHeight) {
  IFunction root = GetRandomRoot(maxTreeSize, maxTreeHeight);
  // the root itself uses up one node and one level of the budget
  return MakeBalancedTree(root, maxTreeSize - 1, maxTreeHeight - 1);
}
87
/// <summary>
/// Creates a random (unbalanced) tree that respects a maximal size and height.
/// When max-height or max-size is 1 the result is a single random terminal.
/// Otherwise the root is chosen among all functions (terminals included) whose minimal
/// tree height and size fit into the limits, so the result is either a terminal (size 1)
/// or a tree with a function in its root (at least height 2).
/// </summary>
/// <param name="maxTreeSize">Maximal size of the tree (number of nodes).</param>
/// <param name="maxTreeHeight">Maximal height of the tree.</param>
/// <returns>The root node of the new tree.</returns>
internal IFunctionTree CreateUnbalancedRandomTree(int maxTreeSize, int maxTreeHeight) {
  IFunction root = GetRandomRoot(maxTreeSize, maxTreeHeight);
  // the root itself uses up one node and one level of the budget
  return MakeUnbalancedTree(root, maxTreeSize - 1, maxTreeHeight - 1);
}
100
/// <summary>
/// Selects a random function from allowedFunctions and creates a random (unbalanced)
/// tree with maximal size and height. Shorthand for the four-argument overload with
/// balancing switched off.
/// </summary>
/// <param name="allowedFunctions">Set of allowed functions.</param>
/// <param name="maxTreeSize">Maximal size of the tree (number of nodes).</param>
/// <param name="maxTreeHeight">Maximal height of the tree.</param>
/// <returns>New random unbalanced tree</returns>
internal IFunctionTree CreateRandomTree(ICollection<IFunction> allowedFunctions, int maxTreeSize, int maxTreeHeight) {
  // unbalanced trees are the default
  const bool balanceTrees = false;
  return CreateRandomTree(allowedFunctions, maxTreeSize, maxTreeHeight, balanceTrees);
}
[179]112
/// <summary>
/// Selects a random function from allowedFunctions and creates a (un)balanced random tree with maximal size and height.
/// Max-size and max-height are not hard constraints: if every allowed function needs a bigger tree,
/// the limits are raised to the smallest minimal height/size found in the set so that a tree can be built.
/// An empty set of allowed functions makes the minimum computation fail with an InvalidOperationException.
/// </summary>
/// <param name="allowedFunctions">Set of allowed functions.</param>
/// <param name="maxTreeSize">Maximal size of the tree (number of nodes).</param>
/// <param name="maxTreeHeight">Maximal height of the tree.</param>
/// <param name="balanceTrees">Flag determining whether the tree should be balanced or not.</param>
/// <returns>New random tree</returns>
internal IFunctionTree CreateRandomTree(ICollection<IFunction> allowedFunctions, int maxTreeSize, int maxTreeHeight, bool balanceTrees) {
  // raise the height limit to the smallest height any allowed function needs
  int minTreeHeight = allowedFunctions.Min(f => GetMinimalTreeHeight(f));
  maxTreeHeight = Math.Max(maxTreeHeight, minTreeHeight);
  // raise the size limit to the smallest size any allowed function needs
  int minTreeSize = allowedFunctions.Min(f => GetMinimalTreeSize(f));
  maxTreeSize = Math.Max(maxTreeSize, minTreeSize);

  // draw a random height and size (height first, then size) within the limits
  int treeHeight = random.Next(minTreeHeight, maxTreeHeight + 1);
  int treeSize = random.Next(minTreeSize, maxTreeSize + 1);

  // only functions whose minimal tree fits into the drawn height and size may become the root
  List<IFunction> rootCandidates = new List<IFunction>();
  foreach(IFunction f in allowedFunctions) {
    if(GetMinimalTreeHeight(f) <= treeHeight && GetMinimalTreeSize(f) <= treeSize) {
      rootCandidates.Add(f);
    }
  }
  IFunction rootFunction = RandomSelect(rootCandidates);

  // NOTE(review): the tree is grown with the (possibly raised) maximal limits, not with the
  // drawn treeSize/treeHeight, which only filter the root candidates - confirm this is intended.
  return balanceTrees
    ? MakeBalancedTree(rootFunction, maxTreeSize - 1, maxTreeHeight - 1)
    : MakeUnbalancedTree(rootFunction, maxTreeSize - 1, maxTreeHeight - 1);
}
151
/// <summary>
/// Builds a composite operation that runs the initialization operator of every tree whose
/// function defines an initialization variable, followed by a RightReducer on the given scope.
/// As a side effect a temporary scope (holding one sub-scope per initialized tree) and a
/// backup scope (to which the existing sub-scopes are added) are appended to the given scope.
/// </summary>
/// <param name="trees">Trees whose local variables should be initialized.</param>
/// <param name="scope">Scope that receives the temporary and backup sub-scopes.</param>
/// <returns>The composite operation; it is only created here, not executed.</returns>
internal CompositeOperation CreateInitializationOperation(ICollection<IFunctionTree> trees, IScope scope) {
  // needed for the parameter shaking operation
  CompositeOperation initializationOperation = new CompositeOperation();
  Scope tempScope = new Scope("Temp. initialization scope");

  foreach(IFunctionTree tree in trees) {
    // trees whose function has no initialization variable are skipped
    if(tree.Function.GetVariable(GPOperatorLibrary.INITIALIZATION) == null) continue;

    IOperator initializer = (IOperator)tree.Function.GetVariable(GPOperatorLibrary.INITIALIZATION).Value;
    // the initializer works on a scope of its own that holds the tree's local variables
    Scope initScope = new Scope();
    foreach(IVariable localVariable in tree.LocalVariables) {
      initScope.AddVariable(localVariable);
    }
    tempScope.AddSubScope(initScope);
    initializationOperation.AddOperation(new AtomicOperation(initializer, initScope));
  }

  Scope backupScope = new Scope("backup");
  foreach(Scope existingSubScope in scope.SubScopes) {
    backupScope.AddSubScope(existingSubScope);
  }
  scope.AddSubScope(tempScope);
  scope.AddSubScope(backupScope);
  // the final operation removes the temporary scopes again
  initializationOperation.AddOperation(new AtomicOperation(new RightReducer(), scope));
  return initializationOperation;
}
179    #endregion
180
181    #region tree information gathering
/// <summary>
/// Counts the nodes of a tree: the node itself plus the sizes of all of its sub-trees.
/// </summary>
/// <param name="tree">Root of the tree to measure.</param>
/// <returns>Number of nodes in the tree (1 for a node without sub-trees).</returns>
internal int GetTreeSize(IFunctionTree tree) {
  int subTreeNodes = 0;
  foreach(IFunctionTree subTree in tree.SubTrees) {
    // checked: an overflow raises an OverflowException instead of wrapping around
    checked { subTreeNodes += GetTreeSize(subTree); }
  }
  return 1 + subTreeNodes;
}
185
/// <summary>
/// Computes the height of a tree: 1 for a node without sub-trees, otherwise
/// 1 plus the height of its tallest sub-tree.
/// </summary>
/// <param name="tree">Root of the tree to measure.</param>
/// <returns>Height of the tree in levels.</returns>
internal int GetTreeHeight(IFunctionTree tree) {
  // stays 0 for a leaf; every sub-tree contributes a height of at least 1
  int tallestSubTree = 0;
  foreach(IFunctionTree subTree in tree.SubTrees) {
    tallestSubTree = Math.Max(tallestSubTree, GetTreeHeight(subTree));
  }
  return 1 + tallestSubTree;
}
190
/// <summary>
/// Picks a random node among all nodes of the tree that have at least one sub-tree.
/// The candidate list additionally contains null, which stands for the parent of the root node.
/// </summary>
/// <param name="tree">Root of the tree to search.</param>
/// <returns>A randomly chosen node with sub-trees, or null for "parent of the root".</returns>
internal IFunctionTree GetRandomParentNode(IFunctionTree tree) {
  // the leading null represents the parent of the root node
  List<IFunctionTree> candidates = new List<IFunctionTree> { null };

  TreeForEach(tree, node => {
    if(node.SubTrees.Count > 0) candidates.Add(node);
  });

  int pick = random.Next(candidates.Count);
  return candidates[pick];
}
205
/// <summary>
/// Collects every node of the tree, the root included, in the order in which
/// TreeForEach visits them (a node before its sub-trees).
/// </summary>
/// <param name="root">Root of the tree to flatten.</param>
/// <returns>New list containing the root and all nodes below it.</returns>
internal ICollection<IFunctionTree> GetAllSubTrees(IFunctionTree root) {
  List<IFunctionTree> collected = new List<IFunctionTree>();
  TreeForEach(root, collected.Add);
  return collected;
}
211
/// <summary>
/// Returns the level at which branch occurs in the tree:
/// 1 if branch is the tree itself,
/// 2 if branch is one of the direct sub-trees of tree,
/// and so on for deeper levels;
/// -1 if branch is not part of the tree.
/// </summary>
/// <param name="tree">root of the function tree to process</param>
/// <param name="branch">branch that is searched in the tree</param>
/// <returns>The 1-based level of the branch, or -1 when it is not found.</returns>
internal int GetBranchLevel(IFunctionTree tree, IFunctionTree branch) {
  // the root counts as level 1
  const int rootLevel = 1;
  return GetBranchLevelHelper(tree, branch, rootLevel);
}
225
// Recursive helper of GetBranchLevel: 'level' is the level of 'tree' itself.
// Searches depth-first and returns the level of the first node that is the same
// object as 'branch', or -1 when no such node exists below 'tree'.
private int GetBranchLevelHelper(IFunctionTree tree, IFunctionTree branch, int level) {
  if(object.ReferenceEquals(branch, tree)) return level;

  // lazily search the sub-trees one after the other and stop at the first hit
  return tree.SubTrees
    .Select(subTree => GetBranchLevelHelper(subTree, branch, level + 1))
    .Where(foundLevel => foundLevel != -1)
    .DefaultIfEmpty(-1)
    .First();
}
237
/// <summary>
/// Checks recursively that every node of the tree has a number of sub-trees that lies within
/// the bounds of each NumberOfSubOperatorsConstraint of the node's function.
/// Constraints of other types are ignored.
/// </summary>
/// <param name="tree">Root of the tree to validate.</param>
/// <returns>true if all nodes satisfy their arity constraints; otherwise false.</returns>
internal bool IsValidTree(IFunctionTree tree) {
  foreach(IConstraint constraint in tree.Function.Constraints) {
    NumberOfSubOperatorsConstraint arity = constraint as NumberOfSubOperatorsConstraint;
    if(arity == null) continue;

    int max = arity.MaxOperators.Data;
    int min = arity.MinOperators.Data;
    int subTreeCount = tree.SubTrees.Count;
    if(subTreeCount < min || subTreeCount > max) return false;
  }
  // the node itself is fine; it is valid if all of its sub-trees are valid too
  return tree.SubTrees.All(subTree => IsValidTree(subTree));
}
252
/// <summary>
/// Returns a random branch from the specified level of the tree.
/// Level 0 is the tree itself, level 1 are its direct sub-trees, and so on
/// (note that this numbering differs from GetBranchLevel, where the root is level 1).
/// </summary>
/// <param name="tree">Root of the tree.</param>
/// <param name="level">Level from which a branch is selected.</param>
/// <returns>The tree itself for level 0, otherwise a random node of the requested level.</returns>
internal IFunctionTree GetRandomBranch(IFunctionTree tree, int level) {
  if(level == 0) return tree;

  List<IFunctionTree> candidates = GetBranchesAtLevel(tree, level);
  // NOTE(review): if the tree has no nodes at this level the list is empty and
  // the selection below cannot succeed - callers presumably pass a valid level; confirm.
  int pick = random.Next(candidates.Count);
  return candidates[pick];
}
259    #endregion
260
[179]261    #region function information (arity, allowed childs and parents)
/// <summary>
/// Returns all non-terminal functions of the library for which IsPossibleParent
/// accepts the given list of children.
/// </summary>
/// <param name="list">Functions that should become children of the parent.</param>
/// <returns>New list with the accepted parent functions, in library order.</returns>
internal ICollection<IFunction> GetPossibleParents(List<IFunction> list) {
  return functions.Where(candidate => IsPossibleParent(candidate, list)).ToList();
}
271
// Decides whether function f can act as parent of the given children.
// The children list may contain the same function more than once.
// Steps: (1) reject f when its maximal arity is smaller than the number of children,
// (2) determine for each slot of f the set of children that are allowed in it,
// (3) reject f when fewer usable slots than children exist,
// (4) greedily assign one child per slot, smallest slot sets first.
// Throws InvalidProgramException when more assignments than children were counted.
private bool IsPossibleParent(IFunction f, List<IFunction> children) {
  int minArity, maxArity;
  GetMinMaxArity(f, out minArity, out maxArity);
  int childCount = children.Count;

  // a function with fewer slots than children can never hold all of them
  if(maxArity < childCount) return false;

  int nSlots = Math.Max(minArity, childCount);

  SubOperatorsConstraintAnalyser analyzer = new SubOperatorsConstraintAnalyser {
    AllPossibleOperators = children.Cast<IOperator>().ToArray<IOperator>()
  };

  // per slot: the set of allowed functions; slots that allow none of the children are dropped
  List<HashSet<IFunction>> slotSets = new List<HashSet<IFunction>>();
  for(int slot = 0; slot < nSlots; slot++) {
    HashSet<IFunction> allowedInSlot = new HashSet<IFunction>(analyzer.GetAllowedOperators(f, slot).Cast<IFunction>());
    if(allowedInSlot.Count > 0) slotSets.Add(allowedInSlot);
  }

  // fewer usable slots than children => the children can never all be placed
  if(slotSets.Count < childCount) return false;

  // process the most restricted slots first
  slotSets.Sort((p, q) => p.Count.CompareTo(q.Count));

  // take the first function of each slot set and remove it from all later sets
  // NOTE(review): the last slot set is never used for an assignment (loop stops at Count - 1)
  int assignments = 0;
  for(int i = 0; i < slotSets.Count - 1; i++) {
    HashSet<IFunction> current = slotSets[i];
    if(current.Count == 0) continue;

    IFunction selected = current.First();
    assignments++;
    for(int j = i + 1; j < slotSets.Count; j++) {
      slotSets[j].Remove(selected);
    }
  }

  // sanity check
  if(assignments > childCount) throw new InvalidProgramException();
  // NOTE(review): the result compares against children.Count - 1, matching the loop bound above;
  // whether this is correct when there are more usable slots than children should be confirmed.
  return assignments == childCount - 1;
}
/// <summary>
/// Returns all non-terminal functions of the library that allow the given child
/// in the sub-tree slot with the given index.
/// </summary>
/// <param name="child">Function that should be placed below the parent.</param>
/// <param name="childIndex">Index of the slot in which the child should be placed.</param>
/// <returns>New list with the matching parent functions, in library order.</returns>
internal IList<IFunction> GetAllowedParents(IFunction child, int childIndex) {
  return functions
    .Where(candidate => GetAllowedSubFunctions(candidate, childIndex).Contains(child))
    .ToList();
}
/// <summary>
/// Tells whether a function is a terminal, i.e. both its minimal and its maximal arity are zero.
/// </summary>
/// <param name="f">Function to inspect.</param>
/// <returns>true if the function takes no sub-trees; otherwise false.</returns>
internal bool IsTerminal(IFunction f) {
  int lowerArity, upperArity;
  GetMinMaxArity(f, out lowerArity, out upperArity);
  if(lowerArity != 0) return false;
  return upperArity == 0;
}
/// <summary>
/// Returns the functions that are allowed in the slot with the given index of function f,
/// as stored in f's ALLOWED_SUBOPERATORS variable.
/// For f == null the internal list of all functions is returned (not a copy).
/// </summary>
/// <param name="f">Parent function, or null for "no parent".</param>
/// <param name="index">Index of the sub-tree slot.</param>
/// <returns>The allowed functions for the slot.</returns>
internal IList<IFunction> GetAllowedSubFunctions(IFunction f, int index) {
  // without a parent every function is allowed
  if(f == null) return allFunctions;

  ItemList slots = (ItemList)f.GetVariable(GPOperatorLibrary.ALLOWED_SUBOPERATORS).Value;
  ItemList allowedInSlot = (ItemList)slots[index];
  List<IFunction> allowed = new List<IFunction>();
  foreach(IFunction function in allowedInSlot) {
    allowed.Add(function);
  }
  return allowed;
}
/// <summary>
/// Reads the minimal and maximal arity of a function from the first
/// NumberOfSubOperatorsConstraint among its constraints.
/// A function without such a constraint gets the default arity of 2 for both values.
/// </summary>
/// <param name="f">Function to inspect.</param>
/// <param name="minArity">Receives the minimal number of sub-trees.</param>
/// <param name="maxArity">Receives the maximal number of sub-trees.</param>
internal void GetMinMaxArity(IFunction f, out int minArity, out int maxArity) {
  // the default arity is 2
  minArity = 2;
  maxArity = 2;

  foreach(IConstraint constraint in f.Constraints) {
    NumberOfSubOperatorsConstraint arity = constraint as NumberOfSubOperatorsConstraint;
    if(arity == null) continue;

    // only the first arity constraint counts
    minArity = arity.MinOperators.Data;
    maxArity = arity.MaxOperators.Data;
    break;
  }
}
372    #endregion
373
374    #region private utility methods
// Selects the function for the root of a new tree.
// With a height or size limit of 1 only terminals are eligible; otherwise the root is drawn
// from all functions whose minimal tree height and size fit into the limits.
// The choice is weighted by tickets (see RandomSelect).
private IFunction GetRandomRoot(int maxTreeSize, int maxTreeHeight) {
  bool onlySingleNodeFits = maxTreeHeight == 1 || maxTreeSize == 1;
  if(onlySingleNodeFits) return RandomSelect(terminals);

  List<IFunction> fittingFunctions = allFunctions.FindAll(
    f => GetMinimalTreeHeight(f) <= maxTreeHeight && GetMinimalTreeSize(f) <= maxTreeSize);
  return RandomSelect(fittingFunctions);
}
[179]386
// Grows a random tree below 'parent'. maxTreeSize and maxTreeHeight are the budgets that
// remain for the sub-trees (the parent node itself is already accounted for by the caller).
// The arity is drawn at random, the size budget is split evenly among the slots, and each slot
// is filled with a function (terminal or not) that is allowed in the slot and fits the budget.
private IFunctionTree MakeUnbalancedTree(IFunction parent, int maxTreeSize, int maxTreeHeight) {
  // no budget left: the parent becomes a node without sub-trees
  if(maxTreeHeight == 0 || maxTreeSize == 0) return parent.GetTreeNode();

  int minArity, maxArity;
  GetMinMaxArity(parent, out minArity, out maxArity);
  // never create more sub-trees than nodes are left
  // NOTE(review): this can push maxArity below minArity; how random.Next reacts to such a range
  // is not visible here - confirm.
  maxArity = Math.Min(maxArity, maxTreeSize);
  int actualArity = random.Next(minArity, maxArity + 1);

  IFunctionTree parentTree = parent.GetTreeNode();
  if(actualArity <= 0) return parentTree;

  int maxSubTreeSize = maxTreeSize / actualArity;
  for(int slot = 0; slot < actualArity; slot++) {
    // candidates: allowed in this slot and small enough for the remaining budget
    List<IFunction> candidates = new List<IFunction>();
    foreach(IFunction f in GetAllowedSubFunctions(parent, slot)) {
      if(GetMinimalTreeHeight(f) <= maxTreeHeight && GetMinimalTreeSize(f) <= maxSubTreeSize) {
        candidates.Add(f);
      }
    }
    IFunction chosen = RandomSelect(candidates);
    IFunctionTree subTree = MakeUnbalancedTree(chosen, maxSubTreeSize - 1, maxTreeHeight - 1);
    parentTree.InsertSubTree(slot, subTree);
  }
  return parentTree;
}
410
// Grows a random tree below 'parent' that prefers non-terminal functions in every slot, so that
// branches reach the height limit where possible. Terminals are only used for a slot when no
// non-terminal function fits into the remaining height and size budget.
// NOTE: this method doesn't build fully balanced trees because we have constraints on the
// types of possible sub-functions which can indirectly impose a limit for the depth of a given sub-tree
private IFunctionTree MakeBalancedTree(IFunction parent, int maxTreeSize, int maxTreeHeight) {
  // no budget left: the parent becomes a node without sub-trees
  if(maxTreeHeight == 0 || maxTreeSize == 0) return parent.GetTreeNode();

  int minArity, maxArity;
  GetMinMaxArity(parent, out minArity, out maxArity);
  // never create more sub-trees than nodes are left
  maxArity = Math.Min(maxArity, maxTreeSize);
  int actualArity = random.Next(minArity, maxArity + 1);

  IFunctionTree parentTree = parent.GetTreeNode();
  if(actualArity <= 0) return parentTree;

  int maxSubTreeSize = maxTreeSize / actualArity;
  for(int slot = 0; slot < actualArity; slot++) {
    // first choice: non-terminal functions that fit into the height and size limits
    List<IFunction> innerCandidates = new List<IFunction>();
    foreach(IFunction f in GetAllowedSubFunctions(parent, slot)) {
      if(GetMinimalTreeHeight(f) > maxTreeHeight) continue;
      if(GetMinimalTreeSize(f) > maxSubTreeSize) continue;
      if(IsTerminal(f)) continue;
      innerCandidates.Add(f);
    }

    IFunctionTree subTree;
    if(innerCandidates.Count > 0) {
      IFunction chosen = RandomSelect(innerCandidates);
      subTree = MakeBalancedTree(chosen, maxSubTreeSize - 1, maxTreeHeight - 1);
    } else {
      // nothing fits => fall back to the terminals that are allowed in this slot
      List<IFunction> terminalCandidates = GetAllowedSubFunctions(parent, slot).Where(f => IsTerminal(f)).ToList();
      subTree = RandomSelect(terminalCandidates).GetTreeNode();
    }
    parentTree.InsertSubTree(slot, subTree);
  }
  return parentTree;
}
447
// Reads the integer stored in the operator's MIN_TREE_HEIGHT variable.
private int GetMinimalTreeHeight(IOperator op) {
  IntData minHeight = (IntData)op.GetVariable(GPOperatorLibrary.MIN_TREE_HEIGHT).Value;
  return minHeight.Data;
}
451
// Reads the integer stored in the operator's MIN_TREE_SIZE variable.
private int GetMinimalTreeSize(IOperator op) {
  IntData minSize = (IntData)op.GetVariable(GPOperatorLibrary.MIN_TREE_SIZE).Value;
  return minSize.Data;
}
455
// Applies 'action' to every node of the tree in pre-order:
// first the node itself, then each of its sub-trees from first to last.
private void TreeForEach(IFunctionTree tree, Action<IFunctionTree> action) {
  action(tree);
  foreach(IFunctionTree child in tree.SubTrees) {
    // descend only after the node itself has been visited
    TreeForEach(child, action);
  }
}
462
// Collects all nodes that lie 'level' levels below 'tree' (level 1 = the direct sub-trees).
// Returns an empty list when the tree has no nodes at that depth.
private List<IFunctionTree> GetBranchesAtLevel(IFunctionTree tree, int level) {
  if(level == 1) return new List<IFunctionTree>(tree.SubTrees);

  // one level further down for every sub-tree, results concatenated in sub-tree order
  return tree.SubTrees
    .SelectMany(subTree => GetBranchesAtLevel(subTree, level - 1))
    .ToList();
}
472
// Selects one function at random from functionSet, weighted by tickets: each function occupies
// an interval whose width is the value of its TICKETS variable, and a uniformly drawn
// number picks the interval that contains it.
// Throws InvalidProgramException when nothing can be selected, i.e. when the set is empty or
// the drawn number is not below any accumulated ticket value (e.g. all tickets are zero).
// The exception type is the one this method has always thrown; the messages now explain the cause.
private IFunction RandomSelect(IList<IFunction> functionSet) {
  // an empty candidate set can result from the size/height filters of the callers
  if(functionSet.Count == 0) {
    throw new InvalidProgramException("RandomSelect: the set of candidate functions is empty.");
  }

  // precalculate the upper bounds of the slots
  double[] accumulatedTickets = new double[functionSet.Count];
  double ticketAccumulator = 0;
  for(int i = 0; i < accumulatedTickets.Length; i++) {
    ticketAccumulator += ((DoubleData)functionSet[i].GetVariable(GPOperatorLibrary.TICKETS).Value).Data;
    accumulatedTickets[i] = ticketAccumulator;
  }

  // throw ball
  double r = random.NextDouble() * ticketAccumulator;

  // find the slot that has been hit
  for(int i = 0; i < accumulatedTickets.Length; i++) {
    if(r < accumulatedTickets[i]) return functionSet[i];
  }

  // no slot was hit, e.g. because the tickets of all candidates sum up to zero
  throw new InvalidProgramException("RandomSelect: no function could be selected from " +
    functionSet.Count + " candidate(s); the accumulated ticket value is " + ticketAccumulator + ".");
}
[179]492
493    #endregion
494
[2]495  }
496}
Note: See TracBrowser for help on using the repository browser.