Free cookie consent management tool by TermsFeed Policy Generator

Changeset 13480


Ignore:
Timestamp:
12/17/15 20:13:57 (9 years ago)
Author:
bburlacu
Message:

#1772: Schema diversification strategy: implement adaptive replacement ratio and added number of evaluated solutions per generation to the diversification statistics operator

Location:
branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/DiversificationStatisticsOperator.cs

    r12988 r13480  
    3737    private const string AverageSchemaLengthParameterName = "AverageSchemaLength";
    3838    private const string ResultCollectionParameterName = "Results";
     39    private const string EvaluatedSolutionsParameterName = "EvaluatedSolutions";
    3940
    4041    public ILookupParameter<IntValue> NumberOfChangedTreesParameter {
     
    5051      get { return (ILookupParameter<ResultCollection>)Parameters[ResultCollectionParameterName]; }
    5152    }
     53    public ILookupParameter<IntValue> EvaluatedSolutionsParameter {
     54      get { return (ILookupParameter<IntValue>)Parameters[EvaluatedSolutionsParameterName]; }
     55    }
     56
     57    private int evaluatedSolutionsTracker;
    5258
    5359    public DiversificationStatisticsOperator() {
     
    5662      Parameters.Add(new LookupParameter<DoubleValue>(AverageSchemaLengthParameterName));
    5763      Parameters.Add(new LookupParameter<ResultCollection>(ResultCollectionParameterName));
     64      Parameters.Add(new LookupParameter<IntValue>(EvaluatedSolutionsParameterName));
     65    }
     66
     67    public override void ClearState() {
     68      evaluatedSolutionsTracker = 0;
     69      base.ClearState();
    5870    }
    5971
     
    6678
    6779    public override IOperation Apply() {
    68 
    6980      var results = ResultCollectionParameter.ActualValue;
    7081      DataTable table;
     
    8798        table.Rows.Add(row);
    8899      }
     100      if (!results.ContainsKey("EvaluatedSolutionsPerGeneration")) {
     101        table = new DataTable();
     102        results.Add(new Result("EvaluatedSolutionsPerGeneration", table));
     103        var row = new DataRow("Evaluated solutions");
     104        table.Rows.Add(row);
     105        row.Values.Add(0);
     106      }
     107
     108      var evaluatedSolutions = EvaluatedSolutionsParameter.ActualValue.Value - evaluatedSolutionsTracker;
    89109      ((DataTable)results["NumberOfChangedTrees"].Value).Rows["Changed trees"].Values.Add(NumberOfChangedTreesParameter.ActualValue.Value);
    90110      ((DataTable)results["AverageSchemaLength"].Value).Rows["Average schema length"].Values.Add(AverageSchemaLengthParameter.ActualValue.Value);
    91111      ((DataTable)results["NumberOfSchemas"].Value).Rows["Number of schemas"].Values.Add(NumberOfSchemasParameter.ActualValue.Value);
     112      ((DataTable)results["EvaluatedSolutionsPerGeneration"].Value).Rows["Evaluated solutions"].Values.Add(evaluatedSolutions);
     113      evaluatedSolutionsTracker += evaluatedSolutions;
    92114
    93115      return base.Apply();
  • branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaCreator.cs

    r12988 r13480  
    3535  [StorableClass]
    3636  public class SchemaCreator : EvolutionTrackingOperator<ISymbolicExpressionTree> {
     37    #region parameter names
    3738    private const string MinimumSchemaLengthParameterName = "MinimumSchemaLength";
    3839    private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency";
     
    4849    private const string NumberOfSchemasParameterName = "NumberOfSchemas";
    4950    private const string AverageSchemaLengthParameterName = "AverageSchemaLength";
     51    private const string UseAdaptiveReplacementRatioParameterName = "UseAdaptiveReplacementRatio";
     52    private const string ReplacementRatioUpdateRuleParameterName = "ReplacementRatioUpdateRule";
     53    #endregion
    5054
    5155    #region parameters
     56
     57    public IConstrainedValueParameter<StringValue> ReplacementRatioUpdateRuleParameter {
     58      get { return (IConstrainedValueParameter<StringValue>)Parameters[ReplacementRatioUpdateRuleParameterName]; }
     59    }
     60    public IFixedValueParameter<BoolValue> UseAdaptiveReplacementRatioParameter {
     61      get { return (IFixedValueParameter<BoolValue>)Parameters[UseAdaptiveReplacementRatioParameterName]; }
     62    }
    5263    public IFixedValueParameter<BoolValue> ExclusiveMatchingParameter {
    5364      get { return (IFixedValueParameter<BoolValue>)Parameters[ExclusiveMatchingParameterName]; }
     
    99110
    100111    public SchemaCreator() {
     112      #region add parameters
    101113      Parameters.Add(new FixedValueParameter<IntValue>(MinimumSchemaLengthParameterName, new IntValue(10)));
    102114      Parameters.Add(new FixedValueParameter<PercentValue>(MinimumSchemaFrequencyParameterName, new PercentValue(0.01)));
     
    112124      Parameters.Add(new ValueParameter<IntValue>(NumberOfSchemasParameterName, new IntValue(0)));
    113125      Parameters.Add(new ValueParameter<DoubleValue>(AverageSchemaLengthParameterName, new DoubleValue(0)));
    114 
     126      Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveReplacementRatioParameterName, new BoolValue(true)));
     127
     128      var replacementRatioUpdateRules = new ItemSet<StringValue>(new[] { new StringValue("f(x) = x"), new StringValue("f(x) = tanh(x)"), new StringValue("f(x) = tanh(2x)"), new StringValue("f(x) = tanh(3x)") });
     129      Parameters.Add(new ConstrainedValueParameter<StringValue>(ReplacementRatioUpdateRuleParameterName, replacementRatioUpdateRules));
     130      #endregion
    115131      NumberOfChangedTreesParameter.Hidden = true;
    116132      NumberOfSchemasParameter.Hidden = true;
     
    138154      var n = (int)Math.Round(ExecutionContext.Scope.SubScopes.Count * PercentageOfPopulation);
    139155      var scopes = new ScopeList(ExecutionContext.Scope.SubScopes.Take(n));
     156
     157      var updateEstimatedValues = new OperationCollection { Parallel = true };
     158      if (updateEstimatedValuesOperator == null)
     159        updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
     160
     161      foreach (var s in scopes) {
     162        if (!s.Variables.ContainsKey("EstimatedValues"))
     163          s.Variables.Add(new Core.Variable("EstimatedValues"));
     164        updateEstimatedValues.Add(ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, s));
     165      }
     166
     167      Func<double, double> rule;
     168      var replacementRule = ReplacementRatioUpdateRuleParameter.Value.Value;
     169
     170      switch (replacementRule) {
     171        case "f(x) = x": {
     172            rule = x => x;
     173            break;
     174          }
     175        case "f(x) = tanh(x)": {
     176            rule = x => Math.Tanh(x);
     177            break;
     178          }
     179        case "f(x) = tanh(2x)": {
     180            rule = x => Math.Tanh(2 * x);
     181            break;
     182          }
     183        case "f(x) = tanh(3x)": {
     184            rule = x => Math.Tanh(3 * x);
     185            break;
     186          }
     187        default:
     188          throw new ArgumentException("Unknown replacement rule");
     189      }
     190
     191      var evaluateSchemas = new OperationCollection();
    140192
    141193      // for now, only consider crossover offspring
     
    146198                     select v;
    147199
     200      var schemas = new List<ISymbolicExpressionTree>(GenerateSchemas(vertices, MinimumSchemaLength));
     201
     202      #region create schemas and add subscopes representing the individuals
     203      foreach (var schema in schemas) {
     204        evaluateSchemas.Add(ExecutionContext.CreateChildOperation(new SchemaEvaluator { Schema = schema, ReplacementRule = rule }, ExecutionContext.Scope));
     205      }
     206      #endregion
     207
     208      if (diversificationStatisticsOperator == null)
     209        diversificationStatisticsOperator = new DiversificationStatisticsOperator();
     210
     211      var calculateStatistics = ExecutionContext.CreateChildOperation(diversificationStatisticsOperator);
     212
     213      // set parameters for statistics
     214      AverageSchemaLengthParameter.Value = new DoubleValue(schemas.Average(x => x.Length));
     215      NumberOfSchemasParameter.Value = new IntValue(schemas.Count);
     216      NumberOfChangedTreesParameter.Value = new IntValue(0);
     217
     218      // return an operation collection containing all the scope operations + base.Apply()
     219      return new OperationCollection { updateEstimatedValues, evaluateSchemas, calculateStatistics, base.Apply() };
     220    }
     221
     222    public static IEnumerable<ISymbolicExpressionTree> GenerateSchemas(IEnumerable<IGenealogyGraphNode<ISymbolicExpressionTree>> vertices, int minimumSchemaLength) {
     223      var anySubtreeSymbol = new AnySubtreeSymbol();
     224      //      var anyNodeSymbol = new AnyNodeSymbol();
    148225      var groups = vertices.GroupBy(x => x.Parents.First()).OrderByDescending(g => g.Count()).ToList();
    149       var anySubtreeSymbol = new AnySubtreeSymbol();
    150 
    151       var updateEstimatedValues = new OperationCollection { Parallel = true };
    152       if (updateEstimatedValuesOperator == null)
    153         updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
    154 
    155       foreach (var s in scopes) {
    156         if (!s.Variables.ContainsKey("EstimatedValues"))
    157           s.Variables.Add(new Core.Variable("EstimatedValues"));
    158         updateEstimatedValues.Add(ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, s));
    159       }
    160 
    161       var evaluateSchemas = new OperationCollection();
    162       var schemas = new List<ISymbolicExpressionTree>();
    163       var wildCardCounts = new List<double>();
    164       #region create schemas and add subscopes representing the individuals
     226      var hash = new HashSet<string>();
     227      var formatter = new SymbolicExpressionTreeStringFormatter { Indent = false, AppendNewLines = false };
    165228      foreach (var g in groups) {
    166229        var parent = g.Key;
    167         if (parent.Data.Length < MinimumSchemaLength)
     230        if (parent.Data.Length < minimumSchemaLength)
    168231          continue;
    169232        bool replaced = false;
     
    171234        var nodes = schema.IterateNodesPrefix().ToList();
    172235        var arcs = g.Select(x => x.InArcs.Last()).Where(x => x.Data != null);
    173         var indices = arcs.Select(x => ((IFragment<ISymbolicExpressionTreeNode>)x.Data).Index1).Distinct().ToArray();
     236        var indices = (from arc in arcs
     237                       let fragment = (IFragment<ISymbolicExpressionTreeNode>)arc.Data
     238                       select fragment.Index1).Distinct().ToArray();
    174239        Array.Sort(indices);
    175240        var nodesToReplace = indices.Select(x => nodes[x]).ToList();
    176         int w = 0;
    177241        for (int i = nodesToReplace.Count - 1; i >= 0; --i) {
    178242          var node = nodesToReplace[i];
    179243
    180244          // do not replace the node with a wildcard if it would result in a length < MinimumSchemaLength
    181           if ((schema.Length - node.GetLength() + 1) < MinimumSchemaLength)
     245          if (schema.Length - node.GetLength() + 1 < minimumSchemaLength)
    182246            continue;
    183247
    184           var replacement = anySubtreeSymbol.CreateTreeNode();
    185           ReplaceSubtree(node, replacement, false);
     248          //          var replacement = anySubtreeSymbol.CreateTreeNode();
     249          //          ReplaceSubtree(node, replacement, false);
     250          var replacement = new AnyNodeSymbol(node.Symbol.MinimumArity, node.Symbol.MinimumArity).CreateTreeNode();
     251          ReplaceSubtree(node, replacement, true);
    186252          replaced = true;
    187           ++w;
    188253        }
    189254        if (replaced) {
    190           // store the schemas and the number of wildcards they contain in two lists
    191           schemas.Add(schema);
    192           wildCardCounts.Add(w);
     255          var str = formatter.Format(schema.Root.GetSubtree(0).GetSubtree(0));
     256          if (hash.Contains(str)) continue;
     257          yield return schema;
     258          hash.Add(str);
    193259        }
    194260      }
    195 
    196       for (int i = 0; i < schemas.Count; ++i) {
    197         var schema = schemas[i];
    198         evaluateSchemas.Add(ExecutionContext.CreateChildOperation(new SchemaEvaluator { Schema = schema }, ExecutionContext.Scope));
    199       }
    200       #endregion
    201 
    202       if (diversificationStatisticsOperator == null)
    203         diversificationStatisticsOperator = new DiversificationStatisticsOperator();
    204 
    205       var calculateStatistics = ExecutionContext.CreateChildOperation(diversificationStatisticsOperator);
    206 
    207       // set parameters for statistics
    208       AverageSchemaLengthParameter.Value = new DoubleValue(schemas.Average(x => x.Length));
    209       NumberOfSchemasParameter.Value = new IntValue(schemas.Count);
    210       NumberOfChangedTreesParameter.Value = new IntValue(0);
    211 
    212       // return an operation collection containing all the scope operations + base.Apply()
    213       return new OperationCollection { updateEstimatedValues, evaluateSchemas, calculateStatistics, base.Apply() };
    214     }
    215 
    216     private static void ReplaceSubtree(ISymbolicExpressionTreeNode original, ISymbolicExpressionTreeNode replacement,
    217       bool preserveChildren = true) {
     261    }
     262
     263    private static void ReplaceSubtree(ISymbolicExpressionTreeNode original, ISymbolicExpressionTreeNode replacement, bool preserveChildren = true) {
    218264      var parent = original.Parent;
    219265      if (parent == null)
  • branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaEvaluator.cs

    r12988 r13480  
    2121
    2222using System;
     23using System.Collections.Generic;
    2324using System.Linq;
    2425using System.Threading.Tasks;
     
    2829using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    2930using HeuristicLab.EvolutionTracking;
     31using HeuristicLab.Optimization;
    3032using HeuristicLab.Parameters;
    3133using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     
    4951    private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
    5052    private const string MutatorParameterName = "Mutator";
     53    private const string CrossoverParameterName = "Crossover";
    5154    private const string RandomReplacementParameterName = "RandomReplacement";
    5255    private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees";
     
    5457    private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
    5558    private const string ExclusiveMatchingParameterName = "ExclusiveMatching";
     59    private const string UseAdaptiveReplacementRatioParameterName = "UseAdaptiveReplacementRatio";
    5660    #endregion
    5761
    5862    #region parameters
     63    public ILookupParameter<BoolValue> UseAdaptiveReplacementRatioParameter {
     64      get { return (ILookupParameter<BoolValue>)Parameters[UseAdaptiveReplacementRatioParameterName]; }
     65    }
    5966    public ILookupParameter<BoolValue> ExclusiveMatchingParameter {
    6067      get { return (ILookupParameter<BoolValue>)Parameters[ExclusiveMatchingParameterName]; }
     
    8693    public ILookupParameter<ISymbolicExpressionTreeManipulator> MutatorParameter {
    8794      get { return (ILookupParameter<ISymbolicExpressionTreeManipulator>)Parameters[MutatorParameterName]; }
     95    }
     96    public ILookupParameter<ISymbolicExpressionTreeCrossover> CrossoverParameter {
     97      get { return (ILookupParameter<ISymbolicExpressionTreeCrossover>)Parameters[CrossoverParameterName]; }
    8898    }
    8999    public ILookupParameter<IRandom> RandomParameter {
     
    120130    private readonly SymbolicExpressionTreePhenotypicSimilarityCalculator calculator = new SymbolicExpressionTreePhenotypicSimilarityCalculator();
    121131    private readonly QueryMatch qm;
     132
     133    public Func<double, double> ReplacementRule { get; set; }
    122134
    123135    private readonly ISymbolicExpressionTreeNodeEqualityComparer comp = new SymbolicExpressionTreeNodeEqualityComparer {
     
    148160      Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName));
    149161      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeManipulator>(MutatorParameterName));
     162      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeCrossover>(CrossoverParameterName));
    150163      Parameters.Add(new LookupParameter<BoolValue>(RandomReplacementParameterName));
    151164      Parameters.Add(new LookupParameter<IntValue>(NumberOfChangedTreesParameterName));
     
    153166      Parameters.Add(new LookupParameter<IntValue>(MaxDegreeOfParalellismParameterName));
    154167      Parameters.Add(new LookupParameter<BoolValue>(ExclusiveMatchingParameterName));
     168      Parameters.Add(new LookupParameter<BoolValue>(UseAdaptiveReplacementRatioParameterName));
    155169      #endregion
    156170    }
     
    175189    public override IOperation Apply() {
    176190      var individuals = ExecutionContext.Scope.SubScopes; // the scopes represent the individuals
     191      var trees = individuals.Select(x => (ISymbolicExpressionTree)x.Variables["SymbolicExpressionTree"].Value).ToList();
     192      var qualities = individuals.Select(x => (DoubleValue)x.Variables["Quality"].Value).ToList();
    177193
    178194      var random = RandomParameter.ActualValue;
     
    185201      // first apply the length and root equality checks in order to filter the individuals
    186202      var exclusiveMatching = ExclusiveMatchingParameter.ActualValue.Value;
    187       var filtered = exclusiveMatching ? (from ind in individuals
    188                                           where !ind.Variables.ContainsKey("AlreadyMatched")
    189                                           let t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value
    190                                           where t.Length >= s.Length && qm.Comparer.Equals(t.Root.GetSubtree(0).GetSubtree(0), sRoot)
    191                                           select ind).ToList()
    192                                        : (from ind in individuals
    193                                           let t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value
    194                                           where t.Length >= s.Length && qm.Comparer.Equals(t.Root.GetSubtree(0).GetSubtree(0), sRoot)
    195                                           select ind).ToList();
     203      var filtered = new List<int>();
     204      for (int i = 0; i < individuals.Count; ++i) {
     205        if (exclusiveMatching && individuals[i].Variables.ContainsKey("AlreadyMatched")) continue;
     206        var t = trees[i];
     207        var tRoot = t.Root.GetSubtree(0).GetSubtree(0);
     208        if (t.Length < s.Length || !qm.Comparer.Equals(tRoot, sRoot)) continue;
     209        filtered.Add(i);
     210      }
    196211
    197212      // if we don't have enough filtered individuals, then we are done
     213      // if the schema exceeds the minimum frequency, it gets processed further
    198214      if (filtered.Count < countThreshold) {
    199215        return base.Apply();
     
    211227        Parallel.For(0, filtered.Count, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism },
    212228          i => {
    213             var t = (ISymbolicExpressionTree)filtered[i].Variables["SymbolicExpressionTree"].Value;
     229            var index = filtered[i];
     230            var t = trees[index];
    214231            var tNodes = QueryMatch.InitializePostOrder(t.Root.GetSubtree(0).GetSubtree(0));
    215232            if (qm.Match(tNodes, sNodes)) {
     
    218235          });
    219236
    220         for (int i = 0; i < matching.Length; ++i) {
    221           if (matching[i])
    222             matchingIndividuals.Add(filtered[i]);
    223         }
     237        matchingIndividuals.AddRange(filtered.Where((x, i) => matching[i]).Select(x => individuals[x]));
    224238      } else {
    225239        for (int i = 0; i < filtered.Count; ++i) {
     
    228242            break;
    229243
    230           var ind = filtered[i];
    231           var t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value;
    232           var tNodes = QueryMatch.InitializePostOrder(t.Root.GetSubtree(0).GetSubtree(0));
     244          var index = filtered[i];
     245          var tRoot = trees[index].Root.GetSubtree(0).GetSubtree(0);
     246          var tNodes = QueryMatch.InitializePostOrder(tRoot);
    233247          if (qm.Match(tNodes, sNodes))
    234             matchingIndividuals.Add(ind);
     248            matchingIndividuals.Add(individuals[index]);
    235249        }
    236250      }
    237251
    238       // additional condition: the average schema quality should be equal or greater than the population average quality
    239252      if (matchingIndividuals.Count < countThreshold) {
    240253        return base.Apply();
    241254      }
    242255
    243       var similarity = CalculatePhenotypicSimilarity(matchingIndividuals, calculator, executeInParallel, maxDegreeOfParallelism);
     256      var similarity = CalculateSimilarity(matchingIndividuals, calculator, executeInParallel, maxDegreeOfParallelism);
    244257      if (similarity < MinimumPhenotypicSimilarity.Value) {
    245258        return base.Apply();
    246259      }
    247260
    248       int n = (int)Math.Floor(matchingIndividuals.Count * ReplacementRatio.Value);
     261      double replacementRatio;
     262      var adaptiveReplacementRatio = UseAdaptiveReplacementRatioParameter.ActualValue.Value;
     263
     264      if (adaptiveReplacementRatio) {
     265        var r = (double)matchingIndividuals.Count / individuals.Count;
     266        replacementRatio = ReplacementRule(r);
     267      } else {
     268        replacementRatio = ReplacementRatio.Value;
     269      }
     270
     271      int n = (int)Math.Floor(matchingIndividuals.Count * replacementRatio);
    249272      var individualsToReplace = RandomReplacement.Value ? matchingIndividuals.SampleRandomWithoutRepetition(random, n).ToList()
    250273                                                         : matchingIndividuals.OrderBy(x => (DoubleValue)x.Variables["Quality"].Value).Take(n).ToList();
     
    265288    }
    266289
    267     private static double CalculatePhenotypicSimilarity(ScopeList individuals, SymbolicExpressionTreePhenotypicSimilarityCalculator calculator, bool parallel = false, int nThreads = -1) {
     290    public static double CalculateSimilarity(ScopeList individuals, ISolutionSimilarityCalculator calculator, bool parallel = false, int nThreads = -1) {
    268291      double similarity = 0;
    269292      int count = individuals.Count;
     293      if (count < 2) return double.NaN;
    270294      if (parallel) {
    271295        var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = nThreads };
Note: See TracChangeset for help on using the changeset viewer.