Changeset 13480 for branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification
- Timestamp:
- 12/17/15 20:13:57 (9 years ago)
- Location:
- branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/DiversificationStatisticsOperator.cs
r12988 r13480 37 37 private const string AverageSchemaLengthParameterName = "AverageSchemaLength"; 38 38 private const string ResultCollectionParameterName = "Results"; 39 private const string EvaluatedSolutionsParameterName = "EvaluatedSolutions"; 39 40 40 41 public ILookupParameter<IntValue> NumberOfChangedTreesParameter { … … 50 51 get { return (ILookupParameter<ResultCollection>)Parameters[ResultCollectionParameterName]; } 51 52 } 53 public ILookupParameter<IntValue> EvaluatedSolutionsParameter { 54 get { return (ILookupParameter<IntValue>)Parameters[EvaluatedSolutionsParameterName]; } 55 } 56 57 private int evaluatedSolutionsTracker; 52 58 53 59 public DiversificationStatisticsOperator() { … … 56 62 Parameters.Add(new LookupParameter<DoubleValue>(AverageSchemaLengthParameterName)); 57 63 Parameters.Add(new LookupParameter<ResultCollection>(ResultCollectionParameterName)); 64 Parameters.Add(new LookupParameter<IntValue>(EvaluatedSolutionsParameterName)); 65 } 66 67 public override void ClearState() { 68 evaluatedSolutionsTracker = 0; 69 base.ClearState(); 58 70 } 59 71 … … 66 78 67 79 public override IOperation Apply() { 68 69 80 var results = ResultCollectionParameter.ActualValue; 70 81 DataTable table; … … 87 98 table.Rows.Add(row); 88 99 } 100 if (!results.ContainsKey("EvaluatedSolutionsPerGeneration")) { 101 table = new DataTable(); 102 results.Add(new Result("EvaluatedSolutionsPerGeneration", table)); 103 var row = new DataRow("Evaluated solutions"); 104 table.Rows.Add(row); 105 row.Values.Add(0); 106 } 107 108 var evaluatedSolutions = EvaluatedSolutionsParameter.ActualValue.Value - evaluatedSolutionsTracker; 89 109 ((DataTable)results["NumberOfChangedTrees"].Value).Rows["Changed trees"].Values.Add(NumberOfChangedTreesParameter.ActualValue.Value); 90 110 ((DataTable)results["AverageSchemaLength"].Value).Rows["Average schema length"].Values.Add(AverageSchemaLengthParameter.ActualValue.Value); 91 111 ((DataTable)results["NumberOfSchemas"].Value).Rows["Number of schemas"].Values.Add(NumberOfSchemasParameter.ActualValue.Value); 112 ((DataTable)results["EvaluatedSolutionsPerGeneration"].Value).Rows["Evaluated solutions"].Values.Add(evaluatedSolutions); 113 evaluatedSolutionsTracker += evaluatedSolutions; 92 114 93 115 return base.Apply(); -
branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaCreator.cs
r12988 r13480 35 35 [StorableClass] 36 36 public class SchemaCreator : EvolutionTrackingOperator<ISymbolicExpressionTree> { 37 #region parameter names 37 38 private const string MinimumSchemaLengthParameterName = "MinimumSchemaLength"; 38 39 private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency"; … … 48 49 private const string NumberOfSchemasParameterName = "NumberOfSchemas"; 49 50 private const string AverageSchemaLengthParameterName = "AverageSchemaLength"; 51 private const string UseAdaptiveReplacementRatioParameterName = "UseAdaptiveReplacementRatio"; 52 private const string ReplacementRatioUpdateRuleParameterName = "ReplacementRatioUpdateRule"; 53 #endregion 50 54 51 55 #region parameters 56 57 public IConstrainedValueParameter<StringValue> ReplacementRatioUpdateRuleParameter { 58 get { return (IConstrainedValueParameter<StringValue>)Parameters[ReplacementRatioUpdateRuleParameterName]; } 59 } 60 public IFixedValueParameter<BoolValue> UseAdaptiveReplacementRatioParameter { 61 get { return (IFixedValueParameter<BoolValue>)Parameters[UseAdaptiveReplacementRatioParameterName]; } 62 } 52 63 public IFixedValueParameter<BoolValue> ExclusiveMatchingParameter { 53 64 get { return (IFixedValueParameter<BoolValue>)Parameters[ExclusiveMatchingParameterName]; } … … 99 110 100 111 public SchemaCreator() { 112 #region add parameters 101 113 Parameters.Add(new FixedValueParameter<IntValue>(MinimumSchemaLengthParameterName, new IntValue(10))); 102 114 Parameters.Add(new FixedValueParameter<PercentValue>(MinimumSchemaFrequencyParameterName, new PercentValue(0.01))); … … 112 124 Parameters.Add(new ValueParameter<IntValue>(NumberOfSchemasParameterName, new IntValue(0))); 113 125 Parameters.Add(new ValueParameter<DoubleValue>(AverageSchemaLengthParameterName, new DoubleValue(0))); 114 126 Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveReplacementRatioParameterName, new BoolValue(true))); 127 128 var replacementRatioUpdateRules = new ItemSet<StringValue>(new[] { new StringValue("f(x) = x"), new StringValue("f(x) = tanh(x)"), new StringValue("f(x) = tanh(2x)"), new StringValue("f(x) = tanh(3x)") }); 129 Parameters.Add(new ConstrainedValueParameter<StringValue>(ReplacementRatioUpdateRuleParameterName, replacementRatioUpdateRules)); 130 #endregion 115 131 NumberOfChangedTreesParameter.Hidden = true; 116 132 NumberOfSchemasParameter.Hidden = true; … … 138 154 var n = (int)Math.Round(ExecutionContext.Scope.SubScopes.Count * PercentageOfPopulation); 139 155 var scopes = new ScopeList(ExecutionContext.Scope.SubScopes.Take(n)); 156 157 var updateEstimatedValues = new OperationCollection { Parallel = true }; 158 if (updateEstimatedValuesOperator == null) 159 updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator(); 160 161 foreach (var s in scopes) { 162 if (!s.Variables.ContainsKey("EstimatedValues")) 163 s.Variables.Add(new Core.Variable("EstimatedValues")); 164 updateEstimatedValues.Add(ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, s)); 165 } 166 167 Func<double, double> rule; 168 var replacementRule = ReplacementRatioUpdateRuleParameter.Value.Value; 169 170 switch (replacementRule) { 171 case "f(x) = x": { 172 rule = x => x; 173 break; 174 } 175 case "f(x) = tanh(x)": { 176 rule = x => Math.Tanh(x); 177 break; 178 } 179 case "f(x) = tanh(2x)": { 180 rule = x => Math.Tanh(2 * x); 181 break; 182 } 183 case "f(x) = tanh(3x)": { 184 rule = x => Math.Tanh(3 * x); 185 break; 186 } 187 default: 188 throw new ArgumentException("Unknown replacement rule"); 189 } 190 191 var evaluateSchemas = new OperationCollection(); 140 192 141 193 // for now, only consider crossover offspring … … 146 198 select v; 147 199 200 var schemas = new List<ISymbolicExpressionTree>(GenerateSchemas(vertices, MinimumSchemaLength)); 201 202 #region create schemas and add subscopes representing the individuals 203 foreach (var schema in schemas) { 204 evaluateSchemas.Add(ExecutionContext.CreateChildOperation(new SchemaEvaluator { Schema = schema, ReplacementRule = rule }, ExecutionContext.Scope)); 205 } 206 #endregion 207 208 if (diversificationStatisticsOperator == null) 209 diversificationStatisticsOperator = new DiversificationStatisticsOperator(); 210 211 var calculateStatistics = ExecutionContext.CreateChildOperation(diversificationStatisticsOperator); 212 213 // set parameters for statistics 214 AverageSchemaLengthParameter.Value = new DoubleValue(schemas.Average(x => x.Length)); 215 NumberOfSchemasParameter.Value = new IntValue(schemas.Count); 216 NumberOfChangedTreesParameter.Value = new IntValue(0); 217 218 // return an operation collection containing all the scope operations + base.Apply() 219 return new OperationCollection { updateEstimatedValues, evaluateSchemas, calculateStatistics, base.Apply() }; 220 } 221 222 public static IEnumerable<ISymbolicExpressionTree> GenerateSchemas(IEnumerable<IGenealogyGraphNode<ISymbolicExpressionTree>> vertices, int minimumSchemaLength) { 223 var anySubtreeSymbol = new AnySubtreeSymbol(); 224 // var anyNodeSymbol = new AnyNodeSymbol(); 148 225 var groups = vertices.GroupBy(x => x.Parents.First()).OrderByDescending(g => g.Count()).ToList(); 149 var anySubtreeSymbol = new AnySubtreeSymbol(); 150 151 var updateEstimatedValues = new OperationCollection { Parallel = true }; 152 if (updateEstimatedValuesOperator == null) 153 updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator(); 154 155 foreach (var s in scopes) { 156 if (!s.Variables.ContainsKey("EstimatedValues")) 157 s.Variables.Add(new Core.Variable("EstimatedValues")); 158 updateEstimatedValues.Add(ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, s)); 159 } 160 161 var evaluateSchemas = new OperationCollection(); 162 var schemas = new List<ISymbolicExpressionTree>(); 163 var wildCardCounts = new List<double>(); 164 #region create schemas and add subscopes representing the individuals 226 var hash = new HashSet<string>(); 227 var formatter = new SymbolicExpressionTreeStringFormatter { Indent = false, AppendNewLines = false }; 165 228 foreach (var g in groups) { 166 229 var parent = g.Key; 167 if (parent.Data.Length < MinimumSchemaLength)230 if (parent.Data.Length < minimumSchemaLength) 168 231 continue; 169 232 bool replaced = false; … … 171 234 var nodes = schema.IterateNodesPrefix().ToList(); 172 235 var arcs = g.Select(x => x.InArcs.Last()).Where(x => x.Data != null); 173 var indices = arcs.Select(x => ((IFragment<ISymbolicExpressionTreeNode>)x.Data).Index1).Distinct().ToArray(); 236 var indices = (from arc in arcs 237 let fragment = (IFragment<ISymbolicExpressionTreeNode>)arc.Data 238 select fragment.Index1).Distinct().ToArray(); 174 239 Array.Sort(indices); 175 240 var nodesToReplace = indices.Select(x => nodes[x]).ToList(); 176 int w = 0;177 241 for (int i = nodesToReplace.Count - 1; i >= 0; --i) { 178 242 var node = nodesToReplace[i]; 179 243 180 244 // do not replace the node with a wildcard if it would result in a length < MinimumSchemaLength 181 if ( (schema.Length - node.GetLength() + 1) < MinimumSchemaLength)245 if (schema.Length - node.GetLength() + 1 < minimumSchemaLength) 182 246 continue; 183 247 184 var replacement = anySubtreeSymbol.CreateTreeNode(); 185 ReplaceSubtree(node, replacement, false); 248 // var replacement = anySubtreeSymbol.CreateTreeNode(); 249 // ReplaceSubtree(node, replacement, false); 250 var replacement = new AnyNodeSymbol(node.Symbol.MinimumArity, node.Symbol.MinimumArity).CreateTreeNode(); 251 ReplaceSubtree(node, replacement, true); 186 252 replaced = true; 187 ++w;188 253 } 189 254 if (replaced) { 190 // store the schemas and the number of wildcards they contain in two lists 191 schemas.Add(schema); 192 wildCardCounts.Add(w); 255 var str = formatter.Format(schema.Root.GetSubtree(0).GetSubtree(0)); 256 if (hash.Contains(str)) continue; 257 yield return schema; 258 hash.Add(str); 193 259 } 194 260 } 195 196 for (int i = 0; i < schemas.Count; ++i) { 197 var schema = schemas[i]; 198 evaluateSchemas.Add(ExecutionContext.CreateChildOperation(new SchemaEvaluator { Schema = schema }, ExecutionContext.Scope)); 199 } 200 #endregion 201 202 if (diversificationStatisticsOperator == null) 203 diversificationStatisticsOperator = new DiversificationStatisticsOperator(); 204 205 var calculateStatistics = ExecutionContext.CreateChildOperation(diversificationStatisticsOperator); 206 207 // set parameters for statistics 208 AverageSchemaLengthParameter.Value = new DoubleValue(schemas.Average(x => x.Length)); 209 NumberOfSchemasParameter.Value = new IntValue(schemas.Count); 210 NumberOfChangedTreesParameter.Value = new IntValue(0); 211 212 // return an operation collection containing all the scope operations + base.Apply() 213 return new OperationCollection { updateEstimatedValues, evaluateSchemas, calculateStatistics, base.Apply() }; 214 } 215 216 private static void ReplaceSubtree(ISymbolicExpressionTreeNode original, ISymbolicExpressionTreeNode replacement, 217 bool preserveChildren = true) { 261 } 262 263 private static void ReplaceSubtree(ISymbolicExpressionTreeNode original, ISymbolicExpressionTreeNode replacement, bool preserveChildren = true) { 218 264 var parent = original.Parent; 219 265 if (parent == null) -
branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaEvaluator.cs
r12988 r13480 21 21 22 22 using System; 23 using System.Collections.Generic; 23 24 using System.Linq; 24 25 using System.Threading.Tasks; … … 28 29 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; 29 30 using HeuristicLab.EvolutionTracking; 31 using HeuristicLab.Optimization; 30 32 using HeuristicLab.Parameters; 31 33 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; … … 49 51 private const string ApplyLinearScalingParameterName = "ApplyLinearScaling"; 50 52 private const string MutatorParameterName = "Mutator"; 53 private const string CrossoverParameterName = "Crossover"; 51 54 private const string RandomReplacementParameterName = "RandomReplacement"; 52 55 private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees"; … … 54 57 private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism"; 55 58 private const string ExclusiveMatchingParameterName = "ExclusiveMatching"; 59 private const string UseAdaptiveReplacementRatioParameterName = "UseAdaptiveReplacementRatio"; 56 60 #endregion 57 61 58 62 #region parameters 63 public ILookupParameter<BoolValue> UseAdaptiveReplacementRatioParameter { 64 get { return (ILookupParameter<BoolValue>)Parameters[UseAdaptiveReplacementRatioParameterName]; } 65 } 59 66 public ILookupParameter<BoolValue> ExclusiveMatchingParameter { 60 67 get { return (ILookupParameter<BoolValue>)Parameters[ExclusiveMatchingParameterName]; } … … 86 93 public ILookupParameter<ISymbolicExpressionTreeManipulator> MutatorParameter { 87 94 get { return (ILookupParameter<ISymbolicExpressionTreeManipulator>)Parameters[MutatorParameterName]; } 95 } 96 public ILookupParameter<ISymbolicExpressionTreeCrossover> CrossoverParameter { 97 get { return (ILookupParameter<ISymbolicExpressionTreeCrossover>)Parameters[CrossoverParameterName]; } 88 98 } 89 99 public ILookupParameter<IRandom> RandomParameter { … … 120 130 private readonly SymbolicExpressionTreePhenotypicSimilarityCalculator calculator = new SymbolicExpressionTreePhenotypicSimilarityCalculator(); 121 131 private readonly QueryMatch qm; 132 133 public Func<double, double> ReplacementRule { get; set; } 122 134 123 135 private readonly ISymbolicExpressionTreeNodeEqualityComparer comp = new SymbolicExpressionTreeNodeEqualityComparer { … … 148 160 Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName)); 149 161 Parameters.Add(new LookupParameter<ISymbolicExpressionTreeManipulator>(MutatorParameterName)); 162 Parameters.Add(new LookupParameter<ISymbolicExpressionTreeCrossover>(CrossoverParameterName)); 150 163 Parameters.Add(new LookupParameter<BoolValue>(RandomReplacementParameterName)); 151 164 Parameters.Add(new LookupParameter<IntValue>(NumberOfChangedTreesParameterName)); … … 153 166 Parameters.Add(new LookupParameter<IntValue>(MaxDegreeOfParalellismParameterName)); 154 167 Parameters.Add(new LookupParameter<BoolValue>(ExclusiveMatchingParameterName)); 168 Parameters.Add(new LookupParameter<BoolValue>(UseAdaptiveReplacementRatioParameterName)); 155 169 #endregion 156 170 } … … 175 189 public override IOperation Apply() { 176 190 var individuals = ExecutionContext.Scope.SubScopes; // the scopes represent the individuals 191 var trees = individuals.Select(x => (ISymbolicExpressionTree)x.Variables["SymbolicExpressionTree"].Value).ToList(); 192 var qualities = individuals.Select(x => (DoubleValue)x.Variables["Quality"].Value).ToList(); 177 193 178 194 var random = RandomParameter.ActualValue; … … 185 201 // first apply the length and root equality checks in order to filter the individuals 186 202 var exclusiveMatching = ExclusiveMatchingParameter.ActualValue.Value; 187 var filtered = exclusiveMatching ? (from ind in individuals 188 where !ind.Variables.ContainsKey("AlreadyMatched") 189 let t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value 190 where t.Length >= s.Length && qm.Comparer.Equals(t.Root.GetSubtree(0).GetSubtree(0), sRoot) 191 select ind).ToList() 192 : (from ind in individuals 193 let t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value 194 where t.Length >= s.Length && qm.Comparer.Equals(t.Root.GetSubtree(0).GetSubtree(0), sRoot) 195 select ind).ToList(); 203 var filtered = new List<int>(); 204 for (int i = 0; i < individuals.Count; ++i) { 205 if (exclusiveMatching && individuals[i].Variables.ContainsKey("AlreadyMatched")) continue; 206 var t = trees[i]; 207 var tRoot = t.Root.GetSubtree(0).GetSubtree(0); 208 if (t.Length < s.Length || !qm.Comparer.Equals(tRoot, sRoot)) continue; 209 filtered.Add(i); 210 } 196 211 197 212 // if we don't have enough filtered individuals, then we are done 213 // if the schema exceeds the minimum frequency, it gets processed further 198 214 if (filtered.Count < countThreshold) { 199 215 return base.Apply(); … … 211 227 Parallel.For(0, filtered.Count, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism }, 212 228 i => { 213 var t = (ISymbolicExpressionTree)filtered[i].Variables["SymbolicExpressionTree"].Value; 229 var index = filtered[i]; 230 var t = trees[index]; 214 231 var tNodes = QueryMatch.InitializePostOrder(t.Root.GetSubtree(0).GetSubtree(0)); 215 232 if (qm.Match(tNodes, sNodes)) { … … 218 235 }); 219 236 220 for (int i = 0; i < matching.Length; ++i) { 221 if (matching[i]) 222 matchingIndividuals.Add(filtered[i]); 223 } 237 matchingIndividuals.AddRange(filtered.Where((x, i) => matching[i]).Select(x => individuals[x])); 224 238 } else { 225 239 for (int i = 0; i < filtered.Count; ++i) { … … 228 242 break; 229 243 230 var ind = filtered[i];231 var t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value;232 var tNodes = QueryMatch.InitializePostOrder(t .Root.GetSubtree(0).GetSubtree(0));244 var index = filtered[i]; 245 var tRoot = trees[index].Root.GetSubtree(0).GetSubtree(0); 246 var tNodes = QueryMatch.InitializePostOrder(tRoot); 233 247 if (qm.Match(tNodes, sNodes)) 234 matchingIndividuals.Add(ind );248 matchingIndividuals.Add(individuals[index]); 235 249 } 236 250 } 237 251 238 // additional condition: the average schema quality should be equal or greater than the population average quality239 252 if (matchingIndividuals.Count < countThreshold) { 240 253 return base.Apply(); 241 254 } 242 255 243 var similarity = Calculate PhenotypicSimilarity(matchingIndividuals, calculator, executeInParallel, maxDegreeOfParallelism);256 var similarity = CalculateSimilarity(matchingIndividuals, calculator, executeInParallel, maxDegreeOfParallelism); 244 257 if (similarity < MinimumPhenotypicSimilarity.Value) { 245 258 return base.Apply(); 246 259 } 247 260 248 int n = (int)Math.Floor(matchingIndividuals.Count * ReplacementRatio.Value); 261 double replacementRatio; 262 var adaptiveReplacementRatio = UseAdaptiveReplacementRatioParameter.ActualValue.Value; 263 264 if (adaptiveReplacementRatio) { 265 var r = (double)matchingIndividuals.Count / individuals.Count; 266 replacementRatio = ReplacementRule(r); 267 } else { 268 replacementRatio = ReplacementRatio.Value; 269 } 270 271 int n = (int)Math.Floor(matchingIndividuals.Count * replacementRatio); 249 272 var individualsToReplace = RandomReplacement.Value ? matchingIndividuals.SampleRandomWithoutRepetition(random, n).ToList() 250 273 : matchingIndividuals.OrderBy(x => (DoubleValue)x.Variables["Quality"].Value).Take(n).ToList(); … … 265 288 } 266 289 267 p rivate static double CalculatePhenotypicSimilarity(ScopeList individuals, SymbolicExpressionTreePhenotypicSimilarityCalculator calculator, bool parallel = false, int nThreads = -1) {290 public static double CalculateSimilarity(ScopeList individuals, ISolutionSimilarityCalculator calculator, bool parallel = false, int nThreads = -1) { 268 291 double similarity = 0; 269 292 int count = individuals.Count; 293 if (count < 2) return double.NaN; 270 294 if (parallel) { 271 295 var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = nThreads };
Note: See TracChangeset
for help on using the changeset viewer.