Changeset 12979 for branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification
- Timestamp:
- 10/02/15 01:08:30 (9 years ago)
- Location:
- branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaCreator.cs
r12958 r12979 42 42 private const string ReplacementRatioParameterName = "ReplacementRatio"; 43 43 private const string RandomReplacementParameterName = "RandomReplacement"; 44 private const string ExecuteInParallelParameterName = "ExecuteInParallel"; 45 private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism"; 44 46 45 47 #region parameters … … 48 50 } 49 51 52 public IFixedValueParameter<BoolValue> ExecuteInParallelParameter { 53 get { return (IFixedValueParameter<BoolValue>)Parameters[ExecuteInParallelParameterName]; } 54 } 55 56 public IFixedValueParameter<IntValue> MaxDegreeOfParallelismParameter { 57 get { return (IFixedValueParameter<IntValue>)Parameters[MaxDegreeOfParalellismParameterName]; } 58 } 59 50 60 public IFixedValueParameter<PercentValue> MinimumSchemaFrequencyParameter { 51 61 get { return (IFixedValueParameter<PercentValue>)Parameters[MinimumSchemaFrequencyParameterName]; } … … 62 72 63 73 #region parameter properties 64 public int MinimumSchemaLength { 65 get { return MinimumSchemaLengthParameter.Value.Value;}66 }74 public int MinimumSchemaLength { get { return MinimumSchemaLengthParameter.Value.Value; } } 75 public int MaxDegreeOfParallelism { get { return MaxDegreeOfParallelismParameter.Value.Value; } } 76 public bool ExecuteInParallel { get { return ExecuteInParallelParameter.Value.Value; } } 67 77 #endregion 68 78 … … 72 82 private DataTableValuesCollector valuesCollector; 73 83 private ResultsCollector resultsCollector; 84 private UpdateEstimatedValuesOperator updateEstimatedValuesOperator; 74 85 75 86 private void ParameterizeOperators() { … … 89 100 resultsCollector = new ResultsCollector(); 90 101 resultsCollector.CollectedValues.Add(new LookupParameter<DataTable>("Diversification")); 102 103 updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator(); 91 104 } 92 105 93 106 public SchemaCreator() { 94 107 Parameters.Add(new FixedValueParameter<IntValue>(MinimumSchemaLengthParameterName, new IntValue(10))); 95 Parameters.Add(new FixedValueParameter<PercentValue>(MinimumSchemaFrequencyParameterName, new PercentValue(0.0 5)));96 Parameters.Add(new FixedValueParameter<PercentValue>(MinimumPhenotypicSimilarityParameterName, new PercentValue(0. 8)));97 Parameters.Add(new FixedValueParameter<PercentValue>(ReplacementRatioParameterName, new PercentValue(0. 2)));108 Parameters.Add(new FixedValueParameter<PercentValue>(MinimumSchemaFrequencyParameterName, new PercentValue(0.01))); 109 Parameters.Add(new FixedValueParameter<PercentValue>(MinimumPhenotypicSimilarityParameterName, new PercentValue(0.9))); 110 Parameters.Add(new FixedValueParameter<PercentValue>(ReplacementRatioParameterName, new PercentValue(0.9))); 98 111 Parameters.Add(new FixedValueParameter<BoolValue>(RandomReplacementParameterName, new BoolValue(false))); 112 Parameters.Add(new FixedValueParameter<BoolValue>(ExecuteInParallelParameterName, new BoolValue(false))); 113 Parameters.Add(new FixedValueParameter<IntValue>(MaxDegreeOfParalellismParameterName, new IntValue(-1))); 114 115 ExecuteInParallelParameter.Hidden = true; 116 MaxDegreeOfParallelismParameter.Hidden = true; 99 117 100 118 ParameterizeOperators(); … … 116 134 return base.Apply(); 117 135 // for now, only consider crossover offspring 136 137 var scopes = new ScopeList(this.ExecutionContext.Scope.SubScopes); 118 138 var vertices = GenealogyGraph.GetByRank(gen).Where(x => x.InDegree == 2).Cast<IGenealogyGraphNode<ISymbolicExpressionTree>>(); 119 139 var groups = vertices.GroupBy(x => x.Parents.First()).OrderByDescending(g => g.Count()).ToList(); 120 140 var anySubtreeSymbol = new AnySubtreeSymbol(); 121 141 122 var scopes = new ScopeList(this.ExecutionContext.Scope.SubScopes);123 124 142 if (schemaEvaluator == null || schemaCleanupOperator == null || changedTreesReducer == null || valuesCollector == null || resultsCollector == null) 125 143 ParameterizeOperators(); … … 129 147 var collectResults = ExecutionContext.CreateChildOperation(resultsCollector); 130 148 131 var oc = new OperationCollection(); 149 var updateEstimatedValues = new OperationCollection { Parallel = true }; 150 foreach (var s in scopes) { 151 if (!s.Variables.ContainsKey("EstimatedValues")) 152 s.Variables.Add(new Core.Variable("EstimatedValues")); 153 updateEstimatedValues.Add(ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, s)); 154 } 155 var oc = new OperationCollection { updateEstimatedValues }; 132 156 133 157 #region create schemas and add subscopes representing the individuals -
branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaEvaluator.cs
r12970 r12979 22 22 using System; 23 23 using System.Linq; 24 using System.Threading.Tasks; 24 25 using HeuristicLab.Common; 25 26 using HeuristicLab.Core; … … 50 51 private const string RandomReplacementParameterName = "RandomReplacement"; 51 52 private const string ChangedTreesParameterName = "ChangedTrees"; 53 private const string ExecuteInParallelParameterName = "ExecuteInParallel"; 54 private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism"; 52 55 #endregion 53 56 54 57 #region parameters 58 public ILookupParameter<BoolValue> ExecuteInParallelParameter { 59 get { return (ILookupParameter<BoolValue>)Parameters[ExecuteInParallelParameterName]; } 60 } 61 public ILookupParameter<IntValue> MaxDegreeOfParallelismParameter { 62 get { return (ILookupParameter<IntValue>)Parameters[MaxDegreeOfParalellismParameterName]; } 63 } 55 64 public ILookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>> EvaluatorParameter { 56 65 get { return (ILookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>>)Parameters[EvaluatorParameterName]; } … … 98 107 99 108 #region parameter properties 100 public PercentValue MinimumSchemaFrequency { 101 get { return MinimumSchemaFrequencyParameter.ActualValue; } 102 } 103 104 public PercentValue ReplacementRatio { 105 get { return ReplacementRatioParameter.ActualValue; } 106 } 107 108 public PercentValue MinimumPhenotypicSimilarity { 109 get { return MinimumPhenotypicSimilarityParameter.ActualValue; } 110 } 111 112 public BoolValue RandomReplacement { 113 get { return RandomReplacementParameter.ActualValue; } 114 } 109 public PercentValue MinimumSchemaFrequency { get { return MinimumSchemaFrequencyParameter.ActualValue; } } 110 public PercentValue ReplacementRatio { get { return ReplacementRatioParameter.ActualValue; } } 111 public PercentValue MinimumPhenotypicSimilarity { get { return MinimumPhenotypicSimilarityParameter.ActualValue; } } 112 public BoolValue RandomReplacement { get { return RandomReplacementParameter.ActualValue; } } 115 113 #endregion 116 114 … … 124 122 }; 125 123 126 127 [StorableHook(HookType.AfterDeserialization)] 128 private void AfterDeserialization() { 129 if (!Parameters.ContainsKey(ChangedTreesParameterName)) 130 Parameters.Add(new LookupParameter<IntValue>(ChangedTreesParameterName)); 131 } 124 [Storable] 125 private readonly UpdateEstimatedValuesOperator updateEstimatedValuesOperator; 132 126 133 127 public SchemaEvaluator() { 134 128 qm = new QueryMatch(comp) { MatchParents = true }; 129 this.updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator(); 135 130 136 131 Parameters.Add(new LookupParameter<ISymbolicExpressionTree>(SchemaParameterName, "The current schema to be evaluated")); … … 148 143 Parameters.Add(new LookupParameter<BoolValue>(RandomReplacementParameterName)); 149 144 Parameters.Add(new LookupParameter<IntValue>(ChangedTreesParameterName)); 150 } 151 145 Parameters.Add(new LookupParameter<BoolValue>(ExecuteInParallelParameterName)); 146 Parameters.Add(new LookupParameter<IntValue>(MaxDegreeOfParalellismParameterName)); 147 } 152 148 153 149 [StorableConstructor] … … 155 151 156 152 protected SchemaEvaluator(SchemaEvaluator original, Cloner cloner) : base(original, cloner) { 157 this.comp = (ISymbolicExpressionTreeNodeEqualityComparer)original.comp.Clone(); 158 this.qm = new QueryMatch(comp) { MatchParents = original.qm.MatchParents }; 153 this.comp = original.comp == null ? new SymbolicExpressionTreeNodeEqualityComparer { 154 MatchConstantValues = false, 155 MatchVariableWeights = false, 156 MatchVariableNames = true 157 } : (ISymbolicExpressionTreeNodeEqualityComparer)original.comp.Clone(); 158 this.qm = new QueryMatch(comp) { MatchParents = original.qm?.MatchParents ?? true }; 159 this.updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator(); 159 160 } 160 161 … … 163 164 } 164 165 165 private static double CalculatePhenotypicSimilarity(ScopeList individuals, SymbolicExpressionTreePhenotypicSimilarityCalculator calculator) {166 double similarity = 0;167 int count = individuals.Count;168 for (int i = 0; i < count - 1; ++i) {169 for (int j = i + 1; j < count; ++j) {170 similarity += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);171 }172 }173 return similarity / (count * (count - 1) / 2.0);174 }175 176 166 public override IOperation Apply() { 177 167 var individuals = ExecutionContext.Scope.SubScopes; // the scopes represent the individuals … … 179 169 var random = RandomParameter.ActualValue; 180 170 var mutator = MutatorParameter.ActualValue; 181 var evaluator = EvaluatorParameter.ActualValue;182 var updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();183 171 184 172 var s = SchemaParameter.ActualValue; 185 var matchingIndividuals = new ScopeList(); 186 foreach (var ind in individuals) { 187 var t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value; 188 if (t.Length >= s.Length && qm.Match(t, s)) 189 matchingIndividuals.Add(ind); 190 } 191 192 if (matchingIndividuals.Count < (int)Math.Max(2, Math.Round(MinimumSchemaFrequency.Value * individuals.Count))) { 173 var sRoot = s.Root.GetSubtree(0).GetSubtree(0); 174 int countThreshold = (int)Math.Max(2, Math.Round(MinimumSchemaFrequency.Value * individuals.Count)); 175 176 // first apply the length and root equality checks in order to filter the individuals 177 var filtered = (from ind in individuals 178 let t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value 179 where t.Length >= s.Length && qm.Comparer.Equals(t.Root.GetSubtree(0).GetSubtree(0), sRoot) 180 select ind).ToList(); 181 182 // if we don't have enough filtered individuals, then we are done 183 if (filtered.Count < countThreshold) { 193 184 ChangedTreesParameter.ActualValue = new IntValue(0); 194 185 return base.Apply(); 195 186 } 196 187 197 var similarity = CalculatePhenotypicSimilarity(matchingIndividuals, calculator); 188 // check if the filtered individuals match the schema 189 var sNodes = QueryMatch.InitializePostOrder(sRoot); 190 var matchingIndividuals = new ScopeList(); 191 bool executeInParallel = ExecuteInParallelParameter.ActualValue.Value; 192 int maxDegreeOfParallelism = MaxDegreeOfParallelismParameter.ActualValue.Value; 193 194 if (executeInParallel) { 195 var matching = new bool[filtered.Count]; 196 197 Parallel.For(0, filtered.Count, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism }, 198 i => { 199 var t = (ISymbolicExpressionTree)filtered[i].Variables["SymbolicExpressionTree"].Value; 200 var tNodes = QueryMatch.InitializePostOrder(t.Root.GetSubtree(0).GetSubtree(0)); 201 if (qm.Match(tNodes, sNodes)) { 202 matching[i] = true; 203 } 204 }); 205 206 for (int i = 0; i < matching.Length; ++i) { 207 if (matching[i]) 208 matchingIndividuals.Add(filtered[i]); 209 } 210 } else { 211 for (int i = 0; i < filtered.Count; ++i) { 212 213 // break early if it becomes impossible to reach the minimum threshold 214 if (matchingIndividuals.Count + filtered.Count - i < countThreshold) 215 break; 216 217 var ind = filtered[i]; 218 var t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value; 219 var tNodes = QueryMatch.InitializePostOrder(t.Root.GetSubtree(0).GetSubtree(0)); 220 if (qm.Match(tNodes, sNodes)) 221 matchingIndividuals.Add(ind); 222 } 223 } 224 225 if (matchingIndividuals.Count < countThreshold) { 226 ChangedTreesParameter.ActualValue = new IntValue(0); 227 return base.Apply(); 228 } 229 230 var similarity = CalculatePhenotypicSimilarity(matchingIndividuals, calculator, executeInParallel, maxDegreeOfParallelism); 198 231 if (similarity < MinimumPhenotypicSimilarity.Value) { 199 232 ChangedTreesParameter.ActualValue = new IntValue(0); … … 201 234 } 202 235 203 var oc = new OperationCollection();204 236 int n = (int)Math.Floor(matchingIndividuals.Count * ReplacementRatio.Value); 205 237 var individualsToReplace = RandomReplacement.Value ? matchingIndividuals.SampleRandomWithoutRepetition(random, n).ToList() 206 238 : matchingIndividuals.OrderBy(x => (DoubleValue)x.Variables["Quality"].Value).Take(n).ToList(); 239 var mutationOc = new OperationCollection { Parallel = false }; // cannot be parallel due to the before/after operators which insert vertices in the genealogy graph 240 var updateEstimatedValues = new OperationCollection { Parallel = true }; // evaluation should be done in parallel when possible 207 241 foreach (var ind in individualsToReplace) { 208 242 var mutatorOp = ExecutionContext.CreateChildOperation(mutator, ind); 209 var evaluatorOp = ExecutionContext.CreateChildOperation(evaluator, ind); 210 var updateEstimatedValuesOp = ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, ind); 211 oc.Add(mutatorOp); 212 oc.Add(evaluatorOp); 213 oc.Add(updateEstimatedValuesOp); 243 var updateOp = ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, ind); 244 mutationOc.Add(mutatorOp); 245 updateEstimatedValues.Add(updateOp); 214 246 } 215 247 ChangedTreesParameter.ActualValue = new IntValue(individualsToReplace.Count); 216 return new OperationCollection(oc, base.Apply()); 248 return new OperationCollection(mutationOc, updateEstimatedValues, base.Apply()); 249 } 250 251 private static double CalculatePhenotypicSimilarity(ScopeList individuals, SymbolicExpressionTreePhenotypicSimilarityCalculator calculator, bool parallel = false, int nThreads = -1) { 252 double similarity = 0; 253 int count = individuals.Count; 254 if (parallel) { 255 var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = nThreads }; 256 var simArray = new double[count - 1]; 257 Parallel.For(0, count - 1, parallelOptions, i => { 258 double innerSim = 0; 259 for (int j = i + 1; j < count; ++j) { 260 innerSim += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]); 261 } 262 simArray[i] = innerSim; 263 }); 264 similarity = simArray.Sum(); 265 } else { 266 for (int i = 0; i < count - 1; ++i) { 267 for (int j = i + 1; j < count; ++j) { 268 similarity += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]); 269 } 270 } 271 } 272 return similarity / (count * (count - 1) / 2.0); 217 273 } 218 274 } -
branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/UpdateEstimatedValuesOperator.cs
r12958 r12979 20 20 #endregion 21 21 22 using System; 22 23 using System.Linq; 23 24 using HeuristicLab.Common; … … 51 52 } 52 53 54 53 55 public UpdateEstimatedValuesOperator() { 54 56 Parameters.Add(new LookupParameter<IRegressionProblemData>(ProblemDataParameterName)); … … 74 76 var interpreter = InterpreterParameter.ActualValue; 75 77 76 var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, problemData.TrainingIndices) 77 .LimitToRange(estimationLimits.Lower, estimationLimits.Upper).ToArray(); 78 var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, problemData.TrainingIndices).ToArray(); 79 var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray(); 80 81 if (estimatedValues.Length != targetValues.Length) 82 throw new ArgumentException("Number of elements in target and estimated values enumeration do not match."); 83 84 var linearScalingCalculator = new OnlineLinearScalingParameterCalculator(); 85 86 for (int i = 0; i < estimatedValues.Length; ++i) { 87 var estimated = estimatedValues[i]; 88 var target = targetValues[i]; 89 if (!double.IsNaN(estimated) && !double.IsInfinity(estimated)) 90 linearScalingCalculator.Add(estimated, target); 91 } 92 double alpha = linearScalingCalculator.Alpha; 93 double beta = linearScalingCalculator.Beta; 94 if (linearScalingCalculator.ErrorState != OnlineCalculatorError.None) { 95 alpha = 0.0; 96 beta = 1.0; 97 } 98 99 var scaled = estimatedValues.Select(x => x * beta + alpha).LimitToRange(estimationLimits.Lower, estimationLimits.Upper).ToArray(); 100 OnlineCalculatorError error; 101 var r = OnlinePearsonsRCalculator.Calculate(targetValues, scaled, out error); 102 if (error != OnlineCalculatorError.None) r = 0; 103 104 var r2 = r * r; 105 if (r2 > 1.0) r2 = 1.0; 78 106 79 107 var variables = ExecutionContext.Scope.Variables; 80 if (variables.ContainsKey("EstimatedValues")) 81 variables["EstimatedValues"].Value = new DoubleArray(estimatedValues); 82 else 83 variables.Add(new Core.Variable("EstimatedValues", new DoubleArray(estimatedValues))); 108 ((DoubleValue)variables["Quality"].Value).Value = r2; 109 110 if (variables.ContainsKey("EstimatedValues")) { 111 variables["EstimatedValues"].Value = new DoubleArray(scaled); 112 } else { 113 variables.Add(new Core.Variable("EstimatedValues", new DoubleArray(scaled))); 114 } 84 115 return base.Apply(); 85 116 }
Note: See TracChangeset
for help on using the changeset viewer.