Changeset 13624
- Timestamp:
- 02/18/16 19:33:54 (9 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/Analyzers/SymbolicDataAnalysisSchemaFrequencyAnalyzer.cs
r13481 r13624 20 20 #endregion 21 21 22 using System; 23 using System.Collections.Generic; 22 24 using System.Linq; 25 using System.Text; 26 using System.Threading.Tasks; 23 27 using HeuristicLab.Common; 24 28 using HeuristicLab.Core; … … 35 39 public class SymbolicDataAnalysisSchemaFrequencyAnalyzer : EvolutionTrackingAnalyzer<ISymbolicExpressionTree> { 36 40 private const string MinimumSchemaLengthParameterName = "MinimumSchemaLength"; 41 private const string StrictSchemaMatchingParameterName = "StrictSchemaMatching"; 42 private const string ProblemDataParameterName = "ProblemData"; 43 private const string InterpreterParameterName = "SymbolicExpressionTreeInterpreter"; 44 private const string ExecuteInParallelParameterName = "ExecuteInParallel"; 45 private const string MaximumDegreeOfParallelismParameterName = "MaximumDegreeOfParallelism"; 46 47 private static readonly Dictionary<string, string> ShortNames = new Dictionary<string, string> { 48 { "Addition", "+" }, 49 { "Subtraction", "-" }, 50 { "Multiplication", "*" }, 51 { "Division", "/" }, 52 { "Exponential", "exp" }, 53 { "Logarithm", "log" } 54 }; 37 55 38 56 [Storable] … … 46 64 private QueryMatch qm; 47 65 66 public IFixedValueParameter<BoolValue> ExecuteInParallelParameter { 67 get { return (IFixedValueParameter<BoolValue>)Parameters[ExecuteInParallelParameterName]; } 68 } 69 70 public IFixedValueParameter<IntValue> MaximumDegreeOfParallelismParameter { 71 get { return (IFixedValueParameter<IntValue>)Parameters[MaximumDegreeOfParallelismParameterName]; } 72 } 73 48 74 public IFixedValueParameter<IntValue> MinimumSchemaLengthParameter { 49 75 get { return (IFixedValueParameter<IntValue>)Parameters[MinimumSchemaLengthParameterName]; } 76 } 77 78 public IFixedValueParameter<BoolValue> StrictSchemaMatchingParameter { 79 get { return (IFixedValueParameter<BoolValue>)Parameters[StrictSchemaMatchingParameterName]; } 80 } 81 82 public ILookupParameter<IRegressionProblemData> ProblemDataParameter { 83 get { return (ILookupParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; } 84 } 85 86 public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> InterpreterParameter { 87 get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[InterpreterParameterName]; } 88 } 89 90 public bool ExecuteInParallel { 91 get { return ExecuteInParallelParameter.Value.Value; } 92 set { ExecuteInParallelParameter.Value.Value = value; } 93 } 94 95 public int MaximumDegreeOfParallelism { 96 get { return MaximumDegreeOfParallelismParameter.Value.Value; } 97 set { MaximumDegreeOfParallelismParameter.Value.Value = value; } 98 } 99 100 public int MinimumSchemaLength { 101 get { return MinimumSchemaLengthParameter.Value.Value; } 102 set { MinimumSchemaLengthParameter.Value.Value = value; } 103 } 104 105 public bool StrictSchemaMatching { 106 get { return StrictSchemaMatchingParameter.Value.Value; } 107 set { StrictSchemaMatchingParameter.Value.Value = value; } 50 108 } 51 109 … … 60 118 genotypicSimilarityCalculator = new SymbolicExpressionTreeBottomUpSimilarityCalculator { SolutionVariableName = "SymbolicExpressionTree" }; 61 119 Parameters.Add(new FixedValueParameter<IntValue>(MinimumSchemaLengthParameterName, new IntValue(10))); 120 Parameters.Add(new FixedValueParameter<IntValue>(MaximumDegreeOfParallelismParameterName, new IntValue(4))); 121 Parameters.Add(new FixedValueParameter<BoolValue>(StrictSchemaMatchingParameterName, new BoolValue(true))); 122 Parameters.Add(new FixedValueParameter<BoolValue>(ExecuteInParallelParameterName, new BoolValue(true))); 123 Parameters.Add(new LookupParameter<IRegressionProblemData>(ProblemDataParameterName)); 124 Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(InterpreterParameterName)); 62 125 } 63 126 64 127 protected SymbolicDataAnalysisSchemaFrequencyAnalyzer(SymbolicDataAnalysisSchemaFrequencyAnalyzer original, 65 128 Cloner cloner) : base(original, cloner) { 66 comparer = original.comparer; 67 phenotypicSimilarityCalculator = original.phenotypicSimilarityCalculator; 68 genotypicSimilarityCalculator = original.genotypicSimilarityCalculator; 129 comparer = cloner.Clone(original.comparer); 130 phenotypicSimilarityCalculator = cloner.Clone(original.phenotypicSimilarityCalculator); 131 genotypicSimilarityCalculator = cloner.Clone(original.genotypicSimilarityCalculator); 132 MinimumSchemaLength = original.MinimumSchemaLength; 133 StrictSchemaMatching = original.StrictSchemaMatching; 69 134 qm = new QueryMatch(comparer) { MatchParents = true }; 70 135 } … … 76 141 [StorableConstructor] 77 142 protected SymbolicDataAnalysisSchemaFrequencyAnalyzer(bool deserializing) : base(deserializing) { } 78 79 143 80 144 [StorableHook(HookType.AfterDeserialization)] … … 84 148 85 149 public override IOperation Apply() { 86 int updateInterval = UpdateIntervalParameter.Value.Value; 87 IntValue updateCounter = UpdateCounterParameter.ActualValue; 88 // if counter does not yet exist then initialize it with update interval 89 // to make sure the solutions are analyzed on the first application of this operator 90 if (updateCounter == null) { 91 updateCounter = new IntValue(updateInterval); 92 UpdateCounterParameter.ActualValue = updateCounter; 93 } 94 //analyze solutions only every 'updateInterval' times 95 if (updateCounter.Value != updateInterval) { 96 updateCounter.Value++; 150 if (PopulationGraph == null || Generation.Value == 0 || 151 (Generation.Value > 1 && Generation.Value % UpdateInterval.Value != 0)) 97 152 return base.Apply(); 98 } 99 updateCounter.Value = 1; 100 101 if (PopulationGraph == null || Generation.Value == 0) 102 return base.Apply(); 103 104 var minimumSchemaLength = MinimumSchemaLengthParameter.Value.Value; 105 var vertices = PopulationGraph.GetByRank(Generation.Value).Cast<IGenealogyGraphNode<ISymbolicExpressionTree>>().ToList(); 106 var formatter = new SymbolicExpressionTreeStringFormatter { Indent = false, AppendNewLines = false }; 107 108 var schemas = SchemaCreator.GenerateSchemas(vertices, minimumSchemaLength).ToList(); 109 var trees = vertices.Select(x => x.Data).ToList(); 110 var qualities = vertices.Select(x => x.Quality).ToList(); 111 var scopes = ExecutionContext.Scope.SubScopes; // the scopes of all the individuals, needed because the tree evaluated values are stored in the scope and we use them for the phenotypic similarity 112 113 var matrix = new DoubleMatrix(schemas.Count, 5) { 114 RowNames = schemas.Select(x => formatter.Format(x.Root.GetSubtree(0).GetSubtree(0))), 115 ColumnNames = new[] { "Avg. Length", "Avg. Quality", "Relative Frequency", "Avg. Phen. Sim.", "Avg. Gen. Sim" }, 116 SortableView = true 117 }; 118 119 for (int i = 0; i < schemas.Count; ++i) { 120 var schema = schemas[i]; 121 int count = 0; 122 double avgLength = 0; 123 double avgQuality = 0; 124 var matchingScopes = new ScopeList(); 125 for (int j = 0; j < trees.Count; ++j) { 126 var tree = trees[j]; 127 if (qm.Match(tree, schema)) { 128 count++; 129 avgLength += tree.Length; 130 avgQuality += qualities[j]; 131 matchingScopes.Add(scopes[j]); 153 154 comparer.MatchVariableWeights = comparer.MatchConstantValues = StrictSchemaMatching; 155 phenotypicSimilarityCalculator.Interpreter = InterpreterParameter.ActualValue; 156 phenotypicSimilarityCalculator.ProblemData = ProblemDataParameter.ActualValue; 157 var generation = Generation.Value; 158 159 var population = PopulationGraph.GetByRank(generation).Cast<IGenealogyGraphNode<ISymbolicExpressionTree>>().ToList(); 160 var vertices = population.Where(x => x.InDegree == 2).OrderByDescending(x => x.Quality).ToList(); 161 ResultCollection resultCollection; 162 if (Results.ContainsKey("Schema Frequencies")) { 163 resultCollection = (ResultCollection)Results["Schema Frequencies"].Value; 164 } else { 165 resultCollection = new ResultCollection(); 166 var result = new Result("Schema Frequencies", resultCollection); 167 Results.Add(result); 168 } 169 170 var mostFrequentPerGeneration = new List<Tuple<string, double[]>>(); 171 172 var trees = population.Select(x => x.Data).ToList(); 173 var qualities = population.Select(x => x.Quality).ToList(); 174 var genSimMatrix = new double[trees.Count, trees.Count]; 175 var phenSimMatrix = new double[trees.Count, trees.Count]; 176 177 for (int i = 0; i < trees.Count - 1; ++i) { 178 for (int j = i + 1; j < trees.Count; ++j) { 179 genSimMatrix[i, j] = double.NaN; 180 phenSimMatrix[i, j] = double.NaN; 181 } 182 } 183 var schemas = SchemaCreator.GenerateSchemas(vertices, MinimumSchemaLength, StrictSchemaMatching).ToList(); 184 var schemaStrings = schemas.Select(x => SubtreeToString(x.Root.GetSubtree(0).GetSubtree(0), StrictSchemaMatching)).ToList(); 185 int[][] matchingIndices; 186 if (ExecuteInParallel) { 187 matchingIndices = new int[schemas.Count][]; 188 Parallel.For(0, schemas.Count, new ParallelOptions { MaxDegreeOfParallelism = MaximumDegreeOfParallelism }, i => { 189 var schema = schemas[i]; 190 matchingIndices[i] = Enumerable.Range(0, trees.Count).Where(v => qm.Match(trees[v].Root, schema.Root)).ToArray(); 191 }); 192 } else { 193 matchingIndices = schemas.Select(x => Enumerable.Range(0, trees.Count).Where(v => qm.Match(trees[v].Root, x.Root)).ToArray()).ToArray(); 194 } 195 196 var schemaStatistics = new List<Tuple<string, double[]>>(); 197 var avgPopQuality = qualities.Average(); 198 199 if (ExecuteInParallel) { 200 var locker = new object(); 201 Parallel.For(0, schemas.Count, new ParallelOptions { MaxDegreeOfParallelism = MaximumDegreeOfParallelism }, i => { 202 var indices = matchingIndices[i]; 203 if (indices.Length > 1) { 204 var avgSchemaQuality = indices.Average(x => qualities[x]); 205 var avgLength = indices.Average(x => trees[x].Length); 206 var avgGenSim = AverageSimilarity(indices, trees, genSimMatrix, genotypicSimilarityCalculator.CalculateSimilarity); 207 var avgPhenSim = AverageSimilarity(indices, trees, phenSimMatrix, phenotypicSimilarityCalculator.CalculateSimilarity); 208 var array = new[] { indices.Length, avgSchemaQuality, avgLength, avgGenSim, avgPhenSim, avgPopQuality }; 209 var t = new Tuple<string, double[]>(schemaStrings[i], array); 210 lock (locker) { 211 schemaStatistics.Add(t); 212 } 132 213 } 133 } 134 avgLength /= count; 135 avgQuality /= count; 136 double relativeFreq = (double)count / trees.Count; 137 double avgPhenotypicSimilarity = SchemaEvaluator.CalculateSimilarity(matchingScopes, phenotypicSimilarityCalculator, true, 4); 138 double avgGenotypicSimilarity = SchemaEvaluator.CalculateSimilarity(matchingScopes, genotypicSimilarityCalculator, true, 4); 139 140 matrix[i, 0] = avgLength; 141 matrix[i, 1] = avgQuality; 142 matrix[i, 2] = relativeFreq; 143 matrix[i, 3] = avgPhenotypicSimilarity; 144 matrix[i, 4] = avgGenotypicSimilarity; 145 } 146 147 if (Results.ContainsKey("Schema Frequencies")) { 148 var result = Results["Schema Frequencies"]; 149 result.Value = matrix; 150 } else { 151 var result = new Result("Schema Frequencies", matrix); 152 Results.Add(result); 153 } 154 214 }); 215 } else { 216 for (int i = 0; i < schemas.Count; ++i) { 217 var indices = matchingIndices[i]; 218 if (indices.Length > 1) { 219 var avgSchemaQuality = indices.Average(x => qualities[x]); 220 var avgLength = indices.Average(x => trees[x].Length); 221 var avgGenSim = AverageSimilarity(indices, trees, genSimMatrix, genotypicSimilarityCalculator.CalculateSimilarity); 222 var avgPhenSim = AverageSimilarity(indices, trees, phenSimMatrix, phenotypicSimilarityCalculator.CalculateSimilarity); 223 var array = new[] { indices.Length, avgSchemaQuality, avgLength, avgGenSim, avgPhenSim, avgPopQuality }; 224 var t = new Tuple<string, double[]>(schemaStrings[i], array); 225 schemaStatistics.Add(t); 226 } 227 } 228 } 229 230 if (!schemaStatistics.Any()) return base.Apply(); // shouldn't ever happen 231 var columnNames = new[] { "Count", "Avg Quality", "Avg Length", "Avg Genotype Similarity", "Avg Phenotype Similarity", "Avg Population Quality" }; 232 var mostFrequent = new DoubleMatrix(schemaStatistics.Count, schemaStatistics[0].Item2.Length); 233 mostFrequent.SortableView = true; 234 schemaStatistics.Sort((a, b) => { if (a.Item2[0].Equals(b.Item2[0])) return b.Item2[1].CompareTo(a.Item2[1]); return b.Item2[0].CompareTo(a.Item2[0]); }); 235 mostFrequentPerGeneration.Add(new Tuple<string, double[]>(schemaStatistics[0].Item1, new[] { (double)generation }.Concat(schemaStatistics[0].Item2).ToArray())); 236 mostFrequent.RowNames = schemaStatistics.Select(x => x.Item1); 237 mostFrequent.ColumnNames = columnNames; 238 for (int i = 0; i < schemaStatistics.Count; ++i) { 239 var values = schemaStatistics[i].Item2; 240 for (int j = 0; j < values.Length; ++j) { 241 mostFrequent[i, j] = values[j]; 242 } 243 } 244 resultCollection.Add(new Result("Generation " + generation + " Most Frequent Schemas", mostFrequent)); 245 246 columnNames = new[] { "Generation", "Count", "Avg Quality", "Avg Length", "Avg Genotype Similarity", "Avg Phenotype Similarity", "Avg Population Quality" }; 247 // sort according to quality, then count 248 schemaStatistics.Sort((a, b) => { if (a.Item2[1].Equals(b.Item2[1])) return b.Item2[0].CompareTo(a.Item2[0]); return b.Item2[1].CompareTo(a.Item2[1]); }); 249 DoubleMatrix bestSchemasMatrix; 250 if (!resultCollection.ContainsKey("Best Schemas")) { 251 bestSchemasMatrix = new DoubleMatrix(1, 7); 252 bestSchemasMatrix.RowNames = new[] { schemaStatistics[0].Item1 }; 253 bestSchemasMatrix.ColumnNames = columnNames; 254 var values = new[] { (double)generation }.Concat(schemaStatistics[0].Item2).ToArray(); 255 for (int i = 0; i < values.Length; ++i) 256 bestSchemasMatrix[0, i] = values[i]; 257 resultCollection.Add(new Result("Best Schemas", bestSchemasMatrix)); 258 } else { 259 bestSchemasMatrix = (DoubleMatrix)resultCollection["Best Schemas"].Value; 260 resultCollection["Best Schemas"].Value = AddRow(bestSchemasMatrix, new[] { (double)generation }.Concat(schemaStatistics[0].Item2).ToArray(), schemaStatistics[0].Item1); 261 } 262 263 // sort according to count, then quality 264 schemaStatistics.Sort((a, b) => { if (a.Item2[0].Equals(b.Item2[0])) return b.Item2[1].CompareTo(a.Item2[1]); return b.Item2[0].CompareTo(a.Item2[0]); }); 265 DoubleMatrix frequentSchemasMatrix; 266 if (!resultCollection.ContainsKey("Most Frequent Schemas")) { 267 frequentSchemasMatrix = new DoubleMatrix(1, 7); 268 frequentSchemasMatrix.RowNames = new[] { schemaStatistics[0].Item1 }; 269 frequentSchemasMatrix.ColumnNames = columnNames; 270 var values = new[] { (double)generation }.Concat(schemaStatistics[0].Item2).ToArray(); 271 for (int i = 0; i < values.Length; ++i) 272 frequentSchemasMatrix[0, i] = values[i]; 273 resultCollection.Add(new Result("Most Frequent Schemas", frequentSchemasMatrix)); 274 } else { 275 frequentSchemasMatrix = (DoubleMatrix)resultCollection["Most Frequent Schemas"].Value; 276 resultCollection["Most Frequent Schemas"].Value = AddRow(frequentSchemasMatrix, new[] { (double)generation }.Concat(schemaStatistics[0].Item2).ToArray(), schemaStatistics[0].Item1); 277 } 155 278 return base.Apply(); 279 } 280 281 private static DoubleMatrix AddRow(DoubleMatrix m, double[] row, string rowName) { 282 if (row.Length != m.Columns) 283 throw new Exception("Row value count must match matrix column count."); 284 var x = new DoubleMatrix(m.Rows + 1, m.Columns); 285 x.RowNames = m.RowNames.Concat(new[] { rowName }); 286 x.ColumnNames = m.ColumnNames; 287 for (int i = 0; i < m.Rows; ++i) 288 for (int j = 0; j < m.Columns; ++j) 289 x[i, j] = m[i, j]; 290 for (int j = 0; j < m.Columns; ++j) 291 x[m.Rows, j] = row[j]; 292 return x; 293 } 294 295 private static double AverageSimilarity(int[] indices, List<ISymbolicExpressionTree> trees, double[,] similarityMatrix, Func<ISymbolicExpressionTree, ISymbolicExpressionTree, double> similarityFunction) { 296 var agg = 0d; 297 int len = indices.Length; 298 var count = len * (len - 1) / 2d; 299 for (int i = 0; i < indices.Length - 1; ++i) { 300 var a = indices[i]; 301 for (int j = i + 1; j < indices.Length; ++j) { 302 var b = indices[j]; 303 if (double.IsNaN(similarityMatrix[a, b])) similarityMatrix[a, b] = similarityFunction(trees[a], trees[b]); 304 agg += similarityMatrix[a, b]; 305 } 306 } 307 return agg / count; 308 } 309 310 private static string SubtreeToString(ISymbolicExpressionTreeNode node, bool strict = false) { 311 StringBuilder strBuilder = new StringBuilder(); 312 // internal nodes or leaf nodes? 313 if (node is AnySubtree) 314 return "# "; 315 316 if (node.SubtreeCount > 0) { 317 strBuilder.Append("("); 318 // symbol on same line as '(' 319 string label = string.Empty; 320 if (node is AnyNode) 321 label = "="; 322 else { 323 var name = node.Symbol.Name; 324 label = ShortNames.ContainsKey(name) ? ShortNames[name] : name; 325 } 326 strBuilder.Append(label + " "); 327 // each subtree expression on a new line 328 // and closing ')' also on new line 329 foreach (var subtree in node.Subtrees) { 330 strBuilder.Append(SubtreeToString(subtree, strict)); 331 } 332 strBuilder.Append(") "); 333 } else { 334 // symbol in the same line with as '(' and ')' 335 var v = node as VariableTreeNode; 336 var c = node as ConstantTreeNode; 337 var w = node as AnyNode; // wildcard 338 string label = string.Empty; 339 if (w != null) 340 label = "="; 341 else if (v != null) 342 label = strict ? string.Format("{0:0.00}_{1}", v.Weight, v.VariableName) : string.Format("{0}", v.VariableName); 343 else if (c != null) 344 label = strict ? string.Format("{0:0.00}", c.Value) : "C"; 345 strBuilder.Append(label); 346 if (node.Parent != null && node != node.Parent.Subtrees.Last()) 347 strBuilder.Append(" "); 348 } 349 return strBuilder.ToString(); 156 350 } 157 351 }
Note: See TracChangeset
for help on using the changeset viewer.