[16734] | 1 | #region License Information
|
---|
| 2 | /* HeuristicLab
|
---|
| 3 | * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
|
---|
| 4 | *
|
---|
| 5 | * This file is part of HeuristicLab.
|
---|
| 6 | *
|
---|
| 7 | * HeuristicLab is free software: you can redistribute it and/or modify
|
---|
| 8 | * it under the terms of the GNU General Public License as published by
|
---|
| 9 | * the Free Software Foundation, either version 3 of the License, or
|
---|
| 10 | * (at your option) any later version.
|
---|
| 11 | *
|
---|
| 12 | * HeuristicLab is distributed in the hope that it will be useful,
|
---|
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 15 | * GNU General Public License for more details.
|
---|
| 16 | *
|
---|
| 17 | * You should have received a copy of the GNU General Public License
|
---|
| 18 | * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
|
---|
| 19 | */
|
---|
| 20 | #endregion
|
---|
| 21 |
|
---|
| 22 | using HEAL.Attic;
|
---|
[17002] | 23 | using HeuristicLab.Algorithms.EvolvmentModelsOfModels;
|
---|
[16734] | 24 | using HeuristicLab.Analysis;
|
---|
| 25 | using HeuristicLab.Common;
|
---|
| 26 | using HeuristicLab.Core;
|
---|
| 27 | using HeuristicLab.Data;
|
---|
| 28 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
| 29 | using HeuristicLab.Optimization;
|
---|
| 30 | using HeuristicLab.Parameters;
|
---|
| 31 | using System;
|
---|
| 32 | using System.Collections.Generic;
|
---|
| 33 | using System.Linq;
|
---|
| 34 |
|
---|
| 35 | namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
|
---|
/// <summary>
/// Calculates the accumulated relative frequencies of model-cluster references over all trees in the population.
/// </summary>
/// <remarks>
/// Each call to <see cref="Apply"/> appends one value per known key to the "ModelClusters frequencies" data table.
/// How the keys are formed depends on the runtime type of the map found via <see cref="MapParameter"/>
/// (see <see cref="CalculateModelClustersFrequency"/>).
/// NOTE(review): the AggregateModelClusters switch is declared as a parameter but is not consulted by
/// <see cref="Apply"/> in this class.
/// </remarks>
[Item("Symbolic Data Analysis Model Clusters Frequency Analyzer", "Calculates the accumulated frequencies of Model Clusters over all trees in the population.")]
[StorableType("4755115D-1B73-4577-BA2A-A762AE4C3B2F")]
public sealed class ModelClustersFrequencyAnalyzer : SymbolicDataAnalysisAnalyzer {
  private const string ModelClustersFrequencyParameterName = "ModelClustersFrequency";
  private const string AggregateModelClustersParameterName = "AggregateModelClusters";
  private const string MapParameterName = "Map";

  // Single source for the description so that the constructor and the deserialization hook cannot drift apart.
  private const string AggregateModelClustersParameterDescription = "Switch that determines whether all references to factor Model Clusters should be aggregated regardless of the value. Turn off to analyze all factor Model Clusters references with different values separately.";

  #region parameter properties
  /// <summary>Lookup parameter holding the data table the frequencies are appended to.</summary>
  public ILookupParameter<DataTable> ModelClustersFrequencyParameter {
    get { return (ILookupParameter<DataTable>)Parameters[ModelClustersFrequencyParameterName]; }
  }
  /// <summary>Value-lookup parameter for the aggregation switch (defaults to <c>true</c>).</summary>
  public IValueLookupParameter<BoolValue> AggregateModelClustersParameter {
    get { return (IValueLookupParameter<BoolValue>)Parameters[AggregateModelClustersParameterName]; }
  }
  /// <summary>Lookup parameter for the model map that tree nodes are resolved against.</summary>
  public ILookupParameter<EMMMapBase<ISymbolicExpressionTree>> MapParameter {
    get { return (ILookupParameter<EMMMapBase<ISymbolicExpressionTree>>)Parameters[MapParameterName]; }
  }
  #endregion

  #region properties
  /// <summary>Reads the actual value of the aggregation switch; assigning sets the parameter's own value.</summary>
  public BoolValue AggregateModelClusters {
    get { return AggregateModelClustersParameter.ActualValue; }
    set { AggregateModelClustersParameter.Value = value; }
  }
  /// <summary>The frequency data table as currently stored in the scope.</summary>
  public DataTable ModelClustersFrequency {
    get { return ModelClustersFrequencyParameter.ActualValue; }
    set { ModelClustersFrequencyParameter.ActualValue = value; }
  }
  #endregion

  [StorableConstructor]
  private ModelClustersFrequencyAnalyzer(StorableConstructorFlag _) : base(_) { }
  private ModelClustersFrequencyAnalyzer(ModelClustersFrequencyAnalyzer original, Cloner cloner)
    : base(original, cloner) {
  }
  /// <summary>Creates the analyzer and registers its three parameters.</summary>
  public ModelClustersFrequencyAnalyzer()
    : base() {
    Parameters.Add(new LookupParameter<DataTable>(ModelClustersFrequencyParameterName, "The relative Model Clusters reference frequencies aggregated over all trees in the population."));
    Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateModelClustersParameterName, AggregateModelClustersParameterDescription, new BoolValue(true)));
    Parameters.Add(new LookupParameter<EMMMapBase<ISymbolicExpressionTree>>(MapParameterName));
  }

  /// <summary>
  /// Re-creates parameters that may be missing from instances persisted by an older version of this class.
  /// </summary>
  [StorableHook(HookType.AfterDeserialization)]
  private void AfterDeserialization() {
    // BackwardsCompatibility3.3
    #region Backwards compatible code, remove with 3.4
    if (!Parameters.ContainsKey(AggregateModelClustersParameterName)) {
      Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateModelClustersParameterName, AggregateModelClustersParameterDescription, new BoolValue(true)));
    }
    // The map parameter was introduced after the first persisted version of this analyzer;
    // without it MapParameter would fail on the parameter lookup.
    if (!Parameters.ContainsKey(MapParameterName)) {
      Parameters.Add(new LookupParameter<EMMMapBase<ISymbolicExpressionTree>>(MapParameterName));
    }
    #endregion
  }

  public override IDeepCloneable Clone(Cloner cloner) {
    return new ModelClustersFrequencyAnalyzer(this, cloner);
  }

  /// <summary>
  /// Appends the current relative frequencies to the frequency data table, creating the table
  /// (and registering it as a result) on first use.
  /// </summary>
  /// <returns>The operation returned by the base implementation.</returns>
  public override IOperation Apply() {
    ItemArray<ISymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
    ResultCollection results = ResultCollection;
    DataTable datatable;
    if (ModelClustersFrequencyParameter.ActualValue == null) {
      datatable = new DataTable("ModelClusters frequencies", "Relative frequency of ModelClusters references aggregated over the whole population.");
      datatable.VisualProperties.XAxisTitle = "Generation";
      datatable.VisualProperties.YAxisTitle = "Relative ModelClusters Frequency";
      ModelClustersFrequencyParameter.ActualValue = datatable;
      results.Add(new Result("ModelClusters frequencies", "Relative frequency of ModelClusters references aggregated over the whole population.", datatable));
    }

    datatable = ModelClustersFrequencyParameter.ActualValue;
    // all rows must have the same number of values so we can just take the first
    // (DefaultIfEmpty makes this 0 for a table without rows)
    int numberOfValues = datatable.Rows.Select(r => r.Values.Count).DefaultIfEmpty().First();

    foreach (var pair in CalculateModelClustersFrequency(expressions, MapParameter.ActualValue)) {
      if (!datatable.Rows.ContainsKey(pair.Key)) {
        // initialize a new row for the key and pad with zeros for all earlier calls
        DataRow row = new DataRow(pair.Key, "", Enumerable.Repeat(0.0, numberOfValues));
        row.VisualProperties.StartIndexZero = true;
        datatable.Rows.Add(row);
      }
      datatable.Rows[pair.Key].Values.Add(Math.Round(pair.Value, 3));
    }

    // add a zero for each data row that was not modified in the previous loop
    foreach (var row in datatable.Rows.Where(r => r.Values.Count != numberOfValues + 1))
      row.Values.Add(0.0);

    return base.Apply();
  }

  /// <summary>
  /// Sums the per-tree reference values for every key and normalizes them by the overall total.
  /// </summary>
  /// <param name="trees">The trees whose references are accumulated.</param>
  /// <param name="map">The map used to resolve references; its runtime type selects how keys are formed.</param>
  /// <returns>
  /// One pair per key, ordered by key with a <see cref="NaturalStringComparer"/>. The value is the key's share
  /// of the total, or 0 when the total is zero (instead of the NaN a plain division would produce).
  /// The sequence is produced lazily.
  /// </returns>
  public static IEnumerable<KeyValuePair<string, double>> CalculateModelClustersFrequency(IEnumerable<ISymbolicExpressionTree> trees, EMMMapBase<ISymbolicExpressionTree> map) {
    var modelClustersFrequency = trees
      .SelectMany(t => GetModelClustersReferences(t, map))
      .GroupBy(pair => pair.Key, pair => pair.Value)
      .ToDictionary(g => g.Key, g => g.Sum());

    double totalNumberOfSymbols = modelClustersFrequency.Values.Sum();

    foreach (var pair in modelClustersFrequency.OrderBy(p => p.Key, new NaturalStringComparer())) {
      // Guard the exact-zero total only: the fallback key "no" carries the value 0, so the total can be 0.
      double relativeFrequency = totalNumberOfSymbols == 0.0 ? 0.0 : pair.Value / totalNumberOfSymbols;
      yield return new KeyValuePair<string, double>(pair.Key, relativeFrequency);
    }
  }

  /// <summary>
  /// Collects the reference values contributed by a single tree.
  /// </summary>
  /// <remarks>
  /// <list type="bullet">
  /// <item><description>Island map: counts the tree's <c>TreeModelTreeNode</c> nodes per key "Cluster " + cluster number of the node's tree number.</description></item>
  /// <item><description>Success map: one key "Tree Probability" + i per model in the map's model set, carrying the map's probability i; the tree itself is not inspected.</description></item>
  /// <item><description>Any other map (including <c>null</c>): the single key "no" with value 0.</description></item>
  /// </list>
  /// </remarks>
  private static IEnumerable<KeyValuePair<string, double>> GetModelClustersReferences(ISymbolicExpressionTree tree, EMMMapBase<ISymbolicExpressionTree> map) {
    var references = new Dictionary<string, double>();
    if (map is EMMIslandMap island) {
      foreach (var treeNode in tree.IterateNodesPrefix().OfType<TreeModelTreeNode>()) {
        string referenceId = "Cluster " + island.ClusterNumber[treeNode.TreeNumber];
        // TryGetValue leaves count at 0 for an unseen key, so the first occurrence stores 1.
        references.TryGetValue(referenceId, out double count);
        references[referenceId] = count + 1;
      }
    } else if (map is EMMSucsessMap sMap) {
      for (int i = 0; i < map.ModelSet.Count; i++) {
        references["Tree Probability" + i] = sMap.SelfConfigurationMechanism.Probabilities[i];
      }
    } else {
      references["no"] = 0;
    }
    return references;
  }
}
|
---|
| 168 | }
|
---|