Changeset 16258
- Timestamp: 10/25/18 13:37:03 (6 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisBuildingBlockAnalyzer.cs
r16255 r16258 20 20 #endregion 21 21 22 using System;23 22 using System.Collections.Generic; 24 23 using System.Linq; 24 using System.Text; 25 25 using HeuristicLab.Analysis; 26 26 using HeuristicLab.Common; … … 41 41 private const string SimplifyTreesParameterName = "SimplifyTrees"; 42 42 43 private readonly InfixExpressionFormatter formatter = new InfixExpressionFormatter();44 43 private Dictionary<int, DataRow> hashToRow = new Dictionary<int, DataRow>(); 45 44 45 #region parameters 46 46 public IValueLookupParameter<IntValue> MinimumSubtreeLengthParameter { 47 47 get { return (IValueLookupParameter<IntValue>)Parameters[MinimumSubtreeLengthParameterName]; } … … 51 51 get { return (IValueLookupParameter<BoolValue>)Parameters[SimplifyTreesParameterName]; } 52 52 } 53 #endregion 53 54 55 #region parameter properties 54 56 public IntValue MinimumSubtreeLength { 55 57 get { return MinimumSubtreeLengthParameter.ActualValue; } … … 59 61 get { return SimplifyTreesParameter.ActualValue; } 60 62 } 63 #endregion 61 64 62 65 public override void InitializeState() { … … 65 68 hashToRow = new Dictionary<int, DataRow>(); 66 69 } 67 68 70 69 71 [StorableHook(HookType.AfterDeserialization)] … … 102 104 var expressionCounts = new Dictionary<int, int>(); 103 105 104 int totalCount = 0; // total number of subtrees examined 106 int totalCount = 0; // total number of examined subtrees 107 108 // count hashes 105 109 foreach (var tree in SymbolicExpressionTree) { 106 110 var hashNodes = tree.Root.GetSubtree(0).GetSubtree(0).MakeNodes(); … … 114 118 ++totalCount; 115 119 var hash = s.CalculatedHashValue; 116 if (expressions. 
TryGetValue(hash, out string str)) {120 if (expressions.ContainsKey(hash)) { 117 121 expressionCounts[hash]++; 118 } else { 119 // set constant and weight values so the tree is formatted nicely by the formatter 120 var nodes = new HashNode<ISymbolicExpressionTreeNode>[1 + s.Size]; 121 Array.Copy(simplified, i - s.Size, nodes, 0, nodes.Length); 122 var subtree = nodes.ToSubtree(); 122 continue; 123 } 123 124 124 foreach (var node in subtree.IterateNodesPostfix()) { 125 if (node is ConstantTreeNode constantTreeNode) { 126 constantTreeNode.Value = 0; 127 } else if (node is VariableTreeNode variableTreeNode) { 128 variableTreeNode.Weight = 1; 129 } 130 } 131 132 expressions[hash] = formatter.Format(subtree); 133 expressionCounts[hash] = 1; 125 var sb = new StringBuilder(); 126 for (int j = i - s.Size; j < i; ++j) { 127 sb.Append(GetLabel(simplified[j].Data)).Append(" "); 134 128 } 129 sb.Append(GetLabel(simplified[i].Data)); 130 expressions[hash] = sb.ToString(); 131 expressionCounts[hash] = 1; 135 132 } 136 133 } 137 134 138 var mostCommon = expressionCounts.OrderByDescending(x => x.Value).Take(10).ToList(); 139 var mostCommonLabels = mostCommon.Select(x => expressions[x.Key]).ToList(); 140 135 // fill in values for existing rows 141 136 foreach (var t in hashToRow) { 142 137 var hash = t.Key; 143 138 var row = t.Value; 144 139 145 if (expressionCounts.TryGetValue(hash, out int count)) { 146 row.Values.Add((double)count / totalCount); 147 } else { 148 row.Values.Add(0); 149 } 140 expressionCounts.TryGetValue(hash, out int count); 141 row.Values.Add(count); 150 142 } 151 143 152 144 var nValues = dt.Rows.Any() ? 
dt.Rows.Max(x => x.Values.Count) : 0; 153 145 154 for (int i = 0; i < mostCommon.Count; ++i) { 155 var hash = mostCommon[i].Key; 156 var count = mostCommon[i].Value; 146 // check if we have new rows 147 foreach (var t in expressionCounts.OrderByDescending(x => x.Value).Take(10)) { 148 var hash = t.Key; 149 var count = t.Value; 150 var label = expressions[hash]; 157 151 158 152 if (hashToRow.ContainsKey(hash)) { 159 153 continue; 160 154 } 161 var label = mostCommonLabels[i];162 155 var row = new DataRow(label) { VisualProperties = { StartIndexZero = true } }; 163 // pad with zeroes 164 for (int j = 0; j < nValues - 1; ++j) { 165 row.Values.Add(0); 156 if (nValues > 0) { 157 row.Values.AddRange(Enumerable.Repeat<double>(0, nValues - 1)); // pad with zeroes 166 158 } 167 row.Values.Add( (double)count / totalCount);159 row.Values.Add(count); 168 160 dt.Rows.Add(row); 169 161 hashToRow[hash] = row; 170 162 } 163 171 164 return base.Apply(); 165 } 166 167 private static string GetLabel(ISymbolicExpressionTreeNode node) { 168 if (node is ConstantTreeNode constant) { 169 return "C"; 170 } 171 if (node is VariableTreeNode variable) { 172 return variable.VariableName; 173 } 174 if (node.Symbol is Addition) { 175 return "+"; 176 } 177 if (node.Symbol is Subtraction) { 178 return "-"; 179 } 180 if (node.Symbol is Multiplication) { 181 return "*"; 182 } 183 if (node.Symbol is Division) { 184 return "/"; 185 } 186 return node.Symbol.ToString(); 172 187 } 173 188 }
Note: See TracChangeset for help on using the changeset viewer.