Changeset 17090


Ignore:
Timestamp:
07/05/19 15:44:04 (2 weeks ago)
Author:
abeham
Message:

#2950: merged revisions 16218, 16252, 16255, 16258, 16259, 16260, 16261, 16263, 16267, 16270, 16271, 16272, 16273, 16284, 16290, 16291, 16302, 16305, 16382, 16478 to stable

Location:
stable
Files:
7 edited
3 copied

Legend:

Unmodified
Added
Removed
  • stable

  • stable/HeuristicLab.Problems.DataAnalysis.Symbolic

  • stable/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisBuildingBlockAnalyzer.cs

    r16255 r17090  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Text;
    2526using HeuristicLab.Analysis;
    2627using HeuristicLab.Common;
     
    3839  public sealed class SymbolicDataAnalysisBuildingBlockAnalyzer : SymbolicDataAnalysisAnalyzer {
    3940    private const string BuildingBlocksResultName = "BuildingBlocks";
     41    private const string SolutionUniquenessResultName = "SolutionUniqueness";
    4042    private const string MinimumSubtreeLengthParameterName = "MinimumSubtreeLength";
    4143    private const string SimplifyTreesParameterName = "SimplifyTrees";
    4244
    43     private readonly InfixExpressionFormatter formatter = new InfixExpressionFormatter();
    44     private Dictionary<int, DataRow> hashToRow = new Dictionary<int, DataRow>();
    45 
     45    private Dictionary<ulong, DataRow> hashToRow = new Dictionary<ulong, DataRow>();
     46
     47    #region parameters
    4648    public IValueLookupParameter<IntValue> MinimumSubtreeLengthParameter {
    4749      get { return (IValueLookupParameter<IntValue>)Parameters[MinimumSubtreeLengthParameterName]; }
     
    5153      get { return (IValueLookupParameter<BoolValue>)Parameters[SimplifyTreesParameterName]; }
    5254    }
    53 
     55    #endregion
     56
     57    #region parameter properties
    5458    public IntValue MinimumSubtreeLength {
    5559      get { return MinimumSubtreeLengthParameter.ActualValue; }
     
    5963      get { return SimplifyTreesParameter.ActualValue; }
    6064    }
     65    #endregion
    6166
    6267    public override void InitializeState() {
    6368      base.InitializeState();
    6469
    65       hashToRow = new Dictionary<int, DataRow>();
    66     }
    67 
     70      hashToRow = new Dictionary<ulong, DataRow>();
     71    }
    6872
    6973    [StorableHook(HookType.AfterDeserialization)]
     
    8589      return new SymbolicDataAnalysisBuildingBlockAnalyzer(this, cloner);
    8690    }
     91
     92    [StorableConstructor]
     93    private SymbolicDataAnalysisBuildingBlockAnalyzer(bool deserializing) : base(deserializing) { }
     94
     95    private readonly Func<byte[], ulong> hashFunction = HashUtil.JSHash;
    8796
    8897    public override IOperation Apply() {
     
    99108      var simplify = SimplifyTrees.Value;
    100109
    101       var expressions = new Dictionary<int, string>();
    102       var expressionCounts = new Dictionary<int, int>();
    103 
    104       int totalCount = 0; // total number of subtrees examined
     110      var expressions = new Dictionary<ulong, string>();
     111      var expressionCounts = new Dictionary<ulong, int>();
     112
     113      int totalCount = 0; // total number of examined subtrees
     114
     115      var hashes = new List<ulong>();
     116      // count hashes
    105117      foreach (var tree in SymbolicExpressionTree) {
    106118        var hashNodes = tree.Root.GetSubtree(0).GetSubtree(0).MakeNodes();
    107         var simplified = simplify ? hashNodes.Simplify() : hashNodes.Sort();
     119        var simplified = simplify ? hashNodes.Simplify(hashFunction) : hashNodes.Sort(hashFunction);
     120        hashes.Add(simplified.Last().CalculatedHashValue); // maybe calculate aggregate hash instead
    108121
    109122        for (int i = 0; i < simplified.Length; i++) {
    110123          HashNode<ISymbolicExpressionTreeNode> s = simplified[i];
    111           if (s.IsChild || s.Size < minLength) {
     124          if (s.IsLeaf || s.Size < minLength) {
    112125            continue;
    113126          }
    114127          ++totalCount;
    115128          var hash = s.CalculatedHashValue;
    116           if (expressions.TryGetValue(hash, out string str)) {
     129          if (expressions.ContainsKey(hash)) {
    117130            expressionCounts[hash]++;
    118           } else {
    119             // set constant and weight values so the tree is formatted nicely by the formatter
    120             var nodes = new HashNode<ISymbolicExpressionTreeNode>[1 + s.Size];
    121             Array.Copy(simplified, i - s.Size, nodes, 0, nodes.Length);
    122             var subtree = nodes.ToSubtree();
    123 
    124             foreach (var node in subtree.IterateNodesPostfix()) {
    125               if (node is ConstantTreeNode constantTreeNode) {
    126                 constantTreeNode.Value = 0;
    127               } else if (node is VariableTreeNode variableTreeNode) {
    128                 variableTreeNode.Weight = 1;
    129               }
    130             }
    131 
    132             expressions[hash] = formatter.Format(subtree);
    133             expressionCounts[hash] = 1;
     131            continue;
    134132          }
     133
     134          var sb = new StringBuilder();
     135          for (int j = i - s.Size; j < i; ++j) {
     136            sb.Append(GetLabel(simplified[j].Data)).Append(" ");
     137          }
     138          sb.Append(GetLabel(simplified[i].Data));
     139          expressions[hash] = sb.ToString();
     140          expressionCounts[hash] = 1;
    135141        }
    136142      }
    137143
    138       var mostCommon = expressionCounts.OrderByDescending(x => x.Value).Take(10).ToList();
    139       var mostCommonLabels = mostCommon.Select(x => expressions[x.Key]).ToList();
    140 
     144      // fill in values for existing rows
    141145      foreach (var t in hashToRow) {
    142146        var hash = t.Key;
    143147        var row = t.Value;
    144148
    145         if (expressionCounts.TryGetValue(hash, out int count)) {
    146           row.Values.Add((double)count / totalCount);
    147         } else {
    148           row.Values.Add(0);
    149         }
     149        expressionCounts.TryGetValue(hash, out int count);
     150        row.Values.Add(count);
    150151      }
    151152
    152153      var nValues = dt.Rows.Any() ? dt.Rows.Max(x => x.Values.Count) : 0;
    153154
    154       for (int i = 0; i < mostCommon.Count; ++i) {
    155         var hash = mostCommon[i].Key;
    156         var count = mostCommon[i].Value;
     155      // check if we have new rows
     156      foreach (var t in expressionCounts.OrderByDescending(x => x.Value).Take(10)) {
     157        var hash = t.Key;
     158        var count = t.Value;
     159        var label = expressions[hash];
    157160
    158161        if (hashToRow.ContainsKey(hash)) {
    159162          continue;
    160163        }
    161         var label = mostCommonLabels[i];
    162164        var row = new DataRow(label) { VisualProperties = { StartIndexZero = true } };
    163         // pad with zeroes
    164         for (int j = 0; j < nValues - 1; ++j) {
    165           row.Values.Add(0);
     165        if (nValues > 0) {
     166          row.Values.AddRange(Enumerable.Repeat<double>(0, nValues - 1)); // pad with zeroes
    166167        }
    167         row.Values.Add((double)count / totalCount);
     168        row.Values.Add(count);
    168169        dt.Rows.Add(row);
    169170        hashToRow[hash] = row;
    170171      }
     172
     173      // compute solution uniqueness
     174      DataTableHistory dth;
     175      var counts = hashes.GroupBy(x => x).ToDictionary(x => x.Key, x => x.Count());
     176      if (!ResultCollection.ContainsKey(SolutionUniquenessResultName)) {
     177        dth = new DataTableHistory();
     178        ResultCollection.Add(new Result(SolutionUniquenessResultName, dth));
     179      } else {
     180        dth = (DataTableHistory)ResultCollection[SolutionUniquenessResultName].Value;
     181      }
     182
     183      var ct = new DataTable("Unique Solutions");
     184      var ctr = new DataRow { VisualProperties = { StartIndexZero = true, ChartType = DataRowVisualProperties.DataRowChartType.Columns } };
     185      ctr.Values.AddRange(hashes.Select(x => (double)counts[x]).OrderByDescending(x => x));
     186      ct.Rows.Add(ctr);
     187      dth.Add(ct);
     188
     189      var max = dth.Max(x => x.Rows.First().Values.Max());
     190      foreach (var table in dth) {
     191        table.VisualProperties.YAxisMinimumAuto = false;
     192        table.VisualProperties.YAxisMaximumAuto = false;
     193        table.VisualProperties.YAxisMinimumFixedValue = 0;
     194        table.VisualProperties.YAxisMaximumFixedValue = max;
     195      }
     196
    171197      return base.Apply();
     198    }
     199
     200    private static string GetLabel(ISymbolicExpressionTreeNode node) {
     201      if (node is ConstantTreeNode constant) {
     202        return "C";
     203      }
     204      if (node is VariableTreeNode variable) {
     205        return variable.VariableName;
     206      }
     207      if (node.Symbol is Addition) {
     208        return "+";
     209      }
     210      if (node.Symbol is Subtraction) {
     211        return "-";
     212      }
     213      if (node.Symbol is Multiplication) {
     214        return "*";
     215      }
     216      if (node.Symbol is Division) {
     217        return "/";
     218      }
     219      return node.Symbol.ToString();
    172220    }
    173221  }
  • stable/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Crossovers/SymbolicDataAnalysisExpressionDiversityPreservingCrossover.cs

    r16263 r17090  
    2020    private const string WindowingParameterName = "Windowing";
    2121    private const string ProportionalSamplingParameterName = "ProportionalSampling";
     22
     23    private static readonly Func<byte[], ulong> hashFunction = HashUtil.JSHash;
    2224
    2325    #region Parameter Properties
     
    7375      var leafCrossoverPointProbability = 1 - internalCrossoverPointProbability;
    7476
    75       var nodes0 = ActualRoot(parent0).MakeNodes().Sort();
    76       var nodes1 = ActualRoot(parent1).MakeNodes().Sort();
     77      var nodes0 = ActualRoot(parent0).MakeNodes().Sort(hashFunction);
     78      var nodes1 = ActualRoot(parent1).MakeNodes().Sort(hashFunction);
    7779
    7880      IList<HashNode<ISymbolicExpressionTreeNode>> sampled0;
     
    8183      if (proportionalSampling) {
    8284        var p = internalCrossoverPointProbability;
    83         var weights0 = nodes0.Select(x => x.IsChild ? 1 - p : p);
     85        var weights0 = nodes0.Select(x => x.IsLeaf ? 1 - p : p);
    8486        sampled0 = nodes0.SampleProportionalWithoutRepetition(random, nodes0.Length, weights0, windowing: windowing).ToArray();
    8587
    86         var weights1 = nodes1.Select(x => x.IsChild ? 1 - p : p);
     88        var weights1 = nodes1.Select(x => x.IsLeaf ? 1 - p : p);
    8789        sampled1 = nodes1.SampleProportionalWithoutRepetition(random, nodes1.Length, weights1, windowing: windowing).ToArray();
    8890      } else {
     
    133135
    134136      if (chooseInternal) {
    135         list.AddRange(nodes.Where(x => !x.IsChild && x.Data.Parent != null));
     137        list.AddRange(nodes.Where(x => !x.IsLeaf && x.Data.Parent != null));
    136138      }
    137139      if (!chooseInternal || list.Count == 0) {
    138         list.AddRange(nodes.Where(x => x.IsChild && x.Data.Parent != null));
     140        list.AddRange(nodes.Where(x => x.IsLeaf && x.Data.Parent != null));
    139141      }
    140142
  • stable/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashExtensions.cs

    r16218 r17090  
    3232
    3333      public bool Enabled;
    34       public int HashValue;           // the initial (fixed) hash value for this individual node/data
    35       public int CalculatedHashValue; // the calculated hash value (taking into account the children hash values)
    36 
    37       public Action<HashNode<T>[], int> Simplify;
     34      public ulong HashValue;           // the initial (fixed) hash value for this individual node/data
     35      public ulong CalculatedHashValue; // the calculated hash value (taking into account the children hash values)
     36
     37      public delegate void SimplifyAction(ref HashNode<T>[] nodes, int i);
     38      public SimplifyAction Simplify;
     39
    3840      public IComparer<T> Comparer;
    3941
    40       public bool IsChild => Arity == 0;
     42      public bool IsLeaf => Arity == 0;
    4143
    4244      public HashNode(IComparer<T> comparer) {
     
    6769
    6870      public override int GetHashCode() {
    69         return CalculatedHashValue;
     71        return (int)CalculatedHashValue;
    7072      }
    7173
     
    7981    }
    8082
    81     public static int ComputeHash<T>(this HashNode<T>[] nodes, int i) where T : class {
     83    public static ulong ComputeHash<T>(this HashNode<T>[] nodes, int i, Func<byte[], ulong> hashFunction) where T : class {
    8284      var node = nodes[i];
    83       var hashes = new int[node.Arity + 1];
    84       int idx = 0;
    85       foreach (int c in nodes.IterateChildren(i)) {
    86         hashes[idx++] = nodes[c].CalculatedHashValue;
    87       }
    88       hashes[idx] = node.HashValue;
    89       return HashUtil.JSHash(hashes);
    90     }
    91 
    92     public static HashNode<T>[] Simplify<T>(this HashNode<T>[] nodes) where T : class {
    93       var reduced = nodes.Reduce();
    94       reduced.Sort();
     85      const int size = sizeof(ulong);
     86      var hashes = new ulong[node.Arity + 1];
     87      var bytes = new byte[(node.Arity + 1) * size];
     88
     89      for (int j = i - 1, k = 0; k < node.Arity; ++k, j -= 1 + nodes[j].Size) {
     90        hashes[k] = nodes[j].CalculatedHashValue;
     91      }
     92      hashes[node.Arity] = node.HashValue;
     93      Buffer.BlockCopy(hashes, 0, bytes, 0, bytes.Length);
     94      return hashFunction(bytes);
     95    }
     96
     97    // set the enabled state for the whole subtree rooted at this node
     98    public static void SetEnabled<T>(this HashNode<T>[] nodes, int i, bool enabled) where T : class {
     99      nodes[i].Enabled = enabled;
     100      for (int j = i - nodes[i].Size; j < i; ++j)
     101        nodes[j].Enabled = enabled;
     102    }
     103
     104    public static HashNode<T>[] Simplify<T>(this HashNode<T>[] nodes, Func<byte[], ulong> hashFunction) where T : class {
     105      var reduced = nodes.UpdateNodeSizes().Reduce().Sort(hashFunction);
    95106
    96107      for (int i = 0; i < reduced.Length; ++i) {
    97108        var node = reduced[i];
    98         if (node.IsChild) {
    99           continue;
    100         }
    101         node.Simplify?.Invoke(reduced, i);
     109        if (node.IsLeaf) {
     110          continue;
     111        }
     112        node.Simplify?.Invoke(ref reduced, i);
    102113      }
    103114      // detect if anything was simplified
     
    117128        }
    118129      }
    119       simplified.UpdateNodeSizes();
    120       simplified.Sort();
    121       return simplified;
    122     }
    123 
    124     public static void Sort<T>(this HashNode<T>[] nodes) where T : class {
     130      return simplified.UpdateNodeSizes().Reduce().Sort(hashFunction);
     131    }
     132
     133    public static HashNode<T>[] Sort<T>(this HashNode<T>[] nodes, Func<byte[], ulong> hashFunction) where T : class {
     134      int sort(int a, int b) => nodes[a].CompareTo(nodes[b]);
     135
    125136      for (int i = 0; i < nodes.Length; ++i) {
    126137        var node = nodes[i];
    127138
    128         if (node.IsChild) {
     139        if (node.IsLeaf) {
    129140          continue;
    130141        }
     
    138149          } else { // i have some non-terminal children
    139150            var sorted = new HashNode<T>[size];
    140             var indices = nodes.IterateChildren(i);
    141             Array.Sort(indices, (a, b) => nodes[a].CompareTo(nodes[b]));
     151            var indices = new int[node.Arity];
     152            for (int j = i - 1, k = 0; k < node.Arity; j -= 1 + nodes[j].Size, ++k) {
     153              indices[k] = j;
     154            }
     155            Array.Sort(indices, sort);
    142156
    143157            int idx = 0;
    144158            foreach (var j in indices) {
    145159              var child = nodes[j];
    146               if (!child.IsChild) { // must copy complete subtree
     160              if (!child.IsLeaf) { // must copy complete subtree
    147161                Array.Copy(nodes, j - child.Size, sorted, idx, child.Size);
    148162                idx += child.Size;
     
    153167          }
    154168        }
    155         node.CalculatedHashValue = nodes.ComputeHash(i);
    156       }
     169        node.CalculatedHashValue = nodes.ComputeHash(i, hashFunction);
     170      }
     171      return nodes;
    157172    }
    158173
    159174    /// <summary>
    160     /// Get a function node's child indices from left to right
     175    /// Get a function node's child indicest
    161176    /// </summary>
    162     /// <typeparam name="T"></typeparam>
    163     /// <param name="nodes"></param>
    164     /// <param name="i"></param>
     177    /// <typeparam name="T">The data type encapsulated by a hash node</typeparam>
     178    /// <param name="nodes">An array of hash nodes with up-to-date node sizes</param>
     179    /// <param name="i">The index in the array of hash nodes of the node whose children we want to iterate</param>
    165180    /// <returns>An array containing child indices</returns>
    166181    public static int[] IterateChildren<T>(this HashNode<T>[] nodes, int i) where T : class {
     
    170185      var idx = i - 1;
    171186      for (int j = 0; j < arity; ++j) {
    172         while (!nodes[idx].Enabled) { --idx; } // skip nodes possibly disabled by simplification
    173187        children[j] = idx;
    174188        idx -= 1 + nodes[idx].Size;
     
    177191    }
    178192
    179     public static void UpdateNodeSizes<T>(this HashNode<T>[] nodes) where T : class {
     193    public static HashNode<T>[] UpdateNodeSizes<T>(this HashNode<T>[] nodes) where T : class {
    180194      for (int i = 0; i < nodes.Length; ++i) {
    181195        var node = nodes[i];
    182         if (node.IsChild) {
     196        if (node.IsLeaf) {
    183197          node.Size = 0;
    184198          continue;
    185199        }
    186200        node.Size = node.Arity;
    187         foreach (int j in nodes.IterateChildren(i)) {
     201
     202        for (int j = i - 1, k = 0; k < node.Arity; j -= 1 + nodes[j].Size, ++k) {
    188203          node.Size += nodes[j].Size;
    189204        }
    190205      }
    191     }
    192 
    193     /// <summary>
    194     /// Reduce nodes of the same type according to arithmetic rules
    195     /// </summary>
    196     /// <typeparam name="T"></typeparam>
    197     /// <param name="g">The given grammar (controls symbol arities)</param>
    198     /// <param name="nodes">The node array</param>
    199     /// <param name="i">Parent index</param>
    200     /// <param name="j">Child index</param>
    201     private static void Reduce<T>(this HashNode<T>[] nodes, int i, int j) where T : class {
    202       var p = nodes[i]; // parent node
    203       var c = nodes[j]; // child node
    204 
    205       if (c.IsChild)
    206         return;
    207 
    208       foreach (int k in nodes.IterateChildren(j)) {
    209         if (nodes[k].IsChild) continue;
    210         nodes.Reduce(j, k);
    211       }
    212 
    213       // handle commutative symbols (add, mul)
    214       if (p.IsCommutative && p.HashValue == c.HashValue) {
    215         c.Enabled = false;
    216         p.Arity += c.Arity - 1;
    217       }
    218     }
    219 
    220     public static HashNode<T>[] Reduce<T>(this HashNode<T>[] nodes) where T : class {
    221       nodes.UpdateNodeSizes();
    222       int i = nodes.Length - 1;
    223       foreach (int c in nodes.IterateChildren(i)) {
    224         if (nodes[c].IsChild) continue;
    225         nodes.Reduce(i, c);
    226       }
     206      return nodes;
     207    }
     208
     209    private static HashNode<T>[] Reduce<T>(this HashNode<T>[] nodes) where T : class {
    227210      int count = 0;
    228       foreach (var node in nodes) {
    229         if (!node.Enabled) { ++count; }
     211      for (int i = 0; i < nodes.Length; ++i) {
     212        var node = nodes[i];
     213        if (node.IsLeaf || !node.IsCommutative) {
     214          continue;
     215        }
     216
     217        var arity = node.Arity;
     218        for (int j = i - 1, k = 0; k < arity; j -= 1 + nodes[j].Size, ++k) {
     219          if (node.HashValue == nodes[j].HashValue) {
     220            nodes[j].Enabled = false;
     221            node.Arity += nodes[j].Arity - 1;
     222            ++count;
     223          }
     224        }
    230225      }
    231226      if (count == 0)
     
    233228
    234229      var reduced = new HashNode<T>[nodes.Length - count];
    235       i = 0;
     230      var idx = 0;
    236231      foreach (var node in nodes) {
    237         if (node.Enabled) { reduced[i++] = node; }
    238       }
    239       reduced.UpdateNodeSizes();
    240       return reduced;
     232        if (node.Enabled) { reduced[idx++] = node; }
     233      }
     234      return reduced.UpdateNodeSizes();
    241235    }
    242236  }
  • stable/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashUtil.cs

    r16218 r17090  
    2020#endregion
    2121
     22
    2223using System;
    2324using System.Security.Cryptography;
     
    2829
    2930    // A simple hash function from Robert Sedgwicks Algorithms in C book.I've added some simple optimizations to the algorithm in order to speed up its hashing process.
    30     public static int RSHash(int[] input) {
     31    public static ulong RSHash(byte[] input) {
    3132      const int b = 378551;
    32       int a = 63689;
    33       int hash = 0;
     33      ulong a = 63689;
     34      ulong hash = 0;
    3435
    3536      foreach (var v in input) {
     
    4142
    4243    // A bitwise hash function written by Justin Sobel
    43     public static int JSHash(int[] input) {
    44       int hash = 1315423911;
     44    public static ulong JSHash(byte[] input) {
     45      ulong hash = 1315423911;
    4546      for (int i = 0; i < input.Length; ++i)
    4647        hash ^= (hash << 5) + input[i] + (hash >> 2);
     
    4950
    5051    // This hash function comes from Brian Kernighan and Dennis Ritchie's book "The C Programming Language". It is a simple hash function using a strange set of possible seeds which all constitute a pattern of 31....31...31 etc, it seems to be very similar to the DJB hash function.
    51     public static int BKDRHash(int[] input) {
    52       const int seed = 131;
    53       int hash = 0;
     52    public static ulong BKDRHash(byte[] input) {
     53      ulong seed = 131;
     54      ulong hash = 0;
    5455      foreach (var v in input) {
    5556        hash = (hash * seed) + v;
     
    5960
    6061    // This is the algorithm of choice which is used in the open source SDBM project. The hash function seems to have a good over-all distribution for many different data sets. It seems to work well in situations where there is a high variance in the MSBs of the elements in a data set.
    61     public static int SDBMHash(int[] input) {
    62       int hash = 0;
     62    public static ulong SDBMHash(byte[] input) {
     63      ulong hash = 0;
    6364      foreach (var v in input) {
    6465        hash = v + (hash << 6) + (hash << 16) - hash;
     
    6869
    6970    // An algorithm produced by Professor Daniel J. Bernstein and shown first to the world on the usenet newsgroup comp.lang.c. It is one of the most efficient hash functions ever published.
    70     public static int DJBHash(int[] input) {
    71       int hash = 5381;
     71    public static ulong DJBHash(byte[] input) {
     72      ulong hash = 5381;
    7273      foreach (var v in input) {
    7374        hash = (hash << 5) + hash + v;
     
    7778
    7879    // An algorithm proposed by Donald E.Knuth in The Art Of Computer Programming Volume 3, under the topic of sorting and search chapter 6.4.
    79     public static int DEKHash(int[] input) {
    80       int hash = input.Length;
     80    public static ulong DEKHash(byte[] input) {
     81      ulong hash = (ulong)input.Length;
    8182      foreach (var v in input) {
    8283        hash = (hash << 5) ^ (hash >> 27) ^ v;
     
    8586    }
    8687
    87     public static int CryptoHash(HashAlgorithm ha, int[] input) {
    88       return BitConverter.ToInt32(ha.ComputeHash(input.ToByteArray()), 0);
    89     }
    90 
    91     public static byte[] ToByteArray(this int[] input) {
    92       const int size = sizeof(int);
    93       var bytes = new byte[input.Length * sizeof(int)];
    94       for (int i = 0; i < input.Length; ++i) {
    95         Array.Copy(BitConverter.GetBytes(bytes[i]), 0, bytes, i * size, size);
    96       }
    97       return bytes;
     88    public static ulong CryptoHash(HashAlgorithm ha, byte[] input) {
     89      return BitConverter.ToUInt64(ha.ComputeHash(input), 0);
    9890    }
    9991  }
  • stable/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs

    r16218 r17090  
    1 using System.Collections.Generic;
     1#region License Information
     2/* HeuristicLab
     3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     4 *
     5 * This file is part of HeuristicLab.
     6 *
     7 * HeuristicLab is free software: you can redistribute it and/or modify
     8 * it under the terms of the GNU General Public License as published by
     9 * the Free Software Foundation, either version 3 of the License, or
     10 * (at your option) any later version.
     11 *
     12 * HeuristicLab is distributed in the hope that it will be useful,
     13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 * GNU General Public License for more details.
     16 *
     17 * You should have received a copy of the GNU General Public License
     18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
     19 */
     20#endregion
     21
     22using System;
     23using System.Collections.Generic;
    224using System.Linq;
    325using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
     
    1739
    1840    private static readonly ISymbolicExpressionTreeNodeComparer comparer = new SymbolicExpressionTreeNodeComparer();
    19 
    20     public static int ComputeHash(this ISymbolicExpressionTree tree) {
    21       return ComputeHash(tree.Root.GetSubtree(0).GetSubtree(0));
    22     }
    23 
    24     public static Dictionary<ISymbolicExpressionTreeNode, int> ComputeNodeHashes(this ISymbolicExpressionTree tree) {
    25       var root = tree.Root.GetSubtree(0).GetSubtree(0);
    26       var nodes = root.MakeNodes();
    27       nodes.UpdateNodeSizes();
    28 
    29       for (int i = 0; i < nodes.Length; ++i) {
    30         if (nodes[i].IsChild)
    31           continue;
    32         nodes[i].CalculatedHashValue = nodes.ComputeHash(i);
    33       }
    34       return nodes.ToDictionary(x => x.Data, x => x.CalculatedHashValue);
    35     }
    36 
    37     public static int ComputeHash(this ISymbolicExpressionTreeNode treeNode) {
    38       var hashNodes = treeNode.MakeNodes();
    39       var simplified = hashNodes.Simplify();
    40       return ComputeHash(simplified);
    41     }
    42 
    43     public static int ComputeHash(this HashNode<ISymbolicExpressionTreeNode>[] nodes) {
    44       int hash = 1315423911;
    45       foreach (var node in nodes)
    46         hash ^= (hash << 5) + node.CalculatedHashValue + (hash >> 2);
    47       return hash;
    48     }
    49 
    50     public static HashNode<ISymbolicExpressionTreeNode> ToHashNode(this ISymbolicExpressionTreeNode node) {
     41    private static ISymbolicExpressionTreeNode ActualRoot(this ISymbolicExpressionTree tree) => tree.Root.GetSubtree(0).GetSubtree(0);
     42
     43    #region tree hashing
     44    public static ulong[] Hash(this ISymbolicExpressionTree tree, bool simplify = false, bool strict = false) {
     45      return tree.ActualRoot().Hash(simplify, strict);
     46    }
     47
     48    public static ulong[] Hash(this ISymbolicExpressionTreeNode node, bool simplify = false, bool strict = false) {
     49      ulong hashFunction(byte[] input) => HashUtil.DJBHash(input);
     50
     51      var hashNodes = simplify ? node.MakeNodes(strict).Simplify(hashFunction) : node.MakeNodes(strict).Sort(hashFunction);
     52      var hashes = new ulong[hashNodes.Length];
     53      for (int i = 0; i < hashes.Length; ++i) {
     54        hashes[i] = hashNodes[i].CalculatedHashValue;
     55      }
     56      return hashes;
     57    }
     58
     59    public static ulong ComputeHash(this ISymbolicExpressionTree tree, bool simplify = false, bool strict = false) {
     60      return ComputeHash(tree.ActualRoot(), simplify, strict);
     61    }
     62
     63    public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode, bool simplify = false, bool strict = false) {
     64      return treeNode.Hash(simplify, strict).Last();
     65    }
     66
     67    public static HashNode<ISymbolicExpressionTreeNode> ToHashNode(this ISymbolicExpressionTreeNode node, bool strict = false) {
    5168      var symbol = node.Symbol;
    5269      var name = symbol.Name;
    53       if (symbol is Variable) {
    54         var variableTreeNode = (VariableTreeNode)node;
    55         name = variableTreeNode.VariableName;
    56       }
    57       var hash = name.GetHashCode();
     70      if (node is ConstantTreeNode constantNode) {
     71        name = strict ? constantNode.Value.ToString() : symbol.Name;
     72      } else if (node is VariableTreeNode variableNode) {
     73        name = strict ? variableNode.Weight.ToString() + variableNode.VariableName : variableNode.VariableName;
     74      }
     75      var hash = (ulong)name.GetHashCode();
    5876      var hashNode = new HashNode<ISymbolicExpressionTreeNode>(comparer) {
    5977        Data = node,
     
    7997    }
    8098
    81     public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTreeNode node) {
    82       return node.IterateNodesPostfix().Select(ToHashNode).ToArray();
    83     }
     99    public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTree tree, bool strict = false) {
     100      return MakeNodes(tree.ActualRoot(), strict);
     101    }
     102
     103    public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTreeNode node, bool strict = false) {
     104      return node.IterateNodesPostfix().Select(x => x.ToHashNode(strict)).ToArray().UpdateNodeSizes();
     105    }
     106    #endregion
     107
     108    #region tree similarity
     109    public static double ComputeSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool simplify = false, bool strict = false) {
     110      return ComputeSimilarity(t1.ActualRoot(), t2.ActualRoot(), simplify, strict);
     111    }
     112
     113    public static double ComputeSimilarity(ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, bool simplify = false, bool strict = false) {
     114      var lh = t1.Hash(simplify, strict);
     115      var rh = t2.Hash(simplify, strict);
     116
     117      Array.Sort(lh);
     118      Array.Sort(rh);
     119
     120      return ComputeSimilarity(lh, rh);
     121    }
     122
     123    // requires lhs and rhs to be sorted
     124    public static int IntersectCount(this ulong[] lh, ulong[] rh) {
     125      int count = 0;
     126      for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) {
     127        var h1 = lh[i];
     128        var h2 = rh[j];
     129        if (h1 == h2) {
     130          ++count;
     131          ++i;
     132          ++j;
     133        } else if (h1 < h2) {
     134          ++i;
     135        } else if (h1 > h2) {
     136          ++j;
     137        }
     138      }
     139      return count;
     140    }
     141
     142    public static IEnumerable<ulong> Intersect(this ulong[] lh, ulong[] rh) {
     143      for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) {
     144        var h1 = lh[i];
     145        var h2 = rh[j];
     146        if (h1 == h2) {
     147          yield return h1;
     148          ++i;
     149          ++j;
     150        } else if (h1 < h2) {
     151          ++i;
     152        } else if (h1 > h2) {
     153          ++j;
     154        }
     155      }
     156    }
     157
     158    // this will only work if lh and rh are sorted
     159    public static double ComputeSimilarity(ulong[] lh, ulong[] rh) {
     160      return 2d * IntersectCount(lh, rh) / (lh.Length + rh.Length);
     161    }
     162
     163    public static double ComputeAverageSimilarity(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) {
     164      var total = trees.Count * (trees.Count - 1) / 2;
     165      double avg = 0;
     166      var hashes = new ulong[trees.Count][];
     167      // build hash arrays
     168      for (int i = 0; i < trees.Count; ++i) {
     169        var nodes = trees[i].MakeNodes(strict);
     170        hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();
     171        Array.Sort(hashes[i]);
     172      }
     173      // compute similarity matrix
     174      for (int i = 0; i < trees.Count - 1; ++i) {
     175        for (int j = i + 1; j < trees.Count; ++j) {
     176          avg += ComputeSimilarity(hashes[i], hashes[j]);
     177        }
     178      }
     179      return avg / total;
     180    }
     181
     182    public static double[,] ComputeSimilarityMatrix(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) {
     183      var sim = new double[trees.Count, trees.Count];
     184      var hashes = new ulong[trees.Count][];
     185      // build hash arrays
     186      for (int i = 0; i < trees.Count; ++i) {
     187        var nodes = trees[i].MakeNodes(strict);
     188        hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();
     189        Array.Sort(hashes[i]);
     190      }
     191      // compute similarity matrix
     192      for (int i = 0; i < trees.Count - 1; ++i) {
     193        for (int j = i + 1; j < trees.Count; ++j) {
     194          sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]);
     195        }
     196      }
     197      return sim;
     198    }
     199    #endregion
    84200
    85201    #region parse a nodes array back into a tree
     
    98214        var node = nodes[i];
    99215
    100         if (node.IsChild) {
     216        if (node.IsLeaf) {
    101217          if (node.Data is VariableTreeNode variable) {
    102218            var variableTreeNode = (VariableTreeNode)treeNodes[i];
    103219            variableTreeNode.VariableName = variable.VariableName;
    104             variableTreeNode.Weight = 1;
     220            variableTreeNode.Weight = variable.Weight;
    105221          } else if (node.Data is ConstantTreeNode @const) {
    106222            var constantTreeNode = (ConstantTreeNode)treeNodes[i];
     
    127243    #region tree simplification
    128244    // these simplification methods rely on the assumption that child nodes of the current node have already been simplified
    129     // (in other words simplification should be applied in a bottom-up fashion) 
     245    // (in other words simplification should be applied in a bottom-up fashion)
    130246    public static ISymbolicExpressionTree Simplify(ISymbolicExpressionTree tree) {
     247      ulong hashFunction(byte[] bytes) => HashUtil.JSHash(bytes);
    131248      var root = tree.Root.GetSubtree(0).GetSubtree(0);
    132249      var nodes = root.MakeNodes();
    133       var simplified = nodes.Simplify();
     250      var simplified = nodes.Simplify(hashFunction);
    134251      return simplified.ToTree();
    135252    }
    136253
    137     public static void SimplifyAddition(HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {
     254    public static void SimplifyAddition(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {
    138255      // simplify additions of terms by eliminating terms with the same symbol and hash
    139256      var children = nodes.IterateChildren(i);
    140257
     258      // we always assume the child nodes are sorted
    141259      var curr = children[0];
    142260      var node = nodes[i];
     
    144262      foreach (var j in children.Skip(1)) {
    145263        if (nodes[j] == nodes[curr]) {
    146           for (int k = j - nodes[j].Size; k <= j; ++k) {
    147             nodes[k].Enabled = false;
    148           }
     264          nodes.SetEnabled(j, false);
    149265          node.Arity--;
    150266        } else {
     
    157273    }
    158274
    159     // simplify multiplications by reducing constants and div terms 
    160     public static void SimplifyMultiplication(HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {
     275    // simplify multiplications by reducing constants and div terms
     276    public static void SimplifyMultiplication(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {
    161277      var node = nodes[i];
    162278      var children = nodes.IterateChildren(i);
     
    174290            var d = children[k];
    175291            if (nodes[d].Data.Symbol is Constant) {
    176               ((ConstantTreeNode)child.Data).Value *= ((ConstantTreeNode)nodes[d].Data).Value;
    177292              nodes[d].Enabled = false;
    178293              node.Arity--;
     
    207322    }
    208323
    209     public static void SimplifyDivision(HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {
     324    public static void SimplifyDivision(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {
    210325      var node = nodes[i];
    211326      var children = nodes.IterateChildren(i);
    212327
    213       if (children.All(x => nodes[x].Data.Symbol is Constant)) {
     328      var tmp = nodes;
     329
     330      if (children.All(x => tmp[x].Data.Symbol is Constant)) {
    214331        var v = ((ConstantTreeNode)nodes[children.First()].Data).Value;
    215332        if (node.Arity == 1) {
     
    242359    }
    243360
    244     public static void SimplifyUnaryNode(HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {
     361    public static void SimplifyUnaryNode(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {
    245362      // check if the child of the unary node is a constant, then the whole node can be simplified
    246363      var parent = nodes[i];
     
    257374    }
    258375
    259     public static void SimplifyBinaryNode(HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {
     376    public static void SimplifyBinaryNode(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {
    260377      var children = nodes.IterateChildren(i);
    261       if (children.All(x => nodes[x].Data.Symbol is Constant)) {
     378      var tmp = nodes;
     379      if (children.All(x => tmp[x].Data.Symbol is Constant)) {
    262380        foreach (var j in children) {
    263381          nodes[j].Enabled = false;
  • stable/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.csproj

    r17072 r17090  
    128128  <ItemGroup>
    129129    <Compile Include="Analyzers\SymbolicDataAnalysisBottomUpDiversityAnalyzer.cs" />
     130    <Compile Include="Analyzers\SymbolicDataAnalysisBuildingBlockAnalyzer.cs" />
    130131    <Compile Include="Analyzers\SymbolicDataAnalysisSingleObjectivePruningAnalyzer.cs" />
    131132    <Compile Include="Analyzers\SymbolicDataAnalysisSingleObjectiveValidationParetoBestSolutionAnalyzer.cs" />
     
    146147    <Compile Include="Converters\DerivativeCalculator.cs" />
    147148    <Compile Include="Converters\TreeToAutoDiffTermConverter.cs" />
     149    <Compile Include="Crossovers\SymbolicDataAnalysisExpressionDiversityPreservingCrossover.cs" />
    148150    <Compile Include="Formatters\InfixExpressionFormatter.cs" />
    149151    <Compile Include="Formatters\TSQLExpressionFormatter.cs" />
    150152    <Compile Include="Formatters\SymbolicDataAnalysisExpressionMathematicaFormatter.cs" />
    151153    <Compile Include="Formatters\SymbolicDataAnalysisExpressionCSharpFormatter.cs" />
     154    <Compile Include="Hashing\HashExtensions.cs" />
     155    <Compile Include="Hashing\HashUtil.cs" />
     156    <Compile Include="Hashing\SymbolicExpressionTreeHash.cs" />
    152157    <Compile Include="Importer\InfixExpressionParser.cs" />
    153158    <Compile Include="Importer\SymbolicExpressionImporter.cs" />
  • stable/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisModel.cs

    r17054 r17090  
    118118      ConstantTreeNode alphaTreeNode = null;
    119119      ConstantTreeNode betaTreeNode = null;
    120       // check if model has been scaled previously by analyzing the structure of the tree
     120      // check if model has a structure that can be re-used for scaling
    121121      var startNode = SymbolicExpressionTree.Root.GetSubtree(0);
    122       if (startNode.GetSubtree(0).Symbol is Addition) {
    123         var addNode = startNode.GetSubtree(0);
    124         if (addNode.SubtreeCount == 2 && addNode.GetSubtree(0).Symbol is Multiplication && addNode.GetSubtree(1).Symbol is Constant) {
    125           alphaTreeNode = addNode.GetSubtree(1) as ConstantTreeNode;
    126           var mulNode = addNode.GetSubtree(0);
    127           if (mulNode.SubtreeCount == 2 && mulNode.GetSubtree(1).Symbol is Constant) {
    128             betaTreeNode = mulNode.GetSubtree(1) as ConstantTreeNode;
    129           }
     122      var addNode = startNode.GetSubtree(0);
     123      if (addNode.Symbol is Addition && addNode.SubtreeCount == 2) {
     124        alphaTreeNode = (ConstantTreeNode)addNode.Subtrees.LastOrDefault(n => n is ConstantTreeNode);
     125        var mulNode = addNode.Subtrees.FirstOrDefault(n => n.Symbol is Multiplication);
     126        if (mulNode != null) {
     127          betaTreeNode = (ConstantTreeNode)mulNode.Subtrees.LastOrDefault(n => n is ConstantTreeNode);
    130128        }
    131129      }
Note: See TracChangeset for help on using the changeset viewer.