Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
11/19/18 14:34:25 (5 years ago)
Author:
lkammere
Message:

#2915: Merge changes in the trunk to current HEAD into the branch.

Location:
branches/2915-AbsoluteSymbol/HeuristicLab.Problems.DataAnalysis.Symbolic
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/2915-AbsoluteSymbol/HeuristicLab.Problems.DataAnalysis.Symbolic

  • branches/2915-AbsoluteSymbol/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs

    r16218 r16304  
    1 using System.Collections.Generic;
     1#region License Information
     2/* HeuristicLab
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     4 *
     5 * This file is part of HeuristicLab.
     6 *
     7 * HeuristicLab is free software: you can redistribute it and/or modify
     8 * it under the terms of the GNU General Public License as published by
     9 * the Free Software Foundation, either version 3 of the License, or
     10 * (at your option) any later version.
     11 *
     12 * HeuristicLab is distributed in the hope that it will be useful,
     13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 * GNU General Public License for more details.
     16 *
     17 * You should have received a copy of the GNU General Public License
     18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
     19 */
     20#endregion
     21
     22using System;
    223using System.Linq;
    324using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
     
    1839    private static readonly ISymbolicExpressionTreeNodeComparer comparer = new SymbolicExpressionTreeNodeComparer();
    1940
    20     public static int ComputeHash(this ISymbolicExpressionTree tree) {
     41    public static ulong ComputeHash(this ISymbolicExpressionTree tree) {
    2142      return ComputeHash(tree.Root.GetSubtree(0).GetSubtree(0));
    2243    }
    2344
    24     public static Dictionary<ISymbolicExpressionTreeNode, int> ComputeNodeHashes(this ISymbolicExpressionTree tree) {
    25       var root = tree.Root.GetSubtree(0).GetSubtree(0);
    26       var nodes = root.MakeNodes();
    27       nodes.UpdateNodeSizes();
    28 
    29       for (int i = 0; i < nodes.Length; ++i) {
    30         if (nodes[i].IsChild)
    31           continue;
    32         nodes[i].CalculatedHashValue = nodes.ComputeHash(i);
    33       }
    34       return nodes.ToDictionary(x => x.Data, x => x.CalculatedHashValue);
    35     }
    36 
    37     public static int ComputeHash(this ISymbolicExpressionTreeNode treeNode) {
    38       var hashNodes = treeNode.MakeNodes();
    39       var simplified = hashNodes.Simplify();
    40       return ComputeHash(simplified);
    41     }
    42 
    43     public static int ComputeHash(this HashNode<ISymbolicExpressionTreeNode>[] nodes) {
    44       int hash = 1315423911;
    45       foreach (var node in nodes)
    46         hash ^= (hash << 5) + node.CalculatedHashValue + (hash >> 2);
    47       return hash;
    48     }
    49 
    50     public static HashNode<ISymbolicExpressionTreeNode> ToHashNode(this ISymbolicExpressionTreeNode node) {
     45    public static double ComputeSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool simplify = false) {
     46      return ComputeSimilarity(t1.Root.GetSubtree(0).GetSubtree(0), t2.Root.GetSubtree(0).GetSubtree(0), simplify);
     47    }
     48
     49    public static double ComputeSimilarity(ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, bool simplify = false) {
     50      HashNode<ISymbolicExpressionTreeNode>[] lhs;
     51      HashNode<ISymbolicExpressionTreeNode>[] rhs;
     52
     53      ulong hashFunction(byte[] input) => HashUtil.DJBHash(input);
     54
     55      if (simplify) {
     56        lhs = t1.MakeNodes().Simplify(hashFunction);
     57        rhs = t2.MakeNodes().Simplify(hashFunction);
     58      } else {
     59        lhs = t1.MakeNodes().Sort(hashFunction); // sort calculates hash values
     60        rhs = t2.MakeNodes().Sort(hashFunction);
     61      }
     62
     63      var lh = lhs.Select(x => x.CalculatedHashValue).ToArray();
     64      var rh = rhs.Select(x => x.CalculatedHashValue).ToArray();
     65
     66      Array.Sort(lh);
     67      Array.Sort(rh);
     68
     69      return ComputeSimilarity(lh, rh);
     70    }
     71
     72    // this will only work if lh and rh are sorted
     73    private static double ComputeSimilarity(ulong[] lh, ulong[] rh) {
     74      double count = 0;
     75      for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) {
     76        var h1 = lh[i];
     77        var h2 = rh[j];
     78        if (h1 == h2) {
     79          ++count;
     80          ++i;
     81          ++j;
     82        } else if (h1 < h2) {
     83          ++i;
     84        } else if (h1 > h2) {
     85          ++j;
     86        }
     87      }
     88
     89      return 2d * count / (lh.Length + rh.Length);
     90    }
     91
     92    public static double ComputeAverageSimilarity(ISymbolicExpressionTree[] trees, bool simplify = false, bool strict = false) {
     93      var total = (double)trees.Length * (trees.Length - 1) / 2;
     94      double avg = 0;
     95      var hashes = new ulong[trees.Length][];
     96      // build hash arrays
     97      for (int i = 0; i < trees.Length; ++i) {
     98        var nodes = trees[i].MakeNodes(strict);
     99        hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();
     100        Array.Sort(hashes[i]);
     101      }
     102      // compute similarity matrix
     103      for (int i = 0; i < trees.Length - 1; ++i) {
     104        for (int j = i + 1; j < trees.Length; ++j) {
     105          avg += ComputeSimilarity(hashes[i], hashes[j]);
     106        }
     107      }
     108      return avg / total;
     109    }
     110
     111    public static double[,] ComputeSimilarityMatrix(ISymbolicExpressionTree[] trees, bool simplify = false, bool strict = false) {
     112      var sim = new double[trees.Length, trees.Length];
     113      var hashes = new ulong[trees.Length][];
     114      // build hash arrays
     115      for (int i = 0; i < trees.Length; ++i) {
     116        var nodes = trees[i].MakeNodes(strict);
     117        hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();
     118        Array.Sort(hashes[i]);
     119      }
     120      // compute similarity matrix
     121      for (int i = 0; i < trees.Length - 1; ++i) {
     122        for (int j = i + 1; j < trees.Length; ++j) {
     123          sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]);
     124        }
     125      }
     126      return sim;
     127    }
     128
     129    public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode, bool strict = false) {
     130      ulong hashFunction(byte[] input) => HashUtil.JSHash(input);
     131      var hashNodes = treeNode.MakeNodes(strict);
     132      var simplified = hashNodes.Simplify(hashFunction);
     133      return simplified.Last().CalculatedHashValue;
     134    }
     135
     136    public static HashNode<ISymbolicExpressionTreeNode> ToHashNode(this ISymbolicExpressionTreeNode node, bool strict = false) {
    51137      var symbol = node.Symbol;
    52138      var name = symbol.Name;
    53       if (symbol is Variable) {
    54         var variableTreeNode = (VariableTreeNode)node;
    55         name = variableTreeNode.VariableName;
    56       }
    57       var hash = name.GetHashCode();
     139      if (node is ConstantTreeNode constantNode) {
     140        name = strict ? constantNode.Value.ToString() : symbol.Name;
     141      } else if (node is VariableTreeNode variableNode) {
     142        name = strict ? variableNode.Weight.ToString() + variableNode.VariableName : variableNode.VariableName;
     143      }
     144      var hash = (ulong)name.GetHashCode();
    58145      var hashNode = new HashNode<ISymbolicExpressionTreeNode>(comparer) {
    59146        Data = node,
     
    79166    }
    80167
    81     public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTreeNode node) {
    82       return node.IterateNodesPostfix().Select(ToHashNode).ToArray();
     168    public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTree tree, bool strict = false) {
     169      return MakeNodes(tree.Root.GetSubtree(0).GetSubtree(0), strict);
     170    }
     171
     172    public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTreeNode node, bool strict = false) {
     173      return node.IterateNodesPostfix().Select(x => x.ToHashNode(strict)).ToArray().UpdateNodeSizes();
    83174    }
    84175
     
    98189        var node = nodes[i];
    99190
    100         if (node.IsChild) {
     191        if (node.IsLeaf) {
    101192          if (node.Data is VariableTreeNode variable) {
    102193            var variableTreeNode = (VariableTreeNode)treeNodes[i];
    103194            variableTreeNode.VariableName = variable.VariableName;
    104             variableTreeNode.Weight = 1;
     195            variableTreeNode.Weight = variable.Weight;
    105196          } else if (node.Data is ConstantTreeNode @const) {
    106197            var constantTreeNode = (ConstantTreeNode)treeNodes[i];
     
    129220    // (in other words simplification should be applied in a bottom-up fashion)
    130221    public static ISymbolicExpressionTree Simplify(ISymbolicExpressionTree tree) {
     222      ulong hashFunction(byte[] bytes) => HashUtil.JSHash(bytes);
    131223      var root = tree.Root.GetSubtree(0).GetSubtree(0);
    132224      var nodes = root.MakeNodes();
    133       var simplified = nodes.Simplify();
     225      var simplified = nodes.Simplify(hashFunction);
    134226      return simplified.ToTree();
    135227    }
     
    174266            var d = children[k];
    175267            if (nodes[d].Data.Symbol is Constant) {
    176               ((ConstantTreeNode)child.Data).Value *= ((ConstantTreeNode)nodes[d].Data).Value;
    177268              nodes[d].Enabled = false;
    178269              node.Arity--;
Note: See TracChangeset for help on using the changeset viewer.