Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
03/07/19 12:29:27 (6 years ago)
Author:
gkronber
Message:

#2866: merged r16364:16653 from trunk to branch to prepare for trunk reintegration (resolving conflicts in the project file)

Location:
branches/2866_SymRegHyperbolicFunctions/HeuristicLab.Problems.DataAnalysis.Symbolic
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/2866_SymRegHyperbolicFunctions/HeuristicLab.Problems.DataAnalysis.Symbolic

  • branches/2866_SymRegHyperbolicFunctions/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashExtensions.cs

    r16305 r16654  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
  • branches/2866_SymRegHyperbolicFunctions/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashUtil.cs

    r16272 r16654  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
  • branches/2866_SymRegHyperbolicFunctions/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs

    r16305 r16654  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2121
    2222using System;
     23using System.Collections.Generic;
    2324using System.Linq;
    2425using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
     
    3839
    3940    private static readonly ISymbolicExpressionTreeNodeComparer comparer = new SymbolicExpressionTreeNodeComparer();
    40 
    41     public static ulong ComputeHash(this ISymbolicExpressionTree tree) {
    42       return ComputeHash(tree.Root.GetSubtree(0).GetSubtree(0));
    43     }
    44 
    45     public static double ComputeSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool simplify = false) {
    46       return ComputeSimilarity(t1.Root.GetSubtree(0).GetSubtree(0), t2.Root.GetSubtree(0).GetSubtree(0), simplify);
    47     }
    48 
    49     public static double ComputeSimilarity(ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, bool simplify = false) {
    50       HashNode<ISymbolicExpressionTreeNode>[] lhs;
    51       HashNode<ISymbolicExpressionTreeNode>[] rhs;
    52 
     41    private static ISymbolicExpressionTreeNode ActualRoot(this ISymbolicExpressionTree tree) => tree.Root.GetSubtree(0).GetSubtree(0);
     42
     43    #region tree hashing
     44    public static ulong[] Hash(this ISymbolicExpressionTree tree, bool simplify = false, bool strict = false) {
     45      return tree.ActualRoot().Hash(simplify, strict);
     46    }
     47
     48    public static ulong[] Hash(this ISymbolicExpressionTreeNode node, bool simplify = false, bool strict = false) {
    5349      ulong hashFunction(byte[] input) => HashUtil.DJBHash(input);
    5450
    55       if (simplify) {
    56         lhs = t1.MakeNodes().Simplify(hashFunction);
    57         rhs = t2.MakeNodes().Simplify(hashFunction);
    58       } else {
    59         lhs = t1.MakeNodes().Sort(hashFunction); // sort calculates hash values
    60         rhs = t2.MakeNodes().Sort(hashFunction);
    61       }
    62 
    63       var lh = lhs.Select(x => x.CalculatedHashValue).ToArray();
    64       var rh = rhs.Select(x => x.CalculatedHashValue).ToArray();
    65 
    66       Array.Sort(lh);
    67       Array.Sort(rh);
    68 
    69       return ComputeSimilarity(lh, rh);
    70     }
    71 
    72     // this will only work if lh and rh are sorted
    73     private static double ComputeSimilarity(ulong[] lh, ulong[] rh) {
    74       double count = 0;
    75       for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) {
    76         var h1 = lh[i];
    77         var h2 = rh[j];
    78         if (h1 == h2) {
    79           ++count;
    80           ++i;
    81           ++j;
    82         } else if (h1 < h2) {
    83           ++i;
    84         } else if (h1 > h2) {
    85           ++j;
    86         }
    87       }
    88 
    89       return 2d * count / (lh.Length + rh.Length);
    90     }
    91 
    92     public static double ComputeAverageSimilarity(ISymbolicExpressionTree[] trees, bool simplify = false, bool strict = false) {
    93       var total = (double)trees.Length * (trees.Length - 1) / 2;
    94       double avg = 0;
    95       var hashes = new ulong[trees.Length][];
    96       // build hash arrays
    97       for (int i = 0; i < trees.Length; ++i) {
    98         var nodes = trees[i].MakeNodes(strict);
    99         hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();
    100         Array.Sort(hashes[i]);
    101       }
    102       // compute similarity matrix
    103       for (int i = 0; i < trees.Length - 1; ++i) {
    104         for (int j = i + 1; j < trees.Length; ++j) {
    105           avg += ComputeSimilarity(hashes[i], hashes[j]);
    106         }
    107       }
    108       return avg / total;
    109     }
    110 
    111     public static double[,] ComputeSimilarityMatrix(ISymbolicExpressionTree[] trees, bool simplify = false, bool strict = false) {
    112       var sim = new double[trees.Length, trees.Length];
    113       var hashes = new ulong[trees.Length][];
    114       // build hash arrays
    115       for (int i = 0; i < trees.Length; ++i) {
    116         var nodes = trees[i].MakeNodes(strict);
    117         hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();
    118         Array.Sort(hashes[i]);
    119       }
    120       // compute similarity matrix
    121       for (int i = 0; i < trees.Length - 1; ++i) {
    122         for (int j = i + 1; j < trees.Length; ++j) {
    123           sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]);
    124         }
    125       }
    126       return sim;
    127     }
    128 
    129     public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode, bool strict = false) {
    130       ulong hashFunction(byte[] input) => HashUtil.JSHash(input);
    131       var hashNodes = treeNode.MakeNodes(strict);
    132       var simplified = hashNodes.Simplify(hashFunction);
    133       return simplified.Last().CalculatedHashValue;
     51      var hashNodes = simplify ? node.MakeNodes(strict).Simplify(hashFunction) : node.MakeNodes(strict).Sort(hashFunction);
     52      var hashes = new ulong[hashNodes.Length];
     53      for (int i = 0; i < hashes.Length; ++i) {
     54        hashes[i] = hashNodes[i].CalculatedHashValue;
     55      }
     56      return hashes;
     57    }
     58
     59    public static ulong ComputeHash(this ISymbolicExpressionTree tree, bool simplify = false, bool strict = false) {
     60      return ComputeHash(tree.ActualRoot(), simplify, strict);
     61    }
     62
     63    public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode, bool simplify = false, bool strict = false) {
     64      return treeNode.Hash(simplify, strict).Last();
    13465    }
    13566
     
    16798
    16899    public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTree tree, bool strict = false) {
    169       return MakeNodes(tree.Root.GetSubtree(0).GetSubtree(0), strict);
     100      return MakeNodes(tree.ActualRoot(), strict);
    170101    }
    171102
     
    173104      return node.IterateNodesPostfix().Select(x => x.ToHashNode(strict)).ToArray().UpdateNodeSizes();
    174105    }
     106    #endregion
     107
     108    #region tree similarity
     109    public static double ComputeSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool simplify = false, bool strict = false) {
     110      return ComputeSimilarity(t1.ActualRoot(), t2.ActualRoot(), simplify, strict);
     111    }
     112
     113    public static double ComputeSimilarity(ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, bool simplify = false, bool strict = false) {
     114      var lh = t1.Hash(simplify, strict);
     115      var rh = t2.Hash(simplify, strict);
     116
     117      Array.Sort(lh);
     118      Array.Sort(rh);
     119
     120      return ComputeSimilarity(lh, rh);
     121    }
     122
     123    // requires lhs and rhs to be sorted
     124    public static int IntersectCount(this ulong[] lh, ulong[] rh) {
     125      int count = 0;
     126      for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) {
     127        var h1 = lh[i];
     128        var h2 = rh[j];
     129        if (h1 == h2) {
     130          ++count;
     131          ++i;
     132          ++j;
     133        } else if (h1 < h2) {
     134          ++i;
     135        } else if (h1 > h2) {
     136          ++j;
     137        }
     138      }
     139      return count;
     140    }
     141
     142    public static IEnumerable<ulong> Intersect(this ulong[] lh, ulong[] rh) {
     143      for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) {
     144        var h1 = lh[i];
     145        var h2 = rh[j];
     146        if (h1 == h2) {
     147          yield return h1;
     148          ++i;
     149          ++j;
     150        } else if (h1 < h2) {
     151          ++i;
     152        } else if (h1 > h2) {
     153          ++j;
     154        }
     155      }
     156    }
     157
     158    // this will only work if lh and rh are sorted
     159    public static double ComputeSimilarity(ulong[] lh, ulong[] rh) {
     160      return 2d * IntersectCount(lh, rh) / (lh.Length + rh.Length);
     161    }
     162
     163    public static double ComputeAverageSimilarity(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) {
     164      var total = trees.Count * (trees.Count - 1) / 2;
     165      double avg = 0;
     166      var hashes = new ulong[trees.Count][];
     167      // build hash arrays
     168      for (int i = 0; i < trees.Count; ++i) {
     169        var nodes = trees[i].MakeNodes(strict);
     170        hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();
     171        Array.Sort(hashes[i]);
     172      }
     173      // compute similarity matrix
     174      for (int i = 0; i < trees.Count - 1; ++i) {
     175        for (int j = i + 1; j < trees.Count; ++j) {
     176          avg += ComputeSimilarity(hashes[i], hashes[j]);
     177        }
     178      }
     179      return avg / total;
     180    }
     181
     182    public static double[,] ComputeSimilarityMatrix(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) {
     183      var sim = new double[trees.Count, trees.Count];
     184      var hashes = new ulong[trees.Count][];
     185      // build hash arrays
     186      for (int i = 0; i < trees.Count; ++i) {
     187        var nodes = trees[i].MakeNodes(strict);
     188        hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();
     189        Array.Sort(hashes[i]);
     190      }
     191      // compute similarity matrix
     192      for (int i = 0; i < trees.Count - 1; ++i) {
     193        for (int j = i + 1; j < trees.Count; ++j) {
     194          sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]);
     195        }
     196      }
     197      return sim;
     198    }
     199    #endregion
    175200
    176201    #region parse a nodes array back into a tree
     
    218243    #region tree simplification
    219244    // these simplification methods rely on the assumption that child nodes of the current node have already been simplified
    220     // (in other words simplification should be applied in a bottom-up fashion) 
     245    // (in other words simplification should be applied in a bottom-up fashion)
    221246    public static ISymbolicExpressionTree Simplify(ISymbolicExpressionTree tree) {
    222247      ulong hashFunction(byte[] bytes) => HashUtil.JSHash(bytes);
     
    248273    }
    249274
    250     // simplify multiplications by reducing constants and div terms 
     275    // simplify multiplications by reducing constants and div terms
    251276    public static void SimplifyMultiplication(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {
    252277      var node = nodes[i];
Note: See TracChangeset for help on using the changeset viewer.