Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
07/11/19 15:32:31 (6 years ago)
Author:
bburlacu
Message:

#2950: Small refactor in HashExtensions.cs (better comparison, consistent use of the same hash function). Fix bug and improve robustness of SimplifyMultiplication in SymbolicExpressionHashExtensions.cs

Location:
trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashExtensions.cs

    r17076 r17132  
    4545
    4646      public int CompareTo(HashNode<T> other) {
    47         return CalculatedHashValue.CompareTo(other.CalculatedHashValue);
     47        var res = HashValue.CompareTo(other.HashValue);
     48        return res == 0 ? CalculatedHashValue.CompareTo(other.CalculatedHashValue) : res;
    4849      }
    4950
     
    112113          }
    113114          node.Simplify?.Invoke(ref nodes, i);
    114           for (int j = i - node.Size; j < i; ++j) {
     115          for (int j = i - node.Size; j <= i; ++j) {
    115116            // detect if anything was simplified
    116117            if (!nodes[j].Enabled) {
  • trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs

    r17076 r17132  
    3939
    4040    private static ISymbolicExpressionTreeNode ActualRoot(this ISymbolicExpressionTree tree) => tree.Root.GetSubtree(0).GetSubtree(0);
     41    public static ulong HashFunction(byte[] input) => HashUtil.DJBHash(input);
    4142
    4243    #region tree hashing
     
    4647
    4748    public static ulong[] Hash(this ISymbolicExpressionTreeNode node, bool simplify = false, bool strict = false) {
    48       ulong hashFunction(byte[] input) => HashUtil.DJBHash(input);
    49 
    50       var hashNodes = simplify ? node.MakeNodes(strict).Simplify(hashFunction) : node.MakeNodes(strict).Sort(hashFunction); // simplify sorts implicitly
     49      var hashNodes = simplify ? node.MakeNodes(strict).Simplify(HashFunction) : node.MakeNodes(strict).Sort(HashFunction);
    5150      var hashes = new ulong[hashNodes.Length];
    5251      for (int i = 0; i < hashes.Length; ++i) {
     
    167166      for (int i = 0; i < trees.Count; ++i) {
    168167        var nodes = trees[i].MakeNodes(strict);
    169         hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();
     168        hashes[i] = (simplify ? nodes.Simplify(HashFunction) : nodes.Sort(HashFunction)).Select(x => x.CalculatedHashValue).ToArray();
    170169        Array.Sort(hashes[i]);
    171170      }
     
    179178    }
    180179
     180    public static double[,] ComputeSimilarityMatrix(IList<ulong[]> hashes) {
     181      // compute similarity matrix
     182      var n = hashes.Count;
     183      var sim = new double[n, n];
     184      for (int i = 0; i < n - 1; ++i) {
     185        for (int j = i + 1; j < n; ++j) {
     186          sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]);
     187        }
     188      }
     189      return sim;
     190    }
     191
    181192    public static double[,] ComputeSimilarityMatrix(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) {
    182       var sim = new double[trees.Count, trees.Count];
    183193      var hashes = new ulong[trees.Count][];
    184194      // build hash arrays
    185195      for (int i = 0; i < trees.Count; ++i) {
    186196        var nodes = trees[i].MakeNodes(strict);
    187         hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();
     197        hashes[i] = (simplify ? nodes.Simplify(HashFunction) : nodes.Sort(HashFunction)).Select(x => x.CalculatedHashValue).ToArray();
    188198        Array.Sort(hashes[i]);
    189199      }
    190       // compute similarity matrix
    191       for (int i = 0; i < trees.Count - 1; ++i) {
    192         for (int j = i + 1; j < trees.Count; ++j) {
    193           sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]);
    194         }
    195       }
    196       return sim;
     200      return ComputeSimilarityMatrix(hashes);
    197201    }
    198202    #endregion
     
    210214      var treeNodes = nodes.Select(x => x.Data.Symbol.CreateTreeNode()).ToArray();
    211215
    212       // construct tree top down (assumes postfix order for nodes)
    213216      for (int i = nodes.Length - 1; i >= 0; --i) {
    214217        var node = nodes[i];
     
    245248    // (in other words simplification should be applied in a bottom-up fashion)
    246249    public static ISymbolicExpressionTree Simplify(ISymbolicExpressionTree tree) {
    247       ulong hashFunction(byte[] bytes) => HashUtil.DJBHash(bytes);
    248       var root = tree.ActualRoot();
    249       var nodes = root.MakeNodes();
    250       var simplified = nodes.Simplify(hashFunction);
    251       return simplified.ToTree();
     250      return tree.MakeNodes().Simplify(HashFunction).ToTree();
    252251    }
    253252
     
    286285
    287286        var symbol = child.Data.Symbol;
    288         if (symbol is Constant) {
     287        if (child.Data is ConstantTreeNode firstConst) {
     288          // fold sibling constant nodes into the first constant
    289289          for (int k = j + 1; k < children.Length; ++k) {
    290             var d = children[k];
    291             if (nodes[d].Data.Symbol is Constant) {
    292               nodes[d].Enabled = false;
     290            var sibling = nodes[children[k]];
     291            if (sibling.Data is ConstantTreeNode otherConst) {
     292              sibling.Enabled = false;
    293293              node.Arity--;
     294              firstConst.Value *= otherConst.Value;
     295            } else {
     296              break;
     297            }
     298          }
     299        } else if (child.Data is VariableTreeNode variable) {
     300          // fold sibling constant nodes into the variable weight
     301          for (int k = j + 1; k < children.Length; ++k) {
     302            var sibling = nodes[children[k]];
     303            if (sibling.Data is ConstantTreeNode constantNode) {
     304              sibling.Enabled = false;
     305              node.Arity--;
     306              variable.Weight *= constantNode.Value;
    294307            } else {
    295308              break;
     
    297310          }
    298311        } else if (symbol is Division) {
    299           var div = nodes[c];
    300           var denominator =
    301             div.Arity == 1 ?
    302             nodes[c - 1] :                    // 1 / x is expressed as div(x) (with a single child)
    303             nodes[c - nodes[c - 1].Size - 2]; // assume division always has arity 1 or 2
    304 
    305           foreach (var d in children) {
    306             if (nodes[d].Enabled && nodes[d] == denominator) {
    307               nodes[c].Enabled = nodes[d].Enabled = denominator.Enabled = false;
     312          // 1/x is expressed as div(x) (with a single child)
     313          // we assume division always has arity 1 or 2
     314          var d = child.Arity == 1 ? c - 1 : c - nodes[c - 1].Size - 2;
     315          var denominator = nodes[d];
     316
     317          // iterate children of node i to see if any of them matches the denominator of div node c
     318          for (int k = 0; k < children.Length; ++k) {
     319            var sibling = nodes[children[k]];
     320            if (sibling.Enabled && sibling == denominator) {
     321              nodes.SetEnabled(children[j], false); // disable the div subtree
     322              nodes.SetEnabled(children[k], false); // disable the sibling matching the denominator
     323
    308324              node.Arity -= 2; // matching child + division node
    309325              break;
     
    370386        nodes[i].Enabled = false;
    371387      } else if ((parentSymbol is Exponential && childSymbol is Logarithm) || (parentSymbol is Logarithm && childSymbol is Exponential)) {
    372         // exp(log(x)) == x only for positive x. We consider this as equivalent for hashing anyway.
    373388        child.Enabled = parent.Enabled = false;
    374389      }
Note: See TracChangeset for help on using the changeset viewer.