Ignore:
Timestamp:
08/07/19 13:32:09 (6 weeks ago)
Author:
mkommend
Message:

#2974: Merged trunk changes into branch.

Location:
branches/2974_Constants_Optimization/HeuristicLab.Problems.DataAnalysis.Symbolic
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/2974_Constants_Optimization/HeuristicLab.Problems.DataAnalysis.Symbolic

  • branches/2974_Constants_Optimization/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs

    r16676 r17193  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    3838    private static readonly Constant constant = new Constant();
    3939
    40     private static readonly ISymbolicExpressionTreeNodeComparer comparer = new SymbolicExpressionTreeNodeComparer();
    4140    private static ISymbolicExpressionTreeNode ActualRoot(this ISymbolicExpressionTree tree) => tree.Root.GetSubtree(0).GetSubtree(0);
     41    public static ulong HashFunction(byte[] input) => HashUtil.DJBHash(input);
    4242
    4343    #region tree hashing
     
    4747
    4848    public static ulong[] Hash(this ISymbolicExpressionTreeNode node, bool simplify = false, bool strict = false) {
    49       ulong hashFunction(byte[] input) => HashUtil.DJBHash(input);
    50 
    51       var hashNodes = simplify ? node.MakeNodes(strict).Simplify(hashFunction) : node.MakeNodes(strict).Sort(hashFunction);
     49      var hashNodes = simplify ? node.MakeNodes(strict).Simplify(HashFunction) : node.MakeNodes(strict).Sort(HashFunction);
    5250      var hashes = new ulong[hashNodes.Length];
    5351      for (int i = 0; i < hashes.Length; ++i) {
     
    7472      }
    7573      var hash = (ulong)name.GetHashCode();
    76       var hashNode = new HashNode<ISymbolicExpressionTreeNode>(comparer) {
     74      var hashNode = new HashNode<ISymbolicExpressionTreeNode> {
    7775        Data = node,
    7876        Arity = node.SubtreeCount,
     
    168166      for (int i = 0; i < trees.Count; ++i) {
    169167        var nodes = trees[i].MakeNodes(strict);
    170         hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();
     168        hashes[i] = (simplify ? nodes.Simplify(HashFunction) : nodes.Sort(HashFunction)).Select(x => x.CalculatedHashValue).ToArray();
    171169        Array.Sort(hashes[i]);
    172170      }
     
    180178    }
    181179
     180    public static double[,] ComputeSimilarityMatrix(IList<ulong[]> hashes) {
     181      // compute similarity matrix
     182      var n = hashes.Count;
     183      var sim = new double[n, n];
     184      for (int i = 0; i < n - 1; ++i) {
     185        for (int j = i + 1; j < n; ++j) {
     186          sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]);
     187        }
     188      }
     189      return sim;
     190    }
     191
    182192    public static double[,] ComputeSimilarityMatrix(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) {
    183       var sim = new double[trees.Count, trees.Count];
    184193      var hashes = new ulong[trees.Count][];
    185194      // build hash arrays
    186195      for (int i = 0; i < trees.Count; ++i) {
    187196        var nodes = trees[i].MakeNodes(strict);
    188         hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();
     197        hashes[i] = (simplify ? nodes.Simplify(HashFunction) : nodes.Sort(HashFunction)).Select(x => x.CalculatedHashValue).ToArray();
    189198        Array.Sort(hashes[i]);
    190199      }
    191       // compute similarity matrix
    192       for (int i = 0; i < trees.Count - 1; ++i) {
    193         for (int j = i + 1; j < trees.Count; ++j) {
    194           sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]);
    195         }
    196       }
    197       return sim;
     200      return ComputeSimilarityMatrix(hashes);
    198201    }
    199202    #endregion
     
    245248    // (in other words simplification should be applied in a bottom-up fashion)
    246249    public static ISymbolicExpressionTree Simplify(ISymbolicExpressionTree tree) {
    247       ulong hashFunction(byte[] bytes) => HashUtil.JSHash(bytes);
    248       var root = tree.Root.GetSubtree(0).GetSubtree(0);
    249       var nodes = root.MakeNodes();
    250       var simplified = nodes.Simplify(hashFunction);
    251       return simplified.ToTree();
     250      return tree.MakeNodes().Simplify(HashFunction).ToTree();
    252251    }
    253252
     
    286285
    287286        var symbol = child.Data.Symbol;
    288         if (symbol is Constant) {
     287        if (child.Data is ConstantTreeNode firstConst) {
     288          // fold sibling constant nodes into the first constant
    289289          for (int k = j + 1; k < children.Length; ++k) {
    290             var d = children[k];
    291             if (nodes[d].Data.Symbol is Constant) {
    292               nodes[d].Enabled = false;
     290            var sibling = nodes[children[k]];
     291            if (sibling.Data is ConstantTreeNode otherConst) {
     292              sibling.Enabled = false;
    293293              node.Arity--;
     294              firstConst.Value *= otherConst.Value;
     295            } else {
     296              break;
     297            }
     298          }
     299        } else if (child.Data is VariableTreeNode variable) {
     300          // fold sibling constant nodes into the variable weight
     301          for (int k = j + 1; k < children.Length; ++k) {
     302            var sibling = nodes[children[k]];
     303            if (sibling.Data is ConstantTreeNode constantNode) {
     304              sibling.Enabled = false;
     305              node.Arity--;
     306              variable.Weight *= constantNode.Value;
    294307            } else {
    295308              break;
     
    297310          }
    298311        } else if (symbol is Division) {
    299           var div = nodes[c];
    300           var denominator =
    301             div.Arity == 1 ?
    302             nodes[c - 1] :                    // 1 / x is expressed as div(x) (with a single child)
    303             nodes[c - nodes[c - 1].Size - 2]; // assume division always has arity 1 or 2
    304 
    305           foreach (var d in children) {
    306             if (nodes[d].Enabled && nodes[d] == denominator) {
    307               nodes[c].Enabled = nodes[d].Enabled = denominator.Enabled = false;
     312          // 1/x is expressed as div(x) (with a single child)
     313          // we assume division always has arity 1 or 2
     314          var d = child.Arity == 1 ? c - 1 : c - nodes[c - 1].Size - 2;
     315          var denominator = nodes[d];
     316
     317          // iterate children of node i to see if any of them matches the denominator of div node c
     318          for (int k = 0; k < children.Length; ++k) {
     319            var sibling = nodes[children[k]];
     320            if (sibling.Enabled && sibling == denominator) {
     321              nodes.SetEnabled(children[j], false); // disable the div subtree
     322              nodes.SetEnabled(children[k], false); // disable the sibling matching the denominator
     323
    308324              node.Arity -= 2; // matching child + division node
    309325              break;
Note: See TracChangeset for help on using the changeset viewer.