Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/07/19 13:32:09 (5 years ago)
Author:
mkommend
Message:

#2974: Merged trunk changes into branch.

Location:
branches/2974_Constants_Optimization/HeuristicLab.Problems.DataAnalysis.Symbolic
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/2974_Constants_Optimization/HeuristicLab.Problems.DataAnalysis.Symbolic

  • branches/2974_Constants_Optimization/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashExtensions.cs

    r16676 r17193  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2121
    2222using System;
    23 using System.Collections.Generic;
     23using System.Linq;
    2424
    2525namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
    2626  public static class SymbolicExpressionHashExtensions {
     27    /// <summary>
     28    /// Holds data that is necessary to handle tree nodes in hashing / simplification.
     29    /// </summary>
     30    /// <typeparam name="T">The tree node type</typeparam>
    2731    public sealed class HashNode<T> : IComparable<HashNode<T>>, IEquatable<HashNode<T>> where T : class {
    2832      public T Data;
     
    3842      public SimplifyAction Simplify;
    3943
    40       public IComparer<T> Comparer;
    41 
    4244      public bool IsLeaf => Arity == 0;
    4345
    44       public HashNode(IComparer<T> comparer) {
    45         Comparer = comparer;
    46       }
    47 
    48       private HashNode() { }
    49 
    5046      public int CompareTo(HashNode<T> other) {
    51         var res = Comparer.Compare(Data, other.Data);
     47        var res = HashValue.CompareTo(other.HashValue);
    5248        return res == 0 ? CalculatedHashValue.CompareTo(other.CalculatedHashValue) : res;
    5349      }
     
    10399
    104100    public static HashNode<T>[] Simplify<T>(this HashNode<T>[] nodes, Func<byte[], ulong> hashFunction) where T : class {
    105       var reduced = nodes.UpdateNodeSizes().Reduce().Sort(hashFunction);
    106 
    107       for (int i = 0; i < reduced.Length; ++i) {
    108         var node = reduced[i];
    109         if (node.IsLeaf) {
    110           continue;
    111         }
    112         node.Simplify?.Invoke(ref reduced, i);
    113       }
    114       // detect if anything was simplified
    115       var count = 0;
    116       foreach (var node in reduced) {
    117         if (!node.Enabled) { ++count; }
    118       }
    119       if (count == 0) {
    120         return reduced;
    121       }
    122 
    123       var simplified = new HashNode<T>[reduced.Length - count];
    124       int j = 0;
    125       foreach (var node in reduced) {
    126         if (node.Enabled) {
    127           simplified[j++] = node;
    128         }
    129       }
    130       return simplified.UpdateNodeSizes().Reduce().Sort(hashFunction);
     101      bool simplified = false;
     102      nodes = nodes.UpdateNodeSizes().Reduce().Sort(hashFunction);
     103      do {
     104        if (simplified) {
     105          simplified = false;
     106          nodes = nodes.Where(x => x.Enabled).ToArray().UpdateNodeSizes().Reduce().Sort(hashFunction);
     107        }
     108
     109        for (int i = 0; i < nodes.Length; ++i) {
     110          var node = nodes[i];
     111          if (node.IsLeaf) {
     112            continue;
     113          }
     114          node.Simplify?.Invoke(ref nodes, i);
     115          for (int j = i - node.Size; j <= i; ++j) {
     116            // detect if anything was simplified
     117            if (!nodes[j].Enabled) {
     118              simplified = true;
     119              break;
     120            }
     121          }
     122        }
     123      } while (simplified);
     124      return nodes.UpdateNodeSizes().Sort(hashFunction);
    131125    }
    132126
     
    173167
    174168    /// <summary>
    175     /// Get a function node's child indicest
     169    /// Get a function node's child indices
    176170    /// </summary>
    177171    /// <typeparam name="T">The data type encapsulated by a hash node</typeparam>
    178     /// <param name="nodes">An array of hash nodes with up-to-date node sizes</param>
     172    /// <param name="nodes">An array of hash nodes with up-to-date node sizes (see UpdateNodeSizes)</param>
    179173    /// <param name="i">The index in the array of hash nodes of the node whose children we want to iterate</param>
    180174    /// <returns>An array containing child indices</returns>
     
    191185    }
    192186
     187    /// <summary>
     188    /// Determines size of each branch and sets the results for each node.
     189    /// </summary>
     190    /// <typeparam name="T">The data type encapsulated by a hash node</typeparam>
     191    /// <param name="nodes">An array of hash nodes in postfix order.</param>
     192    /// <returns>The array with updated node sizes. The array is not copied.</returns>
    193193    public static HashNode<T>[] UpdateNodeSizes<T>(this HashNode<T>[] nodes) where T : class {
    194194      for (int i = 0; i < nodes.Length; ++i) {
     
    199199        }
    200200        node.Size = node.Arity;
    201 
     201        // visit all children and sum up their size (assumes postfix order).
    202202        for (int j = i - 1, k = 0; k < node.Arity; j -= 1 + nodes[j].Size, ++k) {
    203203          node.Size += nodes[j].Size;
     
    207207    }
    208208
    209     private static HashNode<T>[] Reduce<T>(this HashNode<T>[] nodes) where T : class {
     209    // disables duplicate branches and removes the disabled nodes
     210    public static HashNode<T>[] Reduce<T>(this HashNode<T>[] nodes) where T : class {
    210211      int count = 0;
    211212      for (int i = 0; i < nodes.Length; ++i) {
  • branches/2974_Constants_Optimization/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashUtil.cs

    r16676 r17193  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
  • branches/2974_Constants_Optimization/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs

    r16676 r17193  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    3838    private static readonly Constant constant = new Constant();
    3939
    40     private static readonly ISymbolicExpressionTreeNodeComparer comparer = new SymbolicExpressionTreeNodeComparer();
    4140    private static ISymbolicExpressionTreeNode ActualRoot(this ISymbolicExpressionTree tree) => tree.Root.GetSubtree(0).GetSubtree(0);
     41    public static ulong HashFunction(byte[] input) => HashUtil.DJBHash(input);
    4242
    4343    #region tree hashing
     
    4747
    4848    public static ulong[] Hash(this ISymbolicExpressionTreeNode node, bool simplify = false, bool strict = false) {
    49       ulong hashFunction(byte[] input) => HashUtil.DJBHash(input);
    50 
    51       var hashNodes = simplify ? node.MakeNodes(strict).Simplify(hashFunction) : node.MakeNodes(strict).Sort(hashFunction);
     49      var hashNodes = simplify ? node.MakeNodes(strict).Simplify(HashFunction) : node.MakeNodes(strict).Sort(HashFunction);
    5250      var hashes = new ulong[hashNodes.Length];
    5351      for (int i = 0; i < hashes.Length; ++i) {
     
    7472      }
    7573      var hash = (ulong)name.GetHashCode();
    76       var hashNode = new HashNode<ISymbolicExpressionTreeNode>(comparer) {
     74      var hashNode = new HashNode<ISymbolicExpressionTreeNode> {
    7775        Data = node,
    7876        Arity = node.SubtreeCount,
     
    168166      for (int i = 0; i < trees.Count; ++i) {
    169167        var nodes = trees[i].MakeNodes(strict);
    170         hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();
     168        hashes[i] = (simplify ? nodes.Simplify(HashFunction) : nodes.Sort(HashFunction)).Select(x => x.CalculatedHashValue).ToArray();
    171169        Array.Sort(hashes[i]);
    172170      }
     
    180178    }
    181179
     180    public static double[,] ComputeSimilarityMatrix(IList<ulong[]> hashes) {
     181      // compute similarity matrix
     182      var n = hashes.Count;
     183      var sim = new double[n, n];
     184      for (int i = 0; i < n - 1; ++i) {
     185        for (int j = i + 1; j < n; ++j) {
     186          sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]);
     187        }
     188      }
     189      return sim;
     190    }
     191
    182192    public static double[,] ComputeSimilarityMatrix(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) {
    183       var sim = new double[trees.Count, trees.Count];
    184193      var hashes = new ulong[trees.Count][];
    185194      // build hash arrays
    186195      for (int i = 0; i < trees.Count; ++i) {
    187196        var nodes = trees[i].MakeNodes(strict);
    188         hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();
     197        hashes[i] = (simplify ? nodes.Simplify(HashFunction) : nodes.Sort(HashFunction)).Select(x => x.CalculatedHashValue).ToArray();
    189198        Array.Sort(hashes[i]);
    190199      }
    191       // compute similarity matrix
    192       for (int i = 0; i < trees.Count - 1; ++i) {
    193         for (int j = i + 1; j < trees.Count; ++j) {
    194           sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]);
    195         }
    196       }
    197       return sim;
     200      return ComputeSimilarityMatrix(hashes);
    198201    }
    199202    #endregion
     
    245248    // (in other words simplification should be applied in a bottom-up fashion)
    246249    public static ISymbolicExpressionTree Simplify(ISymbolicExpressionTree tree) {
    247       ulong hashFunction(byte[] bytes) => HashUtil.JSHash(bytes);
    248       var root = tree.Root.GetSubtree(0).GetSubtree(0);
    249       var nodes = root.MakeNodes();
    250       var simplified = nodes.Simplify(hashFunction);
    251       return simplified.ToTree();
     250      return tree.MakeNodes().Simplify(HashFunction).ToTree();
    252251    }
    253252
     
    286285
    287286        var symbol = child.Data.Symbol;
    288         if (symbol is Constant) {
     287        if (child.Data is ConstantTreeNode firstConst) {
     288          // fold sibling constant nodes into the first constant
    289289          for (int k = j + 1; k < children.Length; ++k) {
    290             var d = children[k];
    291             if (nodes[d].Data.Symbol is Constant) {
    292               nodes[d].Enabled = false;
     290            var sibling = nodes[children[k]];
     291            if (sibling.Data is ConstantTreeNode otherConst) {
     292              sibling.Enabled = false;
    293293              node.Arity--;
     294              firstConst.Value *= otherConst.Value;
     295            } else {
     296              break;
     297            }
     298          }
     299        } else if (child.Data is VariableTreeNode variable) {
     300          // fold sibling constant nodes into the variable weight
     301          for (int k = j + 1; k < children.Length; ++k) {
     302            var sibling = nodes[children[k]];
     303            if (sibling.Data is ConstantTreeNode constantNode) {
     304              sibling.Enabled = false;
     305              node.Arity--;
     306              variable.Weight *= constantNode.Value;
    294307            } else {
    295308              break;
     
    297310          }
    298311        } else if (symbol is Division) {
    299           var div = nodes[c];
    300           var denominator =
    301             div.Arity == 1 ?
    302             nodes[c - 1] :                    // 1 / x is expressed as div(x) (with a single child)
    303             nodes[c - nodes[c - 1].Size - 2]; // assume division always has arity 1 or 2
    304 
    305           foreach (var d in children) {
    306             if (nodes[d].Enabled && nodes[d] == denominator) {
    307               nodes[c].Enabled = nodes[d].Enabled = denominator.Enabled = false;
     312          // 1/x is expressed as div(x) (with a single child)
     313          // we assume division always has arity 1 or 2
     314          var d = child.Arity == 1 ? c - 1 : c - nodes[c - 1].Size - 2;
     315          var denominator = nodes[d];
     316
     317          // iterate children of node i to see if any of them matches the denominator of div node c
     318          for (int k = 0; k < children.Length; ++k) {
     319            var sibling = nodes[children[k]];
     320            if (sibling.Enabled && sibling == denominator) {
     321              nodes.SetEnabled(children[j], false); // disable the div subtree
     322              nodes.SetEnabled(children[k], false); // disable the sibling matching the denominator
     323
    308324              node.Arity -= 2; // matching child + division node
    309325              break;
Note: See TracChangeset for help on using the changeset viewer.