Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
12/09/14 17:00:53 (9 years ago)
Author:
mkommend
Message:

#2282: Performance improvements in PPP (reduced memory allocations and more efficient implementation).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/Parameter-less Population Pyramid/HeuristicLab.Algorithms.ParameterlessPopulationPyramid/3.3/LinkageTree.cs

    r11672 r11674  
    2727
    2828namespace HeuristicLab.Algorithms.ParameterlessPopulationPyramid {
    29  
     29
    3030  public class LinkageTree {
    3131
     
    6666        for (int j = 0; j < i; j++) {
    6767          // Updates the entry of the 4 long array based on the two bits
    68           var pattern = (Convert.ToInt32(solution[j]) << 1) + Convert.ToInt32(solution[i]);
     68
     69          var pattern = (Convert.ToByte(solution[j]) << 1) + Convert.ToByte(solution[i]);
    6970          occurances[i][j][pattern]++;
    7071        }
     
    7879    private static double NegativeEntropy(int[] counts, double total) {
    7980      double sum = 0;
    80       foreach (var value in counts) {
    81         if (value == 0) continue;
    82         var p = value / total;
    83         sum += (p * Math.Log(p));
     81      for (int i = 0; i < counts.Length; i++) {
     82        if (counts[i] != 0) {
     83          sum += ((counts[i] / total) * Math.Log(counts[i] / total));
     84        }
    8485      }
    8586      return sum;
     
    8889    // Uses the frequency table to calcuate the entropy distance between two indices.
    8990    // In the GECCO paper, calculates Equation 1
     91    private int[] bits = new int[4];
    9092    private double EntropyDistance(int i, int j) {
    9193      // This ensures you are using the lower triangular part of "occurances"
     
    9698      }
    9799      var entry = occurances[i][j];
    98       int[] bits = new int[4];
    99100      // extracts the occurrences of the individual bits
    100101      bits[0] = entry[0] + entry[2];  // i zero
     
    114115    }
    115116
     117
     118
    116119    // Performs O(N^2) clustering based on the method described in:
    117120    // "Optimal implementations of UPGMA and other common clustering algorithms"
    118121    // by I. Gronau and S. Moran
    119122    // In the GECCO paper, Figure 2 is a simplified version of this algorithm.
     123    private double[][] distances;
    120124    private void Rebuild() {
     125      if (distances == null) {
     126        distances = new double[clusters.Length * 2 - 1][];
     127        for (int i = 0; i < distances.Length; i++)
     128          distances[i] = new double[clusters.Length * 2 - 1];
     129      }
     130
     131
    121132      // Keep track of which clusters have not been merged
    122       var topLevel = Enumerable.Range(0, length).ToList();
    123       bool[] useful = Enumerable.Repeat(true, clusters.Length).ToArray();
     133      var topLevel = new List<int>(length);
     134      for (int i = 0; i < length; i++)
     135        topLevel.Add(i);
     136
     137      bool[] useful = new bool[clusters.Length];
     138      for (int i = 0; i < useful.Length; i++)
     139        useful[i] = true;
    124140
    125141      // Store the distances between all clusters
    126       double[,] distances = new double[clusters.Length, clusters.Length];
    127142      for (int i = 1; i < length; i++) {
    128143        for (int j = 0; j < i; j++) {
    129           distances[i, j] = EntropyDistance(clusters[i][0], clusters[j][0]);
     144          distances[i][j] = EntropyDistance(clusters[i][0], clusters[j][0]);
    130145          // make it symmetric
    131           distances[j, i] = distances[i, j];
     146          distances[j][i] = distances[i][j];
    132147        }
    133148      }
     
    139154      for (int index = length; index < clusters.Length; index++) {
    140155        // Shuffle everything not yet in the path
    141         topLevel.ShuffleInPlace(rand, end_of_path, topLevel.Count-1);
     156        topLevel.ShuffleInPlace(rand, end_of_path, topLevel.Count - 1);
    142157
    143158        // if nothing in the path, just add a random usable node
     
    151166          // best_index stores the location of the best thing in the top level
    152167          int best_index = end_of_path;
    153           double min_dist = distances[final, topLevel[best_index]];
     168          double min_dist = distances[final][topLevel[best_index]];
    154169          // check all options which might be closer to "final" than "topLevel[best_index]"
    155170          for (int option = end_of_path + 1; option < topLevel.Count; option++) {
    156             if (distances[final, topLevel[option]] < min_dist) {
    157               min_dist = distances[final, topLevel[option]];
     171            if (distances[final][topLevel[option]] < min_dist) {
     172              min_dist = distances[final][topLevel[option]];
    158173              best_index = option;
    159174            }
    160175          }
    161176          // If the current last two in the path are minimally distant
    162           if (end_of_path > 1 && min_dist >= distances[final, topLevel[end_of_path - 2]]) {
     177          if (end_of_path > 1 && min_dist >= distances[final][topLevel[end_of_path - 2]]) {
    163178            break;
    164179          }
     
    173188
    174189        // Only keep a cluster if the distance between the joining clusters is > zero
    175         bool keep = !distances[first, second].IsAlmost(0.0);
     190        bool keep = !distances[first][second].IsAlmost(0.0);
    176191        useful[first] = keep;
    177192        useful[second] = keep;
     
    191206          }
    192207          // Use the previous distances to calculate the joined distance
    193           double first_distance = distances[first, x];
     208          double first_distance = distances[first][x];
    194209          first_distance *= clusters[first].Count;
    195           double second_distance = distances[second, x];
     210          double second_distance = distances[second][x];
    196211          second_distance *= clusters[second].Count;
    197           distances[x, index] = ((first_distance + second_distance)
     212          distances[x][index] = ((first_distance + second_distance)
    198213              / (clusters[first].Count + clusters[second].Count));
    199214          // make it symmetric
    200           distances[index, x] = distances[x, index];
     215          distances[index][x] = distances[x][index];
    201216          i++;
    202217        }
Note: See TracChangeset for help on using the changeset viewer.