1  #region License Information


2  /* HeuristicLab


3  * Copyright (C) 20022014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)


4  *


5  * This file is part of HeuristicLab.


6  *


7  * HeuristicLab is free software: you can redistribute it and/or modify


8  * it under the terms of the GNU General Public License as published by


9  * the Free Software Foundation, either version 3 of the License, or


10  * (at your option) any later version.


11  *


12  * HeuristicLab is distributed in the hope that it will be useful,


13  * but WITHOUT ANY WARRANTY; without even the implied warranty of


14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


15  * GNU General Public License for more details.


16  *


17  * You should have received a copy of the GNU General Public License


18  * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.


19  */


20  #endregion


21 


22  using System;


23  using System.Collections.Generic;


24  using System.Linq;


25  using HeuristicLab.Common;


26  using HeuristicLab.Core;


27 


28  namespace HeuristicLab.Algorithms.ParameterlessPopulationPyramid {


29 


30  public class LinkageTree {


31 


32  private readonly int[][][] occurances;


33  private readonly List<int>[] clusters;


34  private List<int> clusterOrdering;


35  private readonly int length;


36  private readonly IRandom rand;


37  private bool rebuildRequired = false;


38 


39  public LinkageTree(int length, IRandom rand) {


40  this.length = length;


41  this.rand = rand;


42  occurances = new int[length][][];


43 


44  // Create a lower triangular matrix without the diagonal


45  for (int i = 1; i < length; i++) {


46  occurances[i] = new int[i][];


47  for (int j = 0; j < i; j++) {


48  occurances[i][j] = new int[4];


49  }


50  }


51  clusters = new List<int>[2 * length  1];


52  for (int i = 0; i < clusters.Length; i++) {


53  clusters[i] = new List<int>();


54  }


55  clusterOrdering = new List<int>();


56 


57  // first "length" clusters just contain a single gene


58  for (int i = 0; i < length; i++) {


59  clusters[i].Add(i);


60  }


61  }


62 


63  public void Add(bool[] solution) {


64  if (solution.Length != length) throw new ArgumentException("The individual has not the correct length.");


65  for (int i = 1; i < solution.Length; i++) {


66  for (int j = 0; j < i; j++) {


67  // Updates the entry of the 4 long array based on the two bits


68  var pattern = (Convert.ToInt32(solution[j]) << 1) + Convert.ToInt32(solution[i]);


69  occurances[i][j][pattern]++;


70  }


71  }


72  rebuildRequired = true;


73  }


74 


75  // While "total" always has an integer value, it is a double to reduce


76  // how often type casts are needed to prevent integer divison


77  private static double NegativeEntropy(int[] counts, double total) {


78  double sum = 0;


79  foreach (var value in counts) {


80  if (value == 0) continue;


81  var p = value / total;


82  sum += (p * Math.Log(p));


83  }


84  return sum;


85  }


86 


87  private double EntropyDistance(int i, int j) {


88  // This ensures you are using the lower triangular part of "occurances"


89  if (i < j) {


90  int temp = i;


91  i = j;


92  j = temp;


93  }


94  var entry = occurances[i][j];


95  int[] bits = new int[4];


96  // extracts the occurrences of the individual bits


97  bits[0] = entry[0] + entry[2]; // i zero


98  bits[1] = entry[1] + entry[3]; // i one


99  bits[2] = entry[0] + entry[1]; // j zero


100  bits[3] = entry[2] + entry[3]; // j one


101  double total = bits[0] + bits[1];


102  // entropy of the two bits on their own


103  double separate = NegativeEntropy(bits, total);


104  // entropy of the two bits as a single unit


105  double together = NegativeEntropy(entry, total);


106  // If together there is 0 entropy, the distance is zero


107  if (together.IsAlmost(0)) {


108  return 0.0;


109  }


110  return 2  (separate / together);


111  }


112 


113  private void Rebuild() {


114  // Keep track of which clusters have not been merged


115  var topLevel = Enumerable.Range(0, length).ToList();


116  bool[] useful = Enumerable.Repeat(true, clusters.Length).ToArray();


117 


118  // Store the distances between all clusters


119  double[,] distances = new double[clusters.Length, clusters.Length];


120  for (int i = 1; i < length; i++) {


121  for (int j = 0; j < i; j++) {


122  distances[i, j] = EntropyDistance(clusters[i][0], clusters[j][0]);


123  // make it symmetric


124  distances[j, i] = distances[i, j];


125  }


126  }


127  // Each iteration we add some amount to the path, and remove the last


128  // two elements. This keeps track of how much of usable is in the path.


129  int end_of_path = 0;


130 


131  // build all clusters of size greater than 1


132  for (int index = length; index < clusters.Length; index++) {


133  // Shuffle everything not yet in the path


134  topLevel.ShuffleInPlace(rand, end_of_path, topLevel.Count1);


135 


136  // if nothing in the path, just add a random usable node


137  if (end_of_path == 0) {


138  end_of_path = 1;


139  }


140  while (end_of_path < topLevel.Count) {


141  // last node in the path


142  int final = topLevel[end_of_path  1];


143 


144  // best_index stores the location of the best thing in the top level


145  int best_index = end_of_path;


146  double min_dist = distances[final, topLevel[best_index]];


147  // check all options which might be closer to "final" than "topLevel[best_index]"


148  for (int option = end_of_path + 1; option < topLevel.Count; option++) {


149  if (distances[final, topLevel[option]] < min_dist) {


150  min_dist = distances[final, topLevel[option]];


151  best_index = option;


152  }


153  }


154  // If the current last two in the path are minimally distant


155  if (end_of_path > 1 && min_dist >= distances[final, topLevel[end_of_path  2]]) {


156  break;


157  }


158 


159  // move the best to the end of the path


160  topLevel.Swap(end_of_path, best_index);


161  end_of_path++;


162 


163  }


164  // Last two elements in the path are the clusters to join


165  int first = topLevel[end_of_path  2];


166  int second = topLevel[end_of_path  1];


167 


168  // Only keep a cluster if the distance between the joining clusters is > zero


169  bool keep = !distances[first, second].IsAlmost(0.0);


170  useful[first] = keep;


171  useful[second] = keep;


172 


173  // create the new cluster


174  clusters[index] = clusters[first].Concat(clusters[second]).ToList();


175  // Calculate distances from all clusters to the newly created cluster


176  int i = 0;


177  int end = topLevel.Count  1;


178  while (i <= end) {


179  int x = topLevel[i];


180  // Moves 'first' and 'second' to after "end" in topLevel


181  if (x == first  x == second) {


182  topLevel.Swap(i, end);


183  end;


184  continue;


185  }


186  // Use the previous distances to calculate the joined distance


187  double first_distance = distances[first, x];


188  first_distance *= clusters[first].Count;


189  double second_distance = distances[second, x];


190  second_distance *= clusters[second].Count;


191  distances[x, index] = ((first_distance + second_distance)


192  / (clusters[first].Count + clusters[second].Count));


193  // make it symmetric


194  distances[index, x] = distances[x, index];


195  i++;


196  }


197 


198  // Remove first and second from the path


199  end_of_path = 2;


200  topLevel.RemoveAt(topLevel.Count  1);


201  topLevel[topLevel.Count  1] = index;


202  }


203  // Extract the useful clusters


204  clusterOrdering.Clear();


205  // Add all useful clusters. The last one is never useful.


206  for (int i = 0; i < useful.Length  1; i++) {


207  if (useful[i]) clusterOrdering.Add(i);


208  }


209 


210  // Shuffle before sort to ensure ties are broken randomly


211  clusterOrdering.ShuffleInPlace(rand);


212  clusterOrdering = clusterOrdering.OrderBy(i => clusters[i].Count).ToList();


213  }


214 


215  public IEnumerable<List<int>> Clusters {


216  get {


217  // Just in time rebuilding


218  if (rebuildRequired) Rebuild();


219  foreach (var index in clusterOrdering) {


220  // Send out the clusters in the desired order


221  yield return clusters[index];


222  }


223  }


224  }


225  }


226  } 
