1 | using System;
|
---|
2 | using System.Collections.Generic;
|
---|
3 | using System.Linq;
|
---|
4 |
|
---|
5 | namespace HeuristicLab.Analysis.FitnessLandscape.Analysis {
|
---|
6 |
|
---|
7 | public class InformationAnalysis {
|
---|
8 |
|
---|
9 | public class Peak {
|
---|
10 | public double QualityDelta { get; private set; }
|
---|
11 | public double Value { get; private set; }
|
---|
12 | public Peak(double qualityDelta, double value) {
|
---|
13 | QualityDelta = qualityDelta;
|
---|
14 | Value = value;
|
---|
15 | }
|
---|
16 | }
|
---|
17 |
|
---|
18 | public List<double> InformationContent { get; private set; }
|
---|
19 | public List<double> PartialInformationContent { get; private set; }
|
---|
20 | public List<double> DensityBasinInformation { get; private set; }
|
---|
21 | public List<double> TotalEntropy { get; private set; }
|
---|
22 | public List<double> QualityDelta { get; private set; }
|
---|
23 | public double InformationStability { get; private set; }
|
---|
24 | public int Regularity { get; private set; }
|
---|
25 | public int Diversity { get; private set; }
|
---|
26 | public Peak PeakInformationContent { get; private set; }
|
---|
27 | public Peak PeakPartialInformationContent { get; private set; }
|
---|
28 | public Peak PeakDensityBasinInformation { get; private set; }
|
---|
29 | public Peak PeakTotalEntropy { get; private set; }
|
---|
30 |
|
---|
31 | public InformationAnalysis(IList<double> qualities, int nQuantiles, int shapeSize) {
|
---|
32 | InformationContent = new List<double>();
|
---|
33 | PartialInformationContent = new List<double>();
|
---|
34 | DensityBasinInformation = new List<double>();
|
---|
35 | TotalEntropy = new List<double>();
|
---|
36 | QualityDelta = new List<double>();
|
---|
37 | if (shapeSize < 1)
|
---|
38 | throw new ArgumentException("Shape size must be at least 1 (better 2)");
|
---|
39 | if (qualities.Count > 1)
|
---|
40 | PerformAnalysis(qualities, nQuantiles, shapeSize);
|
---|
41 | }
|
---|
42 |
|
---|
43 | private void PerformAnalysis(IList<double> qualities, int nQuantiles, int shapeSize) {
|
---|
44 | var differences = Differences(qualities).ToList();
|
---|
45 | InformationStability = differences.Select(Math.Abs).Max();
|
---|
46 | Regularity = new HashSet<double>(differences).Count;
|
---|
47 | Diversity = new HashSet<double>(qualities).Count;
|
---|
48 | var absDifferences = differences.Select(Math.Abs).OrderBy(d => d).ToList();
|
---|
49 | nQuantiles = Math.Min(nQuantiles, absDifferences.Count);
|
---|
50 | var thresholds = (nQuantiles == 0
|
---|
51 | ? absDifferences
|
---|
52 | : UniqueThresholdCalculator.DetermineThresholds(differences, nQuantiles)).ToList();
|
---|
53 | foreach (var eps in thresholds) {
|
---|
54 | if (QualityDelta.Count > 0 && QualityDelta.Last() == eps) {
|
---|
55 | QualityDelta.DuplicateLast();
|
---|
56 | InformationContent.DuplicateLast();
|
---|
57 | DensityBasinInformation.DuplicateLast();
|
---|
58 | TotalEntropy.DuplicateLast();
|
---|
59 | PartialInformationContent.DuplicateLast();
|
---|
60 | } else {
|
---|
61 | var slopes = Slopes(eps, differences).ToList();
|
---|
62 | var shapes = Shapes(shapeSize, slopes).ToList();
|
---|
63 | var shapeCounts = CountShapes(shapes);
|
---|
64 | QualityDelta.Add(eps);
|
---|
65 | InformationContent.Add(CalculateEntropy(Shape.GetAll(shapeSize, Shape.Form.NonUni), shapeCounts, shapes.Count));
|
---|
66 | DensityBasinInformation.Add(CalculateEntropy(Shape.GetAll(shapeSize, Shape.Form.Uni), shapeCounts, shapes.Count));
|
---|
67 | TotalEntropy.Add(CalculateEntropy(Shape.GetAll(shapeSize, Shape.Form.Any), shapeCounts, shapes.Count));
|
---|
68 | PartialInformationContent.Add(CalculatePartialInformationContent(eps, differences));
|
---|
69 | }
|
---|
70 | }
|
---|
71 | PeakInformationContent = GetPeak(QualityDelta, InformationContent);
|
---|
72 | PeakDensityBasinInformation = GetPeak(QualityDelta, DensityBasinInformation);
|
---|
73 | PeakPartialInformationContent = GetPeak(QualityDelta, PartialInformationContent);
|
---|
74 | PeakTotalEntropy = GetPeak(QualityDelta, TotalEntropy);
|
---|
75 | }
|
---|
76 |
|
---|
77 | public static Peak GetPeak(IEnumerable<double> indexes, IEnumerable<double> values) {
|
---|
78 | var max = indexes.Zip(values, (i, v) => new { i, v }).OrderByDescending(p => p.v).First();
|
---|
79 | return new Peak(max.i, max.v);
|
---|
80 | }
|
---|
81 |
|
---|
82 | public enum Slope {
|
---|
83 | Up = 1,
|
---|
84 | Flat = 0,
|
---|
85 | Down = -1
|
---|
86 | }
|
---|
87 |
|
---|
88 | public class Shape : IEnumerable<Slope>, IComparable<Shape> {
|
---|
89 |
|
---|
90 | #region types, fields and properties
|
---|
91 | public enum Form {Uni, NonUni, Any}
|
---|
92 |
|
---|
93 | private readonly Slope[] slopes;
|
---|
94 |
|
---|
95 | private static readonly Dictionary<Tuple<Form, int>, IList<Shape>> SHAPES =
|
---|
96 | new Dictionary<Tuple<Form,int>,IList<Shape>>();
|
---|
97 | #endregion
|
---|
98 |
|
---|
99 | public Shape(IEnumerable<Slope> slopes) {
|
---|
100 | this.slopes = slopes.ToArray();
|
---|
101 | }
|
---|
102 |
|
---|
103 | #region static methods
|
---|
104 |
|
---|
105 | public static Shape Get(params Slope[] slopes) {
|
---|
106 | return new Shape(slopes);
|
---|
107 | }
|
---|
108 |
|
---|
109 | public static IList<Shape> GetAll(int size, Form type) {
|
---|
110 | var key = Tuple.Create(type, size);
|
---|
111 | IList<Shape> allShapes;
|
---|
112 | if (!SHAPES.TryGetValue(key, out allShapes)) {
|
---|
113 | allShapes = CreateAll(size, type).ToList();
|
---|
114 | SHAPES[key] = allShapes;
|
---|
115 | }
|
---|
116 | return allShapes;
|
---|
117 | }
|
---|
118 |
|
---|
119 | private static IEnumerable<Shape> CreateAll(int size, Form type) {
|
---|
120 | if (size == 0) {
|
---|
121 | yield return Get();
|
---|
122 | } else {
|
---|
123 | foreach (var s in CreateAll(size - 1, type)) {
|
---|
124 | foreach (var s2 in s.ExtendAll(type))
|
---|
125 | yield return s2;
|
---|
126 | }
|
---|
127 | }
|
---|
128 | }
|
---|
129 | #endregion
|
---|
130 |
|
---|
131 | private Shape Extend(Slope s) {
|
---|
132 | return new Shape(slopes.Concat(new[] {s}));
|
---|
133 | }
|
---|
134 |
|
---|
135 | private IEnumerable<Shape> ExtendAll(Form t) {
|
---|
136 | if (Length == 0 || t == Form.Any) {
|
---|
137 | yield return Extend(Slope.Up);
|
---|
138 | yield return Extend(Slope.Flat);
|
---|
139 | yield return Extend(Slope.Down);
|
---|
140 | } else if (t == Form.Uni) {
|
---|
141 | yield return Extend(slopes[0]);
|
---|
142 | } else if (t == Form.NonUni) {
|
---|
143 | if (slopes.Last() != Slope.Up) yield return Extend(Slope.Up);
|
---|
144 | if (slopes.Last() != Slope.Flat) yield return Extend(Slope.Flat);
|
---|
145 | if (slopes.Last() != Slope.Down) yield return Extend(Slope.Down);
|
---|
146 | }
|
---|
147 | }
|
---|
148 |
|
---|
149 | public int Length {
|
---|
150 | get { return slopes.Length; }
|
---|
151 | }
|
---|
152 |
|
---|
153 | public Slope this[int i] {
|
---|
154 | get { return slopes[i]; }
|
---|
155 | }
|
---|
156 |
|
---|
157 | #region IEnumerable Members
|
---|
158 | public IEnumerator<Slope> GetEnumerator() {
|
---|
159 | return (IEnumerator<Slope>) slopes.GetEnumerator();
|
---|
160 | }
|
---|
161 | System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() {
|
---|
162 | return GetEnumerator();
|
---|
163 | }
|
---|
164 | #endregion
|
---|
165 |
|
---|
166 | #region IComparable Members
|
---|
167 |
|
---|
168 | public int CompareTo(Shape other) {
|
---|
169 | if (other.Length < Length)
|
---|
170 | return -1;
|
---|
171 | if (other.Length > Length)
|
---|
172 | return 1;
|
---|
173 | for (var i = 0; i<Length; i++) {
|
---|
174 | var d = slopes[i].CompareTo(other.slopes[i]);
|
---|
175 | if (d != 0)
|
---|
176 | return d;
|
---|
177 | }
|
---|
178 | return 0;
|
---|
179 | }
|
---|
180 | public override bool Equals(object obj) {
|
---|
181 | var s = obj as Shape;
|
---|
182 | if (s == null) return false;
|
---|
183 | return CompareTo(s) == 0;
|
---|
184 | }
|
---|
185 | public override int GetHashCode() {
|
---|
186 | return slopes.Aggregate(0, (a, s) => a*3 + ((int) s + 1)).GetHashCode();
|
---|
187 | }
|
---|
188 |
|
---|
189 | #endregion
|
---|
190 |
|
---|
191 | private string asString;
|
---|
192 | public override string ToString() {
|
---|
193 | return asString ?? (asString = string.Join("", slopes.Select(s => (s == Slope.Down ? "v" : (s == Slope.Up ? "^" : "-")))));
|
---|
194 | }
|
---|
195 | }
|
---|
196 |
|
---|
197 | private static IEnumerable<Slope> Slopes(double eps, IEnumerable<double> differences) {
|
---|
198 | return differences.Select(d => (d > eps ? Slope.Up : (d < -eps ? Slope.Down : 0)));
|
---|
199 | }
|
---|
200 |
|
---|
201 | private static IEnumerable<Shape> Shapes(int size, IEnumerable<Slope> slopes) {
|
---|
202 | var q = new Queue<Slope>();
|
---|
203 | foreach (var s in slopes) {
|
---|
204 | q.Enqueue(s);
|
---|
205 | if (q.Count < size) continue;
|
---|
206 | yield return new Shape(q);
|
---|
207 | q.Dequeue();
|
---|
208 | }
|
---|
209 | }
|
---|
210 |
|
---|
211 | private static double CalculateEntropy(IList<Shape> shapes, Dictionary<Shape, int> shapeCounts, int totalNShapes) {
|
---|
212 | return shapes.Aggregate(0.0, (current, s) => current - Entropy(shapeCounts.GetValueOrDefault(s, 0), totalNShapes, shapes.Count));
|
---|
213 | }
|
---|
214 |
|
---|
215 | private static double CalculatePartialInformationContent(double eps, ICollection<double> differences) {
|
---|
216 | int slope = 0;
|
---|
217 | int nPeaks = 0;
|
---|
218 | foreach (var d in differences) {
|
---|
219 | if (d > eps) {
|
---|
220 | if (slope < 0) nPeaks++;
|
---|
221 | slope = +1;
|
---|
222 | } else if (d < -eps) {
|
---|
223 | if (slope > 0) nPeaks++;
|
---|
224 | slope = -1;
|
---|
225 | }
|
---|
226 | }
|
---|
227 | return 1.0 * nPeaks / differences.Count;
|
---|
228 | }
|
---|
229 |
|
---|
230 | private static Dictionary<Shape, int> CountShapes(IEnumerable<Shape> shapes) {
|
---|
231 | var shapeCounts = new Dictionary<Shape, int>();
|
---|
232 | foreach (var group in shapes.GroupBy(s => s))
|
---|
233 | shapeCounts[group.Key] = group.Count();
|
---|
234 | return shapeCounts;
|
---|
235 | }
|
---|
236 |
|
---|
237 | private static double Entropy(int count, int total, int nCases) {
|
---|
238 | if (count == 0)
|
---|
239 | return 0;
|
---|
240 | double freq = 1.0 * count / total;
|
---|
241 | return freq * Math.Log(freq, nCases);
|
---|
242 | }
|
---|
243 |
|
---|
244 | private static IEnumerable<double> Differences(IEnumerable<double> values) {
|
---|
245 | return Utils.Delta(values, (x, y) => y - x);
|
---|
246 | }
|
---|
247 |
|
---|
248 | }
|
---|
249 | }
|
---|