using System; using System.Collections.Generic; using System.Linq; namespace HeuristicLab.Analysis.FitnessLandscape.Analysis { public class InformationAnalysis { public class Peak { public double QualityDelta { get; private set; } public double Value { get; private set; } public Peak(double qualityDelta, double value) { QualityDelta = qualityDelta; Value = value; } } public List InformationContent { get; private set; } public List PartialInformationContent { get; private set; } public List DensityBasinInformation { get; private set; } public List TotalEntropy { get; private set; } public List QualityDelta { get; private set; } public double InformationStability { get; private set; } public int Regularity { get; private set; } public int Diversity { get; private set; } public Peak PeakInformationContent { get; private set; } public Peak PeakPartialInformationContent { get; private set; } public Peak PeakDensityBasinInformation { get; private set; } public Peak PeakTotalEntropy { get; private set; } public InformationAnalysis(IList qualities, int nQuantiles, int shapeSize) { InformationContent = new List(); PartialInformationContent = new List(); DensityBasinInformation = new List(); TotalEntropy = new List(); QualityDelta = new List(); if (shapeSize < 1) throw new ArgumentException("Shape size must be at least 1 (better 2)"); if (qualities.Count > 1) PerformAnalysis(qualities, nQuantiles, shapeSize); } private void PerformAnalysis(IList qualities, int nQuantiles, int shapeSize) { var differences = Differences(qualities).ToList(); InformationStability = differences.Select(Math.Abs).Max(); Regularity = new HashSet(differences).Count; Diversity = new HashSet(qualities).Count; var thresholds = (nQuantiles == 0 ? differences.Select(Math.Abs).OrderBy(d => d) : UniqueThresholdCalculator.DetermineThresholds(differences, nQuantiles)).ToList(); foreach (var eps in thresholds) { if (QualityDelta.Count > 0 && QualityDelta.Last() == eps) { QualityDelta.DuplicateLast(); InformationContent.DuplicateLast(); DensityBasinInformation.DuplicateLast(); TotalEntropy.DuplicateLast(); PartialInformationContent.DuplicateLast(); } else { var slopes = Slopes(eps, differences).ToList(); var shapes = Shapes(shapeSize, slopes).ToList(); var shapeCounts = CountShapes(shapes); QualityDelta.Add(eps); InformationContent.Add(CalculateEntropy(Shape.GetAll(shapeSize, Shape.Form.NonUni), shapeCounts, shapes.Count)); DensityBasinInformation.Add(CalculateEntropy(Shape.GetAll(shapeSize, Shape.Form.Uni), shapeCounts, shapes.Count)); TotalEntropy.Add(CalculateEntropy(Shape.GetAll(shapeSize, Shape.Form.Any), shapeCounts, shapes.Count)); PartialInformationContent.Add(CalculatePartialInformationContent(eps, differences)); } } PeakInformationContent = GetPeak(QualityDelta, InformationContent); PeakDensityBasinInformation = GetPeak(QualityDelta, DensityBasinInformation); PeakPartialInformationContent = GetPeak(QualityDelta, PartialInformationContent); PeakTotalEntropy = GetPeak(QualityDelta, TotalEntropy); } public static Peak GetPeak(IEnumerable indexes, IEnumerable values) { var max = indexes.Zip(values, (i, v) => new { i, v }).OrderByDescending(p => p.v).First(); return new Peak(max.i, max.v); } public enum Slope { Up = 1, Flat = 0, Down = -1 } public class Shape : IEnumerable, IComparable { #region types, fields and properties public enum Form {Uni, NonUni, Any} private readonly Slope[] slopes; private static readonly Dictionary, IList> SHAPES = new Dictionary,IList>(); #endregion public Shape(IEnumerable slopes) { this.slopes = slopes.ToArray(); } #region static methods public static Shape Get(params Slope[] slopes) { return new Shape(slopes); } public static IList GetAll(int size, Form type) { var key = Tuple.Create(type, size); IList allShapes; if (!SHAPES.TryGetValue(key, out allShapes)) { allShapes = CreateAll(size, type).ToList(); SHAPES[key] = allShapes; } return allShapes; } private static IEnumerable CreateAll(int size, Form type) { if (size == 0) { yield return Get(); } else { foreach (var s in CreateAll(size - 1, type)) { foreach (var s2 in s.ExtendAll(type)) yield return s2; } } } #endregion private Shape Extend(Slope s) { return new Shape(slopes.Concat(new[] {s})); } private IEnumerable ExtendAll(Form t) { if (Length == 0 || t == Form.Any) { yield return Extend(Slope.Up); yield return Extend(Slope.Flat); yield return Extend(Slope.Down); } else if (t == Form.Uni) { yield return Extend(slopes[0]); } else if (t == Form.NonUni) { if (slopes.Last() != Slope.Up) yield return Extend(Slope.Up); if (slopes.Last() != Slope.Flat) yield return Extend(Slope.Flat); if (slopes.Last() != Slope.Down) yield return Extend(Slope.Down); } } public int Length { get { return slopes.Length; } } public Slope this[int i] { get { return slopes[i]; } } #region IEnumerable Members public IEnumerator GetEnumerator() { return (IEnumerator) slopes.GetEnumerator(); } System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { return GetEnumerator(); } #endregion #region IComparable Members public int CompareTo(Shape other) { if (other.Length < Length) return -1; if (other.Length > Length) return 1; for (var i = 0; i a*3 + ((int) s + 1)).GetHashCode(); } #endregion private string asString; public override string ToString() { return asString ?? (asString = string.Join("", slopes.Select(s => (s == Slope.Down ? "v" : (s == Slope.Up ? "^" : "-"))))); } } private static IEnumerable Slopes(double eps, IEnumerable differences) { return differences.Select(d => (d > eps ? Slope.Up : (d < -eps ? Slope.Down : 0))); } private static IEnumerable Shapes(int size, IEnumerable slopes) { var q = new Queue(); foreach (var s in slopes) { q.Enqueue(s); if (q.Count < size) continue; yield return new Shape(q); q.Dequeue(); } } private static double CalculateEntropy(IList shapes, Dictionary shapeCounts, int totalNShapes) { return shapes.Aggregate(0.0, (current, s) => current - Entropy(shapeCounts.GetValueOrDefault(s, 0), totalNShapes, shapes.Count)); } private static double CalculatePartialInformationContent(double eps, ICollection differences) { int slope = 0; int nPeaks = 0; foreach (var d in differences) { if (d > eps) { if (slope < 0) nPeaks++; slope = +1; } else if (d < -eps) { if (slope > 0) nPeaks++; slope = -1; } } return 1.0 * nPeaks / differences.Count; } private static Dictionary CountShapes(IEnumerable shapes) { var shapeCounts = new Dictionary(); foreach (var group in shapes.GroupBy(s => s)) shapeCounts[group.Key] = group.Count(); return shapeCounts; } private static double Entropy(int count, int total, int nCases) { if (count == 0) return 0; double freq = 1.0 * count / total; return freq * Math.Log(freq, nCases); } private static IEnumerable Differences(IEnumerable values) { return Utils.Delta(values, (x, y) => y - x); } } }