1 | #region License Information |
---|
2 | /* |
---|
3 | * This file is part of SimSharp which is licensed under the MIT license. |
---|
4 | * See the LICENSE file in the project root for more information. |
---|
5 | */ |
---|
6 | #endregion |
---|
7 | |
---|
8 | using System; |
---|
9 | using System.Collections.Generic; |
---|
10 | using System.Linq; |
---|
11 | using System.Text; |
---|
12 | |
---|
namespace SimSharp {
  /// <summary>
  /// Calculates descriptive statistics (count, minimum, maximum, total, mean,
  /// variance) of the observed samples. All observed values are equally weighed.
  /// When <see cref="Collect"/> is true every sample is additionally remembered,
  /// which enables percentiles and the histogram.
  ///
  /// It can be used to calculate e.g. lead times of processes.
  /// </summary>
  public sealed class SampleMonitor : ISampleMonitor {
    /// <summary>
    /// Can only be set in the constructor.
    /// When it is true, median and percentiles can be computed and a
    /// histogram can be printed. In addition <see cref="Samples"/>
    /// returns all the remembered values for further processing.
    /// </summary>
    public bool Collect { get; }

    /// <summary>
    /// The monitor can be set to suppress updates. When it is set
    /// to false, the statistics will not be updated and new samples
    /// are ignored.
    /// </summary>
    public bool Active { get; set; }

    /// <summary>
    /// The name of the variable that is being monitored.
    /// Used for output in <see cref="Summarize(bool, int, double?, double?)"/>.
    /// </summary>
    public string Name { get; set; }

    /// <summary>The number of samples added since construction or the last <see cref="Reset"/>.</summary>
    public int Count { get; private set; }

    /// <summary>The smallest sample observed, or 0 when no sample has been added.</summary>
    public double Min { get; private set; }
    /// <summary>The largest sample observed, or 0 when no sample has been added.</summary>
    public double Max { get; private set; }
    /// <summary>The sum of all observed samples.</summary>
    public double Total { get; private set; }
    double INumericMonitor.Sum => Total;
    /// <summary>The arithmetic mean of all observed samples, or 0 when no sample has been added.</summary>
    public double Mean { get; private set; }
    /// <summary>The square root of <see cref="Variance"/>.</summary>
    public double StdDev => Math.Sqrt(Variance);
    /// <summary>
    /// The variance of the observed samples, computed as the sum of squared
    /// deviations from the mean divided by <see cref="Count"/>
    /// (0 when no sample has been added).
    /// </summary>
    public double Variance => Count > 0 ? sumSquaredDeviations / Count : 0.0;
    /// <summary>The most recently added sample, or 0 when no sample has been added.</summary>
    public double Last { get; private set; }

    // Running sum of squared deviations from the mean; divided by Count in Variance.
    private double sumSquaredDeviations;

    // Only allocated when Collect is true, otherwise null.
    private readonly List<double> samples;

    /// <summary>
    /// Returns the list of collected values, or an empty enumerable
    /// when <see cref="Collect"/> was initialized to false.
    /// </summary>
    public IEnumerable<double> Samples => samples != null ? samples.AsEnumerable() : Enumerable.Empty<double>();

    /// <summary>
    /// Creates a new monitor that is active from the start.
    /// </summary>
    /// <param name="name">The name of the monitored variable, may be null.</param>
    /// <param name="collect">Whether all samples should be remembered, see <see cref="Collect"/>.</param>
    public SampleMonitor(string name = null, bool collect = false) {
      Active = true;
      Name = name;
      Collect = collect;
      if (collect) samples = new List<double>(64);
    }

    /// <summary>
    /// Calls <see cref="GetPercentile(double)"/> with 0.5.
    /// </summary>
    /// <remarks>
    /// Median can only be computed when the monitor was initialized to collect the data,
    /// otherwise <see cref="double.NaN"/> is returned.
    ///
    /// The data is preprocessed on every call, the runtime complexity of this method is therefore O(n * log(n)).
    /// </remarks>
    /// <returns>The median (50th percentile) of the samples.</returns>
    public double GetMedian() {
      return GetPercentile(0.5);
    }

    /// <summary>
    /// Calculates the p-percentile of the samples.
    /// </summary>
    /// <remarks>
    /// Percentiles can only be computed when the monitor was initialized to collect the data,
    /// otherwise <see cref="double.NaN"/> is returned. The same holds when no sample has been added yet.
    ///
    /// The data is preprocessed on every call, the runtime complexity of this method is therefore O(n * log(n)).
    /// </remarks>
    /// <exception cref="ArgumentException">Thrown when <paramref name="p"/> is NaN or outside the valid range.</exception>
    /// <param name="p">The percentile has to be in the range [0;1].</param>
    /// <returns>The respective percentile of the samples.</returns>
    public double GetPercentile(double p) {
      if (double.IsNaN(p) || p < 0 || p > 1) throw new ArgumentException("Percentile must be between 0 and 1", nameof(p));
      if (!Collect) return double.NaN;
      return GetPercentile(samples, p);
    }

    /// <summary>
    /// Calculates the p-percentile of the given values.
    /// With n = count * p: when n is not integral the ceil(n)-th smallest value is
    /// returned, otherwise the average of the n-th and (n+1)-th smallest value
    /// (only the largest value when n equals the count). For p = 0 the minimum is returned.
    /// </summary>
    /// <exception cref="ArgumentException">Thrown when <paramref name="p"/> is NaN or outside [0;1].</exception>
    /// <param name="s">The values, in arbitrary order. They are sorted into a copy.</param>
    /// <param name="p">The percentile in the range [0;1].</param>
    /// <returns>The percentile or NaN when <paramref name="s"/> is empty.</returns>
    private static double GetPercentile(IList<double> s, double p) {
      if (double.IsNaN(p) || p < 0 || p > 1) throw new ArgumentException("Percentile must be between 0 and 1", nameof(p));
      if (s.Count == 0) return double.NaN;

      var sorted = s.OrderBy(x => x).ToArray();
      var n = sorted.Length * p;
      var k = (int)Math.Ceiling(n);
      // p == 0 yields k == 0; there is no element "before the first", so the
      // 0th percentile is the minimum (and not the average of the two smallest).
      if (k <= 0) return sorted[0];
      if (n < k) return sorted[k - 1];
      // n is integral: average the k-th and (k+1)-th smallest value, if the latter exists.
      return k < sorted.Length ? (sorted[k - 1] + sorted[k]) / 2 : sorted[k - 1];
    }

    /// <summary>
    /// Clears all statistics and, if <see cref="Collect"/> is true, all remembered samples.
    /// <see cref="Active"/> and <see cref="Name"/> are left unchanged and
    /// <see cref="Updated"/> is not raised.
    /// </summary>
    public void Reset() {
      Count = 0;
      Min = Max = Total = Mean = 0;
      sumSquaredDeviations = 0;
      Last = 0;
      if (Collect) samples.Clear();
    }

    /// <summary>
    /// Adds a new sample, updates the statistics and raises <see cref="Updated"/>.
    /// The call is ignored entirely (including validation) when <see cref="Active"/> is false.
    /// </summary>
    /// <exception cref="ArgumentException">Thrown when <paramref name="value"/> is NaN or infinite.</exception>
    /// <param name="value">The observed value.</param>
    public void Add(double value) {
      if (!Active) return;

      if (double.IsNaN(value) || double.IsInfinity(value))
        throw new ArgumentException("Not a valid double", nameof(value));

      Count++;
      Total += value;
      Last = value;
      if (Collect) samples.Add(value);

      if (Count == 1) {
        Min = Max = Mean = value;
      } else {
        if (value < Min) Min = value;
        if (value > Max) Max = value;

        // Incremental (Welford) update: avoids keeping the samples and
        // recomputing mean and squared deviations from scratch.
        var oldMean = Mean;
        Mean = oldMean + (value - oldMean) / Count;
        sumSquaredDeviations += (value - oldMean) * (value - Mean);
      }

      OnUpdated();
    }

    /// <summary>
    /// Raised after a sample has been added through <see cref="Add(double)"/>.
    /// </summary>
    public event EventHandler Updated;
    private void OnUpdated() {
      Updated?.Invoke(this, EventArgs.Empty);
    }

    string IMonitor.Summarize() {
      return Summarize();
    }

    /// <summary>
    /// Provides a summary of the statistics in a certain format.
    /// If the monitor is configured to collect data, it may also print a histogram.
    /// </summary>
    /// <param name="withHistogram">Whether to print the histogram.
    /// This is only effective if <see cref="Collect"/> was set to true, otherwise
    /// the data to produce the histogram is not available in the first place.</param>
    /// <param name="maxBins">The maximum number of bins that should be used.
    /// Note that the bin width and thus the number of bins is also governed by
    /// <paramref name="binWidth"/> if it is defined.
    /// This is only effective if <see cref="Collect"/> and <paramref name="withHistogram"/>
    /// was set to true, otherwise the data to produce the histogram is not available
    /// in the first place.</param>
    /// <param name="histMin">The minimum for the histogram to start at or the sample
    /// minimum in case the default (null) is given.
    /// This is only effective if <see cref="Collect"/> and <paramref name="withHistogram"/>
    /// was set to true, otherwise the data to produce the histogram is not available
    /// in the first place.</param>
    /// <param name="binWidth">The interval for the bins of the histogram or the
    /// range (<see cref="Max"/> - <see cref="Min"/>) divided by the number of bins
    /// (<paramref name="maxBins"/>) in case the default value (null) is given.
    /// This is only effective if <see cref="Collect"/> and <paramref name="withHistogram"/>
    /// was set to true, otherwise the data to produce the histogram is not available
    /// in the first place.</param>
    /// <returns>A formatted string that provides a summary of the statistics.</returns>
    public string Summarize(bool withHistogram = true, int maxBins = 20, double? histMin = null, double? binWidth = null) {
      // Statistics of the non-zero samples are only available when the samples were collected.
      var nonZero = Collect ? samples.Where(x => x != 0).ToList() : new List<double>();
      var nonZeroMin = nonZero.Count > 0 ? nonZero.Min() : double.NaN;
      var nonZeroMax = nonZero.Count > 0 ? nonZero.Max() : double.NaN;
      // A mean is defined for a single sample, the (n - 1)-based standard deviation needs two.
      var nonZeroMean = nonZero.Count > 0 ? nonZero.Average() : double.NaN;
      var nonZeroStdDev = nonZero.Count > 1
        ? Math.Sqrt(nonZero.Sum(x => (x - nonZeroMean) * (x - nonZeroMean)) / (nonZero.Count - 1.0))
        : double.NaN;

      var sb = new StringBuilder();
      sb.Append("Statistics");
      if (!string.IsNullOrEmpty(Name))
        sb.Append(" of " + Name);
      sb.AppendLine();
      // NOTE(review): header literals are kept verbatim; verify that the column
      // captions line up with the 15 character wide columns used below.
      sb.AppendLine(" all excl.zero zero ");
      sb.AppendLine("--------------- --------------- --------------- ---------------");
      sb.AppendLine(string.Format("{0,15} {1,15} {2,15} {3,15}", "Count",
        Formatter.Format15(Count),
        Formatter.Format15(Collect ? nonZero.Count : double.NaN),
        Formatter.Format15(Collect ? Count - nonZero.Count : double.NaN)));
      AppendRow(sb, "Mean", Mean, nonZeroMean);
      AppendRow(sb, "Std.dev", StdDev, nonZeroStdDev);
      sb.AppendLine();
      AppendRow(sb, "Minimum", Min, nonZeroMin);
      if (Collect) {
        AppendRow(sb, "Percentile-5%", GetPercentile(0.05), GetPercentile(nonZero, 0.05));
        AppendRow(sb, "Median", GetMedian(), GetPercentile(nonZero, 0.5));
        AppendRow(sb, "Percentile-95%", GetPercentile(0.95), GetPercentile(nonZero, 0.95));
      }
      AppendRow(sb, "Maximum", Max, nonZeroMax);

      if (Collect && withHistogram)
        AppendHistogram(sb, maxBins, histMin, binWidth);

      return sb.ToString();
    }

    // Appends one statistics row: label, value over all samples, value over the non-zero samples.
    private static void AppendRow(StringBuilder sb, string label, double all, double exclZero) {
      sb.AppendLine(string.Format("{0,15} {1,15} {2,15}", label, Formatter.Format15(all), Formatter.Format15(exclZero)));
    }

    // Appends the histogram of the collected samples; must only be called when Collect is true.
    private void AppendHistogram(StringBuilder sb, int maxBins, double? histMin, double? binWidth) {
      const string rowFormat = "{0,15} {1,10} {2,5:F1} {3,5:F1} {4}{5}";
      var min = histMin ?? Min;
      var interval = binWidth ?? (Max - Min) / maxBins;
      // Bin k is labelled with its upper bound min + k * interval ("<=" column):
      // bin 0 holds everything <= min, bin k > 0 holds (min + (k-1) * interval, min + k * interval].
      // Ceiling keeps a sample that lies exactly on an upper bound inside that bin.
      // Everything beyond the last bin is accumulated in bin maxBins.
      var bins = samples
        .GroupBy(x => x <= min ? 0 : (int)Math.Min(Math.Ceiling((x - min) / interval), maxBins))
        .Select(g => new { g.Key, Value = g.Count() })
        .OrderBy(b => b.Key);

      sb.AppendLine();
      sb.AppendLine("Histogram");
      sb.AppendLine("<= count % cum% ");
      sb.AppendLine("--------------- ---------- ----- ------");

      var cumul = 0.0;
      var totStars = 0;
      var last = -1;
      foreach (var bin in bins) {
        // Print the empty bins between the previous and the current non-empty bin.
        while (bin.Key > last + 1) {
          last++;
          var emptyBar = "|".PadLeft(totStars + 1);
          sb.AppendLine(string.Format(rowFormat, Formatter.Format15(min + last * interval), 0, 0, cumul * 100, "", emptyBar));
        }

        var prob = bin.Value / (double)Count;
        cumul += prob;
        // One star represents two percent; add a star when rounding the individual
        // bins would otherwise fall behind the rounded cumulative total.
        var probStars = (int)Math.Round(100 * prob / 2);
        var cumulStars = (int)Math.Round(100 * cumul / 2);
        var numStars = probStars;
        if (probStars + totStars < cumulStars) numStars++;
        var stars = new string('*', numStars);
        totStars += numStars;
        var cumulBar = "|".PadLeft(totStars + 1 - numStars);

        var upperBound = min + bin.Key * interval;
        // The last bin is open ended when it does not reach up to the maximum.
        var label = (bin.Key == maxBins && upperBound < Max) ? "inf" : Formatter.Format15(upperBound);
        sb.AppendLine(string.Format(rowFormat, label, bin.Value, prob * 100, cumul * 100, stars, cumulBar));
        last = bin.Key;
      }
    }
  }
}
---|