Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Optimization/3.3/RunCollectionDiscretizer.cs @ 6903

Last change on this file since 6903 was 6903, checked in by epitzer, 13 years ago

#1622 Enable discretization by empirical distribution's quantiles.

File size: 6.9 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using HeuristicLab.Common;
6using HeuristicLab.Core;
7using HeuristicLab.Data;
8using HeuristicLab.Parameters;
9using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
10
namespace HeuristicLab.Optimization {
  /// <summary>
  /// Run collection modifier that reads a numeric value from every run, maps it onto one of a
  /// configurable list of named levels and writes the level name into the run's results.
  /// </summary>
  /// <remarks>
  /// <para>
  /// The source value is looked up by name in the run's results first and in its parameters
  /// second. Only <c>IntValue</c> and <c>DoubleValue</c> items are used; runs without such a
  /// value, or with a NaN or infinite value, are left untouched.
  /// </para>
  /// <para>
  /// Runs are partitioned by the string form of the "GroupBy" value (one single group when
  /// "GroupBy" is blank) and every group is discretized on its own.
  /// </para>
  /// <para>
  /// When "Spread" is positive, a level is chosen from the distance of the value to the group
  /// mean, measured in units of <c>standard deviation * Spread</c>. Otherwise the level is chosen
  /// from the position of the value within the sorted values of its group.
  /// </para>
  /// </remarks>
  [Item("RunCollection Discretizer", "Creates several levels from the distribution of a certain result accross a run collection and assignes a discretized value.")]
  [StorableClass]
  public class RunCollectionDiscretizer : ParameterizedNamedItem, IRunCollectionModifier {

    /// <summary>The name is derived from the parameters (see <c>UpdateName</c>) and cannot be edited.</summary>
    public override bool CanChangeName { get { return false; } }

    /// <summary>The description cannot be edited.</summary>
    public override bool CanChangeDescription { get { return false; } }

    #region Parameters
    /// <summary>Name of the result or parameter whose value is discretized.</summary>
    public ValueParameter<StringValue> SourceParameter {
      get { return (ValueParameter<StringValue>)Parameters["Source"]; }
    }

    /// <summary>Name of the result entry that receives the level name.</summary>
    public ValueParameter<StringValue> TargetParameter {
      get { return (ValueParameter<StringValue>)Parameters["Target"]; }
    }

    /// <summary>
    /// Number of standard deviations that make up one level. Values that are not positive
    /// switch to discretization by position in the sorted group values.
    /// </summary>
    public ValueParameter<DoubleValue> SpreadParameter {
      get { return (ValueParameter<DoubleValue>)Parameters["Spread"]; }
    }

    /// <summary>Name of the result or parameter used to partition the runs; blank for no partitioning.</summary>
    public ValueParameter<StringValue> GroupByParameter {
      get { return (ValueParameter<StringValue>)Parameters["GroupBy"]; }
    }

    /// <summary>Level names, ordered from the lowest to the highest level.</summary>
    public ValueParameter<ItemList<StringValue>> LevelsParameter {
      get { return (ValueParameter<ItemList<StringValue>>)Parameters["Levels"]; }
    }
    #endregion

    private string Source { get { return SourceParameter.Value.Value; } }
    private string Target { get { return TargetParameter.Value.Value; } }
    private double Spread { get { return SpreadParameter.Value.Value; } }
    private string GroupBy { get { return GroupByParameter.Value.Value; } }

    // Builds a fresh list on every access, so callers should read it once and keep the result.
    private List<string> Levels { get { return LevelsParameter.Value.Select(v => v.Value).ToList(); } }

    #region Construction & Cloning
    [StorableConstructor]
    protected RunCollectionDiscretizer(bool deserializing) : base(deserializing) { }

    /// <summary>Cloning constructor; attaches the name-update handlers to the new instance.</summary>
    protected RunCollectionDiscretizer(RunCollectionDiscretizer original, Cloner cloner)
      : base(original, cloner) {
      RegisterEvents();
    }

    /// <summary>
    /// Creates a discretizer with source "Value", target "Calc.Value", a spread of one standard
    /// deviation, no grouping and five levels from "Very Low" to "Very High".
    /// </summary>
    public RunCollectionDiscretizer() {
      Parameters.Add(new ValueParameter<StringValue>("Source", "Source value name to be fuzzified.", new StringValue("Value")));
      Parameters.Add(new ValueParameter<StringValue>("Target", "Target value name. The new, fuzzified variable to be created.", new StringValue("Calc.Value")));
      Parameters.Add(new ValueParameter<DoubleValue>("Spread", "The number of standard deviations considered one additional level. Set to zero to use empirical distribution instead.", new DoubleValue(1)));
      Parameters.Add(new ValueParameter<StringValue>("GroupBy", "Create separate analyzes for different values of this variable.", new StringValue("")));
      Parameters.Add(new ValueParameter<ItemList<StringValue>>("Levels", "The list of levels to be assigned.",
        new ItemList<StringValue> {
          new StringValue("Very Low"),
          new StringValue("Low"),
          new StringValue("Average"),
          new StringValue("High"),
          new StringValue("Very High"),
        }));
      RegisterEvents();
      UpdateName();
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new RunCollectionDiscretizer(this, cloner);
    }

    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      RegisterEvents();
    }
    #endregion

    #region Name handling
    // Keeps the displayed name in sync with the parameters that appear in it.
    private void RegisterEvents() {
      SourceParameter.ToStringChanged += Parameter_NameChanged;
      TargetParameter.ToStringChanged += Parameter_NameChanged;
      GroupByParameter.ToStringChanged += Parameter_NameChanged;
    }

    private void Parameter_NameChanged(object sender, EventArgs e) {
      UpdateName();
    }

    // Rebuilds the name as "<Target> := Discrete(<Source>)", or
    // "<Target> := Discrete(<Source>/<GroupBy>)" when a grouping variable is set.
    private void UpdateName() {
      name = string.Format("{0} := Discrete({1}{2})",
        Target,
        Source,
        string.IsNullOrWhiteSpace(GroupBy) ? "" : string.Format("/{0}", GroupBy));
      OnNameChanged();
    }
    #endregion

    #region IRunCollectionModifier Members

    /// <summary>
    /// Assigns a level name to <c>Results[Target]</c> of every run that has a finite numeric
    /// source value. Runs without such a value are not modified.
    /// </summary>
    /// <param name="runs">The runs to process.</param>
    public void Modify(List<IRun> runs) {
      // Read the configuration once: Levels allocates a new list on every access and was
      // previously re-read several times for every single run.
      List<string> levels = Levels;
      if (levels.Count == 0) {
        // Nothing can be assigned; indexing an empty level list would throw.
        return;
      }
      double spread = Spread;
      string target = Target;

      // ToList() materializes all source values before any result is written, so writing the
      // target cannot influence values that are still to be read (e.g. when Target == Source).
      foreach (var group in runs
        .Select(r => new { Run = r, Value = GetSourceValue(r) })
        .Where(r => r.Value.HasValue && !double.IsNaN(r.Value.Value) && !double.IsInfinity(r.Value.Value))
        .Select(r => new { r.Run, r.Value.Value, Bin = GetGroupByValue(r.Run) })
        .GroupBy(r => r.Bin).ToList()) {
        var values = group.Select(r => r.Value).ToList();
        if (spread > 0) {
          var avg = values.Average();
          var stdDev = values.StandardDeviation();
          foreach (var r in group) {
            r.Run.Results[target] = new StringValue(DiscretizeByDeviation(r.Value, avg, stdDev, spread, levels));
          }
        } else {
          values.Sort();
          var sorted = values.ToArray();
          foreach (var r in group) {
            r.Run.Results[target] = new StringValue(DiscretizeByRank(r.Value, sorted, levels));
          }
        }
      }
    }

    #endregion

    #region Helpers

    // Results take precedence over parameters of the same name.
    private double? GetSourceValue(IRun run) {
      return CastSourceValue(run.Results) ?? CastSourceValue(run.Parameters);
    }

    // Returns the string form of the run's GroupBy value (results first, then parameters), or
    // the empty string when no grouping is configured or the run has no such value.
    private string GetGroupByValue(IRun run) {
      if (string.IsNullOrWhiteSpace(GroupBy))
        return String.Empty;
      IItem value;
      run.Results.TryGetValue(GroupBy, out value);
      if (value == null)
        run.Parameters.TryGetValue(GroupBy, out value);
      if (value != null)
        return value.ToString();
      else
        return String.Empty;
    }

    // Returns the entry named Source as a double when it is an IntValue or a DoubleValue,
    // otherwise null (entry missing or of another type).
    private double? CastSourceValue(IDictionary<string, IItem> variables) {
      IItem value;
      variables.TryGetValue(Source, out value);
      var intValue = value as IntValue;
      if (intValue != null) {
        return intValue.Value;
      } else {
        var doubleValue = value as DoubleValue;
        if (doubleValue != null)
          return doubleValue.Value;
      }
      return null;
    }

    // Picks a level from the distance of value to avg, measured in units of stdDev * spread.
    //
    // Odd number of levels:  the middle level covers |dev| < 1 and every further full unit
    //                        moves one level outwards.
    // Even number of levels: the mean is the border between the two middle levels; the upper
    //                        middle level covers 0 <= dev <= 1, the lower one -1 <= dev < 0.
    //
    // Indices beyond either end are clamped to the first/last level. levels must not be empty.
    private static string DiscretizeByDeviation(double value, double avg, double stdDev, double spread, List<string> levels) {
      int middle = levels.Count / 2;

      // A group with a single run or with identical values has no spread. Dividing by zero
      // would yield NaN or infinity, and Math.Sign(NaN) throws an ArithmeticException.
      double levelWidth = stdDev * spread;
      if (double.IsNaN(levelWidth) || double.IsInfinity(levelWidth) || levelWidth <= 0) {
        return levels[middle];
      }

      double dev = (value - avg) / levelWidth;
      if (double.IsNaN(dev)) {
        return levels[middle];
      }

      // Clamp before converting to int: the result of casting an out-of-range double is
      // unspecified, and anything at least levels.Count units away is clamped to an end anyway.
      double distance = Math.Min(Math.Abs(dev), levels.Count);

      int index;
      if (levels.Count % 2 == 1) {
        index = middle + Math.Sign(dev) * (int)Math.Floor(distance);
      } else {
        int steps = (int)Math.Ceiling(distance);
        // The previous formula (Count/2 + steps and Count/2 + 1 - steps) was shifted one level
        // upwards, which mapped a value exactly at the mean to the level above the upper middle.
        index = dev > 0 ? middle + steps - 1 : middle - steps;
      }
      return levels[Math.Min(levels.Count - 1, Math.Max(0, index))];
    }

    // Picks a level from the position of value within sortedValues (ascending): position 0 maps
    // to the first level, the last position to the last level, positions in between
    // proportionally (rounded with Math.Round). levels must not be empty.
    private static string DiscretizeByRank(double value, double[] sortedValues, List<string> levels) {
      // A lone value has no relative position; the former division by (Length - 1) produced NaN.
      if (sortedValues.Length < 2) {
        return levels[levels.Count / 2];
      }
      var index = Array.BinarySearch(sortedValues, value);
      // A negative result is the bitwise complement of the insertion point.
      var pos = 1.0 * (index < 0 ? ~index : index) / (sortedValues.Length - 1);
      return levels[Math.Min(levels.Count - 1, Math.Max(0, (int)Math.Round(pos * (levels.Count - 1))))];
    }

    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.