Free cookie consent management tool by TermsFeed Policy Generator

source: branches/3026_IntegrationIntoSymSpace/HeuristicLab.JsonInterface/Converters/RegressionProblemDataConverter.cs @ 18044

Last change on this file since 18044 was 18044, checked in by dpiringe, 3 years ago

#3026

  • adjusted the necessary plugin dependencies for all three JsonInterface projects (depending on CheckPluginDependenciesForReferencedAssemblies and CheckReferenceAssembliesForPluginDependencies tests)
  • using now the error handling dialog in FileManager
  • added an AggregateException in RegressionProblemDataConverter for unconvertible values
File size: 11.9 KB
Line 
1using System;
2using System.Collections;
3using System.Collections.Generic;
4using System.Linq;
5using System.Reflection;
6using HeuristicLab.Core;
7using HeuristicLab.Data;
8using HeuristicLab.Parameters;
9using HeuristicLab.Problems.DataAnalysis;
10
namespace HeuristicLab.JsonInterface {
  /// <summary>
  /// Converts a regression problem data item to a tree of JSON items (<see cref="Extract"/>)
  /// and writes such a tree back into an existing item (<see cref="Inject"/>).
  /// The item is accessed via <c>dynamic</c> and reflection, so this class relies on the
  /// member and field names listed in the constants region.
  /// </summary>
  public class RegressionProblemDataConverter : BaseConverter {
    #region Constants
    private const BindingFlags flags = BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance;
    private const string TestPartition = "TestPartition";
    private const string TrainingPartition = "TrainingPartition";
    private const string TargetVariable = "TargetVariable";
    private const string AllowedInputVariables = "AllowedInputVariables";
    private const string Dataset = "Dataset";
    private const string VariableValues = "variableValues";
    private const string VariableNames = "variableNames";
    private const string InputVariables = "InputVariables";
    private const string VariableRanges = "VariableRanges";
    private const string Rows = "rows";
    private const string Value = "value";
    private const string Parameters = "parameters";
    private const string CheckedItemList = "CheckedItemList";
    #endregion

    public override int Priority => 20;

    // The supported base type is looked up by GUID in the HEAL.Attic static cache.
    // NOTE(review): presumably this GUID identifies the regression problem data type - confirm.
    public override bool CanConvertType(Type t) =>
      HEAL.Attic.Mapper.StaticCache.GetType(new Guid("EE612297-B1AF-42D2-BF21-AF9A2D42791C")).IsAssignableFrom(t);

    /// <summary>
    /// Writes the values of the children of <paramref name="data"/> into <paramref name="item"/>.
    /// Children are matched by the suffix of their path. A child that is missing, or that is not
    /// of the expected JSON item type, is skipped.
    /// </summary>
    /// <param name="item">The problem data item to modify.</param>
    /// <param name="data">The JSON item whose children carry the new values.</param>
    /// <param name="root">Root converter (not used by this converter).</param>
    /// <exception cref="Exception">
    /// The target variable or one of the allowed input variables is not a column of the given dataset.
    /// </exception>
    public override void Inject(IItem item, IJsonItem data, IJsonItemConverter root) {
      dynamic regressionProblemData = (dynamic)item;

      DoubleMatrixJsonItem dataset = null;
      StringJsonItem targetVariable = null;
      IntRangeJsonItem testPartition = null;
      IntRangeJsonItem trainingPartition = null;
      StringArrayJsonItem allowedInputVariables = null;

      // Collect the items first (cache them, because the order is important for injection).
      foreach (var child in data.Children) {
        string path = child.Path;
        if (path.EndsWith(Dataset, StringComparison.Ordinal)) {
          dataset = child as DoubleMatrixJsonItem;
        } else if (path.EndsWith(TargetVariable, StringComparison.Ordinal)) {
          targetVariable = child as StringJsonItem;
        } else if (path.EndsWith(TestPartition, StringComparison.Ordinal)) {
          testPartition = child as IntRangeJsonItem;
        } else if (path.EndsWith(TrainingPartition, StringComparison.Ordinal)) {
          trainingPartition = child as IntRangeJsonItem;
        } else if (path.EndsWith(AllowedInputVariables, StringComparison.Ordinal)) {
          allowedInputVariables = child as StringArrayJsonItem;
        }
      }

      // Validate variable names against the dataset columns. The checks only run for items that
      // are actually present, because every setter below treats a missing item as "nothing to do".
      if (dataset != null) {
        var columnNames = new HashSet<string>(dataset.ColumnNames);

        if (targetVariable != null && !columnNames.Contains(targetVariable.Value)) {
          throw new Exception($"The value of the target variable ('{targetVariable.Value}') has no matching row name value of the dataset.");
        }

        if (allowedInputVariables != null && allowedInputVariables.Value != null) {
          foreach (var v in allowedInputVariables.Value) {
            if (!columnNames.Contains(v)) {
              throw new Exception($"The value of the input variable ('{v}') has no matching row name value of the dataset.");
            }
          }
        }
      }

      // Inject the values of the items.
      SetDataset(regressionProblemData, dataset);
      SetTargetVariable(regressionProblemData, targetVariable);
      SetAllowedInputVariables(regressionProblemData, allowedInputVariables, dataset);
      SetTestPartition(regressionProblemData, testPartition);
      SetTrainingPartition(regressionProblemData, trainingPartition);
      SetVariableRanges(regressionProblemData, dataset);
    }

    /// <summary>
    /// Builds a JSON item for <paramref name="value"/> with children for the dataset (if one could
    /// be extracted), the test and training partitions, the target variable and the allowed
    /// input variables.
    /// </summary>
    /// <param name="value">The problem data item to read.</param>
    /// <param name="root">Root converter (not used by this converter).</param>
    /// <returns>The JSON item describing <paramref name="value"/>.</returns>
    public override IJsonItem Extract(IItem value, IJsonItemConverter root) {
      IJsonItem item = new EmptyJsonItem() {
        Name = value.ItemName,
        Description = value.ItemDescription
      };

      IJsonItem ds = GetDataset(value);
      if (ds != null) {
        item.AddChildren(ds);
      }

      item.AddChildren(GetTestPartition(value));
      item.AddChildren(GetTrainingPartition(value));
      item.AddChildren(GetTargetVariable(value));
      item.AddChildren(GetAllowedInputVariables(value));
      return item;
    }

    #region Reflection Helper
    /// <summary>
    /// Finds the instance field <paramref name="name"/> on <paramref name="type"/> or on one of
    /// its base types, so that private fields declared further up the hierarchy are found too.
    /// </summary>
    /// <exception cref="InvalidOperationException">No such field exists in the type hierarchy.</exception>
    private static FieldInfo GetRequiredField(Type type, string name) {
      for (Type current = type; current != null; current = current.BaseType) {
        FieldInfo field = current.GetField(name, flags | BindingFlags.DeclaredOnly);
        if (field != null) {
          return field;
        }
      }
      throw new InvalidOperationException($"The type '{type.FullName}' has no instance field named '{name}'.");
    }
    #endregion

    #region Inject Helper
    /// <summary>
    /// Replaces the row count, variable names and variable values of the item's dataset with the
    /// content of <paramref name="item"/> by writing the dataset's fields via reflection.
    /// Does nothing if <paramref name="item"/> is null.
    /// </summary>
    private void SetDataset(dynamic regressionProblemData, DoubleMatrixJsonItem item) {
      if (item == null) {
        return;
      }

      // Copy names and values so that the dataset does not share mutable state with the JSON item.
      var columnNames = item.ColumnNames.ToList();
      var columns = item.Value;
      var variableValues = new Dictionary<string, IList>();
      for (int c = 0; c < columnNames.Count; c++) {
        variableValues.Add(columnNames[c], new List<double>(columns[c]));
      }

      // A matrix without columns has no rows; do not index into it.
      int rowCount = columns.Length > 0 ? columns[0].Length : 0;

      // Resolve all fields before writing any of them, so that a missing field
      // cannot leave the dataset half-updated.
      object dataset = (object)regressionProblemData.Dataset;
      Type datasetType = dataset.GetType();
      FieldInfo rowsField = GetRequiredField(datasetType, Rows);
      FieldInfo variableNamesField = GetRequiredField(datasetType, VariableNames);
      FieldInfo variableValuesField = GetRequiredField(datasetType, VariableValues);

      rowsField.SetValue(dataset, rowCount);
      variableNamesField.SetValue(dataset, columnNames);
      variableValuesField.SetValue(dataset, variableValues);
    }

    /// <summary>Copies the range of <paramref name="item"/> to the test partition; no-op if null.</summary>
    private void SetTestPartition(dynamic regressionProblemData, IntRangeJsonItem item) {
      if (item != null) {
        regressionProblemData.TestPartition.Start = item.MinValue;
        regressionProblemData.TestPartition.End = item.MaxValue;
      }
    }

    /// <summary>Copies the range of <paramref name="item"/> to the training partition; no-op if null.</summary>
    private void SetTrainingPartition(dynamic regressionProblemData, IntRangeJsonItem item) {
      if (item != null) {
        regressionProblemData.TrainingPartition.Start = item.MinValue;
        regressionProblemData.TrainingPartition.End = item.MaxValue;
      }
    }

    /// <summary>
    /// Overwrites the backing field of the currently selected target variable value with the
    /// value of <paramref name="item"/>. Does nothing if <paramref name="item"/> is null.
    /// </summary>
    /// <exception cref="InvalidOperationException">The target variable parameter has no value.</exception>
    private void SetTargetVariable(dynamic regressionProblemData, StringJsonItem item) {
      if (item == null) {
        return;
      }

      var param = (IConstrainedValueParameter<StringValue>)regressionProblemData.TargetVariableParameter;
      StringValue current = param.Value;
      if (current == null) {
        throw new InvalidOperationException("The target variable parameter has no value that could be overwritten.");
      }

      // The field is written directly instead of going through the public API of the value.
      FieldInfo valueField = GetRequiredField(current.GetType(), Value);
      valueField.SetValue(current, item.Value);
    }

    /// <summary>
    /// Rebuilds the checked item list of the input variables parameter: one entry per column of
    /// <paramref name="matrix"/>, checked if the column name is contained in <paramref name="item"/>.
    /// Does nothing if <paramref name="item"/> or <paramref name="matrix"/> is null or the data
    /// is not an <see cref="IParameterizedItem"/>.
    /// </summary>
    private void SetAllowedInputVariables(dynamic regressionProblemData, StringArrayJsonItem item, IMatrixJsonItem matrix) {
      object target = regressionProblemData;
      // Without the column names the list cannot be rebuilt, so leave the existing list untouched.
      if (item == null || matrix == null || !(target is IParameterizedItem)) {
        return;
      }

      // Locate the parameter collection anywhere in the type hierarchy instead of assuming that
      // it is declared exactly two base types above the runtime type.
      FieldInfo parametersInfo = GetRequiredField(target.GetType(), Parameters);
      var parameters = (ParameterCollection)parametersInfo.GetValue(target);
      var inputVariablesParam = (FixedValueParameter<ReadOnlyCheckedItemList<StringValue>>)parameters[InputVariables];
      var readOnlyList = inputVariablesParam.Value;

      PropertyInfo listInfo = readOnlyList.GetType().GetProperty(CheckedItemList, flags);
      if (listInfo == null) {
        throw new InvalidOperationException($"The type '{readOnlyList.GetType().FullName}' has no property named '{CheckedItemList}'.");
      }
      var checkedItemList = (CheckedItemList<StringValue>)listInfo.GetValue(readOnlyList);

      // Reset the list, then add the items and set their check state (based on the allowed input variables).
      checkedItemList.Clear();
      var allowed = new HashSet<string>(item.Value ?? new string[0]);
      foreach (var columnName in matrix.ColumnNames) {
        checkedItemList.Add(new StringValue(columnName).AsReadOnly(), allowed.Contains(columnName));
      }
    }

    /// <summary>
    /// Replaces all variable ranges with one interval per column of <paramref name="item"/>:
    /// [min, max] of the column, or a NaN interval if the column is empty or contains NaN.
    /// Does nothing if <paramref name="item"/> is null.
    /// </summary>
    private void SetVariableRanges(dynamic regressionProblemData, DoubleMatrixJsonItem item) {
      if (item == null) {
        return;
      }

      IntervalCollection variableRanges = (IntervalCollection)regressionProblemData.VariableRanges;

      // Snapshot the keys first so that deleting never modifies a collection that is being enumerated.
      var existingKeys = variableRanges.GetDictionary().Select(kvp => kvp.Key).ToList();
      foreach (var key in existingKeys) {
        variableRanges.DeleteInterval(key);
      }

      int count = 0;
      foreach (var column in item.ColumnNames) {
        var values = item.Value[count];
        // Min()/Max() throw on an empty sequence, so an empty column gets a NaN interval as well.
        if (values.Length == 0 || values.Any(x => double.IsNaN(x))) {
          variableRanges.AddInterval(column, new Interval(double.NaN, double.NaN));
        } else {
          variableRanges.AddInterval(column, new Interval(values.Min(), values.Max()));
        }
        count++;
      }
    }

    private void SetShapeConstraints() {
      // TODO
    }
    #endregion

    #region Extract Helper
    /// <summary>
    /// Reads the variable values of the item's dataset via reflection and converts them to a
    /// matrix JSON item. The element type (double, int or bool) is taken from the first value of
    /// the first column.
    /// </summary>
    /// <returns>
    /// The matrix item, or null if the field does not hold a <c>Dictionary&lt;string, IList&gt;</c>,
    /// the dataset has no columns, the first column is null or empty, or the element type is
    /// not supported.
    /// </returns>
    private IJsonItem GetDataset(IItem item) {
      dynamic val = (dynamic)item;
      object dataset = (object)val.Dataset;
      FieldInfo dataInfo = GetRequiredField(dataset.GetType(), VariableValues);

      var dict = dataInfo.GetValue(dataset) as Dictionary<string, IList>;
      if (dict == null || dict.Count == 0) {
        return null;
      }

      IList firstColumn = dict.Values.First();
      if (firstColumn == null || firstColumn.Count == 0) {
        return null;
      }

      object firstValue = firstColumn[0];
      if (firstValue is double) {
        CreateMatrix(dict, out IList<string> columnNames, out double[][] mat);
        return new DoubleMatrixJsonItem() {
          Name = Dataset,
          Value = mat,
          ColumnNames = columnNames,
          Minimum = double.MinValue,
          Maximum = double.MaxValue
        };
      }
      if (firstValue is int) {
        CreateMatrix(dict, out IList<string> columnNames, out int[][] mat);
        return new IntMatrixJsonItem() {
          Name = Dataset,
          Value = mat,
          ColumnNames = columnNames,
          Minimum = int.MinValue,
          Maximum = int.MaxValue
        };
      }
      if (firstValue is bool) {
        CreateMatrix(dict, out IList<string> columnNames, out bool[][] mat);
        return new BoolMatrixJsonItem() {
          Name = Dataset,
          Value = mat,
          ColumnNames = columnNames
        };
      }
      return null;
    }

    /// <summary>
    /// Converts the columns of <paramref name="dict"/> into a column-major matrix
    /// (<c>matrix[column][row]</c>) and returns the column names in the same order.
    /// All column arrays have the length of the longest column; missing cells keep the default
    /// value of <typeparamref name="T"/>.
    /// </summary>
    /// <exception cref="AggregateException">A cell value cannot be cast to <typeparamref name="T"/>.</exception>
    private void CreateMatrix<T>(Dictionary<string, IList> dict, out IList<string> columnNames, out T[][] matrix) {
      // Determine the common row count up front so that every column array gets the same length.
      int rows = 0;
      foreach (var column in dict.Values) {
        rows = Math.Max(rows, column.Count);
      }

      columnNames = new List<string>(dict.Count);
      matrix = new T[dict.Count][];

      int c = 0;
      foreach (var entry in dict) {
        columnNames.Add(entry.Key);

        var cells = new T[rows];
        int r = 0;
        foreach (var cellValue in entry.Value) {
          try {
            cells[r] = (T)cellValue;
          } catch (Exception e) {
            throw new AggregateException($"The cell value '{cellValue}' in row {r} cannot be converted to {typeof(T).FullName}.", e);
          }
          ++r;
        }

        matrix[c] = cells;
        ++c;
      }
    }

    /// <summary>Creates the JSON item for the test partition of <paramref name="item"/>.</summary>
    private IJsonItem GetTestPartition(IItem item) {
      dynamic val = (dynamic)item;
      var testPartition = (IntRange)val.TestPartition;
      return new IntRangeJsonItem() {
        Name = TestPartition,
        MinValue = testPartition.Start,
        MaxValue = testPartition.End,
        Minimum = 0,
        Maximum = int.MaxValue
      };
    }

    /// <summary>Creates the JSON item for the training partition of <paramref name="item"/>.</summary>
    private IJsonItem GetTrainingPartition(IItem item) {
      dynamic val = (dynamic)item;
      var trainingPartition = (IntRange)val.TrainingPartition;
      return new IntRangeJsonItem() {
        Name = TrainingPartition,
        MinValue = trainingPartition.Start,
        MaxValue = trainingPartition.End,
        Minimum = 0,
        Maximum = int.MaxValue
      };
    }

    /// <summary>
    /// Creates the JSON item for the target variable; the restricted items are the values of
    /// the item's input variables at the time of the call.
    /// </summary>
    private IJsonItem GetTargetVariable(IItem item) {
      dynamic val = (dynamic)item;
      var vars = (IEnumerable<StringValue>)val.InputVariables;
      return new StringJsonItem() {
        Name = TargetVariable,
        Value = (string)val.TargetVariable,
        // Materialized so that the JSON item does not hold a deferred query over the live item.
        ConcreteRestrictedItems = vars.Select(x => x.Value).ToArray()
      };
    }

    /// <summary>
    /// Creates the JSON item for the allowed input variables; the restricted items are the
    /// values of the item's input variables at the time of the call.
    /// </summary>
    private IJsonItem GetAllowedInputVariables(IItem item) {
      dynamic val = (dynamic)item;
      var vars = (IEnumerable<StringValue>)val.InputVariables;
      return new StringArrayJsonItem() {
        Name = AllowedInputVariables,
        Value = ((IEnumerable<string>)val.AllowedInputVariables).ToArray(),
        // Materialized so that the JSON item does not hold a deferred query over the live item.
        ConcreteRestrictedItems = vars.Select(x => x.Value).ToArray()
      };
    }
    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.