source: branches/3026_IntegrationIntoSymSpace/HeuristicLab.JsonInterface/Converters/RegressionProblemDataConverter.cs @ 18056

Last change on this file since 18056 was 18056, checked in by dpiringe, 2 months ago

#3026

  • fixed a wrong description for the invert parameter in RunCollectionValueRemover
  • fixed the usage of a wrong formatter in RunCollectionSRSolutionGraphVizFormatter
  • fixed a bug in ListJsonItem where an empty guid field can cause an exception
  • started to rework the RegressionProblemDataConverter -> it causes a bug with the symbol Variable of the TypeCoherentGrammar (maybe more grammars)
    • the reasons for the rework: this converter was already the source of some problems in the past; it uses a lot of reflection and dynamic objects -> it is very complicated to read/understand
  • added an official description property Description in the metadata part of a template + entry in Constants.cs
File size: 12.1 KB
Line 
1using System;
2using System.Collections;
3using System.Collections.Generic;
4using System.Linq;
5using System.Reflection;
6using HeuristicLab.Core;
7using HeuristicLab.Data;
8using HeuristicLab.Parameters;
9using HeuristicLab.Problems.DataAnalysis;
10
11namespace HeuristicLab.JsonInterface {
12  public class RegressionProblemDataConverter : BaseConverter {
13    #region Constants
14    private const BindingFlags flags = BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance;
15    private const string TestPartition = "TestPartition";
16    private const string TrainingPartition = "TrainingPartition";
17    private const string TargetVariable = "TargetVariable";
18    private const string AllowedInputVariables = "AllowedInputVariables";
19    private const string Dataset = "Dataset";
20    private const string VariableValues = "variableValues";
21    private const string VariableNames = "variableNames";
22    private const string InputVariables = "InputVariables";
23    private const string VariableRanges = "VariableRanges";
24    private const string Rows = "rows";
25    private const string Value = "value";
26    private const string Parameters = "parameters";
27    private const string CheckedItemList = "CheckedItemList";
28    #endregion
29
30    public override int Priority => 20;//20;
31
32    public override bool CanConvertType(Type t) => t == typeof(ValueParameter<IRegressionProblemData>);
33      //HEAL.Attic.Mapper.StaticCache.GetType(new Guid("EE612297-B1AF-42D2-BF21-AF9A2D42791C")).IsAssignableFrom(t); // IRegressionProblemData
34
35    public override void Inject(IItem item, IJsonItem data, IJsonItemConverter root) {
36
37      var tmp = item as IRegressionProblemData; //ModifiableDataset
38      //tmp.InputVariables
39     
40
41      dynamic regressionProblemData = (dynamic)item;
42
43      DoubleMatrixJsonItem dataset = null;
44      StringJsonItem targetVariable = null;
45      IntRangeJsonItem testPartition = null;
46      IntRangeJsonItem trainingPartition = null;
47      StringArrayJsonItem allowedInputVariables = null;
48
49      // search first for the items (cache them, because the
50      // order is important for injection)
51      foreach (var child in data.Children) {
52        if (child.Path.EndsWith(Dataset))
53          dataset = child as DoubleMatrixJsonItem;
54        else if (child.Path.EndsWith(TargetVariable))
55          targetVariable = child as StringJsonItem;
56        else if (child.Path.EndsWith(TestPartition))
57          testPartition = child as IntRangeJsonItem;
58        else if (child.Path.EndsWith(TrainingPartition))
59          trainingPartition = child as IntRangeJsonItem;
60        else if (child.Path.EndsWith(AllowedInputVariables))
61          allowedInputVariables = child as StringArrayJsonItem;
62      }
63
64      // check data
65      if(!dataset.ColumnNames.Any(x => x == targetVariable.Value)) {
66        throw new Exception($"The value of the target variable ('{targetVariable.Value}') has no matching row name value of the dataset.");
67      }
68
69      foreach(var v in allowedInputVariables.Value) {
70        if(!dataset.ColumnNames.Any(x => x == v))
71          throw new Exception($"The value of the input variable ('{v}') has no matching row name value of the dataset.");
72      }
73
74      // inject the value of the items
75      SetDataset(regressionProblemData, dataset);
76      SetTargetVariable(regressionProblemData, targetVariable);
77      SetAllowedInputVariables(regressionProblemData, allowedInputVariables, dataset);
78      SetTestPartition(regressionProblemData, testPartition);
79      SetTrainingPartition(regressionProblemData, trainingPartition);
80      SetVariableRanges(regressionProblemData, dataset);
81    }
82
83    public override IJsonItem Extract(IItem value, IJsonItemConverter root) {
84      IJsonItem item = new EmptyJsonItem() {
85        Name = value.ItemName,
86        Description = value.ItemDescription
87      };
88
89      IJsonItem ds = GetDataset(value);
90      if(ds != null)
91        item.AddChildren(ds);
92
93      item.AddChildren(GetTestPartition(value));
94      item.AddChildren(GetTrainingPartition(value));
95      item.AddChildren(GetTargetVariable(value));
96      item.AddChildren(GetAllowedInputVariables(value));
97      return item;
98    }
99
100    #region Inject Helper
101    private void SetDataset(dynamic regressionProblemData, DoubleMatrixJsonItem item) {
102      if (item != null) {
103        var dictTmp = new Dictionary<string, IList>();
104        int c = 0;
105        foreach (var col in item.ColumnNames) {
106          dictTmp.Add(col, new List<double>(item.Value[c]));
107          ++c;
108        }
109
110        object dataset = (object)regressionProblemData.Dataset;
111        var rows = dataset.GetType().GetField(Rows, flags);
112        rows.SetValue(dataset, item.Value[0].Length);
113
114        var variableNames = dataset.GetType().GetField(VariableNames, flags);
115        variableNames.SetValue(dataset, item.ColumnNames);
116
117        var dataInfo = dataset.GetType().GetField(VariableValues, flags);
118        dataInfo.SetValue(dataset, dictTmp);
119      }
120    }
121
122    private void SetTestPartition(dynamic regressionProblemData, IntRangeJsonItem item) {
123      if (item != null) {
124        regressionProblemData.TestPartition.Start = item.MinValue;
125        regressionProblemData.TestPartition.End = item.MaxValue;
126      }
127    }
128
129    private void SetTrainingPartition(dynamic regressionProblemData, IntRangeJsonItem item) {
130      if (item != null) {
131        regressionProblemData.TrainingPartition.Start = item.MinValue;
132        regressionProblemData.TrainingPartition.End = item.MaxValue;
133      }
134    }
135
136    private void SetTargetVariable(dynamic regressionProblemData, StringJsonItem item) {
137      if (item != null) {
138        var param = (IConstrainedValueParameter<StringValue>)regressionProblemData.TargetVariableParameter;
139        StringValue v = param.Value;
140        FieldInfo fi = v.GetType().GetField(Value, flags);
141        fi.SetValue(v, item.Value);
142      }
143    }
144
145    private void SetAllowedInputVariables(dynamic regressionProblemData, StringArrayJsonItem item, IMatrixJsonItem matrix) {
146      if (item != null && regressionProblemData is IParameterizedItem p) {
147       
148        var regProbDataType = ((ParameterizedNamedItem)regressionProblemData).GetType(); //RegressionProblemData
149
150        var parameterizedNamedItemType = regProbDataType.BaseType.BaseType;
151
152        // reset parameter
153        var parametersInfo = parameterizedNamedItemType.GetField(Parameters, flags);
154        ParameterCollection col = (ParameterCollection)parametersInfo.GetValue((object)regressionProblemData);
155        var oldParam = (FixedValueParameter<ReadOnlyCheckedItemList<StringValue>>)col[InputVariables];
156        var value = oldParam.Value;
157        PropertyInfo listInfo = value.GetType().GetProperty(CheckedItemList, flags);
158        CheckedItemList<StringValue> checkedItemList = (CheckedItemList<StringValue>)listInfo.GetValue(value);
159        checkedItemList.Clear();
160
161        // add list items and set their check state (based on allowed input variables)
162        foreach(var i in matrix.ColumnNames) {
163          bool isChecked = false;
164          foreach(var x in item.Value)
165            isChecked = isChecked || (x == i);
166          checkedItemList.Add(new StringValue(i).AsReadOnly(), isChecked);
167        }
168      }
169    }
170
171    private void SetVariableRanges(dynamic regressionProblemData, DoubleMatrixJsonItem item) {
172      if (item != null) {
173        IntervalCollection variableRanges = (IntervalCollection)regressionProblemData.VariableRanges;
174
175        foreach(var kvp in variableRanges.GetDictionary()) {
176          variableRanges.DeleteInterval(kvp.Key);
177        }
178       
179        int count = 0;
180        foreach (var column in item.ColumnNames) {
181          var doubleValuesForColumn = item.Value[count];
182          if (doubleValuesForColumn.Any(x => double.IsNaN(x))) // add a NaN interval if any NaN value exists
183            variableRanges.AddInterval(column, new Interval(double.NaN, double.NaN));
184          else
185            variableRanges.AddInterval(column, new Interval(doubleValuesForColumn.Min(), doubleValuesForColumn.Max()));
186          count++;
187        }
188      }
189    }
190
191    private void SetShapeConstraints() {
192      // TODO
193    }
194    #endregion
195
196    #region Extract Helper
197    private IJsonItem GetDataset(IItem item) {
198      dynamic val = (dynamic)item;
199      object dataset = (object)val.Dataset;
200      FieldInfo dataInfo = dataset.GetType().GetField(VariableValues, flags);
201
202      if (dataInfo.GetValue(dataset) is Dictionary<string, IList> dict) {
203        IEnumerator it = dict.Values.First()?.GetEnumerator();
204
205        if(it != null) {
206          if(it.MoveNext() && it.Current is double) {
207            CreateMatrix(dict, out IList<string> columnNames, out double[][] mat);
208            return new DoubleMatrixJsonItem() {
209              Name = Dataset,
210              Value = mat,
211              ColumnNames = columnNames,
212              Minimum = double.MinValue,
213              Maximum = double.MaxValue
214            };
215          } else if(it.Current is int) {
216            CreateMatrix(dict, out IList<string> columnNames, out int[][] mat);
217            return new IntMatrixJsonItem() {
218              Name = Dataset,
219              Value = mat,
220              ColumnNames = columnNames,
221              Minimum = int.MinValue,
222              Maximum = int.MaxValue
223            };
224          } else if (it.Current is bool) {
225            CreateMatrix(dict, out IList<string> columnNames, out bool[][] mat);
226            return new BoolMatrixJsonItem() {
227              Name = Dataset,
228              Value = mat,
229              ColumnNames = columnNames
230            };
231          }
232        }
233      }
234      return null;
235    }
236   
237    private void CreateMatrix<T>(Dictionary<string, IList> dict, out IList<string> columnNames, out T[][] matrix) {
238      int cols = dict.Count, rows = 0, c = 0;
239      columnNames = new List<string>();
240      matrix = new T[cols][];
241      foreach (var x in dict) {
242        rows = Math.Max(rows, x.Value.Count);
243        columnNames.Add(x.Key);
244
245        matrix[c] = new T[rows];
246
247        int r = 0;
248        foreach (var cellValue in x.Value) {
249          try {
250            matrix[c][r] = (T)cellValue;
251          } catch (Exception e) {
252            throw new AggregateException($"The cell value '{cellValue}' in row {r} cannot be converted to {typeof(T).FullName}.", e);
253          }
254          ++r;
255        }
256        ++c;
257      }
258    }
259
260    private IJsonItem GetTestPartition(IItem item) {
261      dynamic val = (dynamic)item;
262      var trainingPartition = (IntRange)val.TrainingPartition;
263      var testPartition = (IntRange)val.TestPartition;
264      return new IntRangeJsonItem() {
265        Name = TestPartition,
266        MinValue = testPartition.Start,
267        MaxValue = testPartition.End,
268        Minimum = 0,
269        Maximum = int.MaxValue //Math.Max(testPartition.End, trainingPartition.End)
270      };
271    }
272
273    private IJsonItem GetTrainingPartition(IItem item) {
274      dynamic val = (dynamic)item;
275      var trainingPartition = (IntRange)val.TrainingPartition;
276      var testPartition = (IntRange)val.TestPartition;
277      return new IntRangeJsonItem() {
278        Name = TrainingPartition,
279        MinValue = trainingPartition.Start,
280        MaxValue = trainingPartition.End,
281        Minimum = 0,
282        Maximum = int.MaxValue //Math.Max(testPartition.End, trainingPartition.End)
283      };
284    }
285
286
287    private IJsonItem GetTargetVariable(IItem item) {
288      var vars = (IEnumerable<StringValue>)((dynamic)item).InputVariables;
289      return new StringJsonItem() {
290        Name = TargetVariable,
291        Value = (string)((dynamic)item).TargetVariable,
292        ConcreteRestrictedItems = vars.Select(x => x.Value)
293      };
294    }
295
296    private IJsonItem GetAllowedInputVariables(IItem item) {
297      var vars = (IEnumerable<StringValue>)((dynamic)item).InputVariables;
298      return new StringArrayJsonItem() {
299        Name = AllowedInputVariables,
300        Value = ((IEnumerable<string>)((dynamic)item).AllowedInputVariables).ToArray(),
301        ConcreteRestrictedItems = vars.Select(x => x.Value)
302      };
303    }
304    #endregion
305  }
306}
Note: See TracBrowser for help on using the repository browser.