Free cookie consent management tool by TermsFeed Policy Generator

source: branches/CEDMA-Exporter-715/sources/HeuristicLab.Modeling/3.2/ProblemInjector.cs @ 3215

Last change on this file since 3215 was 2162, checked in by gkronber, 15 years ago

Implemented #707: ProblemInjector creates a new dataset that contains only the target variable and all allowed input variables.

File size: 7.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Text;
25using System.Xml;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.DataAnalysis;
29using System.Linq;
30
namespace HeuristicLab.Modeling {
  /// <summary>
  /// Operator that injects the variables describing a data-based modeling problem
  /// (dataset, target variable, allowed input variables and sample ranges) into a scope.
  /// </summary>
  /// <remarks>
  /// The dataset written to the scope is not the operator's own dataset but a reduced copy:
  /// column 0 holds the target variable and columns 1..n hold the allowed input variables
  /// in the order in which they are listed in "AllowedFeatures".
  /// </remarks>
  public class ProblemInjector : OperatorBase {
    /// <summary>
    /// Gets a short description of this operator.
    /// </summary>
    public override string Description {
      get { return @"Injects the necessary variables for a data-based modeling problem."; }
    }

    /// <summary>
    /// Registers the variable infos of the operator and creates the operator-local variables
    /// with default values.
    /// </summary>
    public ProblemInjector()
      : base() {
      RegisterLocalVariable("Dataset", "Dataset", typeof(Dataset), new Dataset());
      RegisterLocalVariable("TargetVariable", "TargetVariable", typeof(IntData), new IntData());
      RegisterLocalVariable("AllowedFeatures", "Indexes of allowed input variables", typeof(ItemList<IntData>), new ItemList<IntData>());
      RegisterLocalVariable("TrainingSamplesStart", "TrainingSamplesStart", typeof(IntData), new IntData());
      RegisterLocalVariable("TrainingSamplesEnd", "TrainingSamplesEnd", typeof(IntData), new IntData());

      // The actual training range is computed in Apply(); only the infos are registered here,
      // no local variables are created for them.
      AddVariableInfo(new VariableInfo("ActualTrainingSamplesStart", "ActualTrainingSamplesStart", typeof(IntData), VariableKind.New));
      AddVariableInfo(new VariableInfo("ActualTrainingSamplesEnd", "ActualTrainingSamplesEnd", typeof(IntData), VariableKind.New));

      RegisterLocalVariable("ValidationSamplesStart", "ValidationSamplesStart", typeof(IntData), new IntData());
      RegisterLocalVariable("ValidationSamplesEnd", "ValidationSamplesEnd", typeof(IntData), new IntData());
      RegisterLocalVariable("TestSamplesStart", "TestSamplesStart", typeof(IntData), new IntData());
      RegisterLocalVariable("TestSamplesEnd", "TestSamplesEnd", typeof(IntData), new IntData());

      AddVariableInfo(new VariableInfo("MaxNumberOfTrainingSamples", "Maximal number of training samples to use (optional)", typeof(IntData), VariableKind.In));
    }

    /// <summary>
    /// Creates the view for this operator.
    /// </summary>
    /// <returns>A new <see cref="ProblemInjectorView"/> showing this operator.</returns>
    public override IView CreateView() {
      return new ProblemInjectorView(this);
    }

    /// <summary>
    /// Injects the problem variables into <paramref name="scope"/>.
    /// </summary>
    /// <remarks>
    /// Added to the scope: clones of the six sample-range variables, the reduced "Dataset",
    /// "AllowedFeatures" (1..Columns-1 of the reduced dataset), "TargetVariable" (always 0 in
    /// the reduced dataset), and "ActualTrainingSamplesStart"/"ActualTrainingSamplesEnd".
    /// The actual training range starts at TrainingSamplesStart and spans
    /// TrainingSamplesEnd - TrainingSamplesStart samples, capped by the optional
    /// "MaxNumberOfTrainingSamples".
    /// </remarks>
    /// <param name="scope">The scope that receives the variables.</param>
    /// <returns>Always <c>null</c>.</returns>
    /// <exception cref="ArgumentException">
    /// Thrown when "MaxNumberOfTrainingSamples" is available and either it or the configured
    /// training range is not larger than 0.
    /// </exception>
    public override IOperation Apply(IScope scope) {
      AddVariableToScope("TrainingSamplesStart", scope);
      AddVariableToScope("TrainingSamplesEnd", scope);
      AddVariableToScope("ValidationSamplesStart", scope);
      AddVariableToScope("ValidationSamplesEnd", scope);
      AddVariableToScope("TestSamplesStart", scope);
      AddVariableToScope("TestSamplesEnd", scope);

      Dataset operatorDataset = (Dataset)GetVariable("Dataset").Value;
      int targetVariable = ((IntData)GetVariable("TargetVariable").Value).Data;
      ItemList<IntData> operatorAllowedFeatures = (ItemList<IntData>)GetVariable("AllowedFeatures").Value;

      Dataset scopeDataset = CreateNewDataset(operatorDataset, targetVariable, operatorAllowedFeatures);

      // In the reduced dataset the target is column 0, so every remaining column is an allowed input.
      ItemList<IntData> allowedFeatures = new ItemList<IntData>();
      allowedFeatures.AddRange(Enumerable.Range(1, scopeDataset.Columns - 1).Select(x => new IntData(x)));

      scope.AddVariable(new Variable("Dataset", scopeDataset));
      scope.AddVariable(new Variable("AllowedFeatures", allowedFeatures));
      scope.AddVariable(new Variable("TargetVariable", new IntData(0)));

      int trainingStart = GetVariableValue<IntData>("TrainingSamplesStart", scope, true).Data;
      int trainingEnd = GetVariableValue<IntData>("TrainingSamplesEnd", scope, true).Data;
      int availableSamples = trainingEnd - trainingStart;

      var maxTraining = GetVariableValue<IntData>("MaxNumberOfTrainingSamples", scope, true, false);
      int nTrainingSamples = availableSamples;
      if (maxTraining != null) {
        // Both conditions were previously reported with the first message, which blamed
        // MaxNumberOfTrainingSamples even when the training range itself was empty.
        if (maxTraining.Data <= 0) {
          throw new ArgumentException("Maximal number of training samples must be larger than 0", "MaxNumberOfTrainingSamples");
        }
        if (availableSamples <= 0) {
          throw new ArgumentException("TrainingSamplesEnd must be larger than TrainingSamplesStart", "TrainingSamplesEnd");
        }
        nTrainingSamples = Math.Min(maxTraining.Data, availableSamples);
      }
      scope.AddVariable(new Variable(scope.TranslateName("ActualTrainingSamplesStart"), new IntData(trainingStart)));
      scope.AddVariable(new Variable(scope.TranslateName("ActualTrainingSamplesEnd"), new IntData(trainingStart + nTrainingSamples)));
      return null;
    }

    /// <summary>
    /// Builds a dataset that contains only the target variable (column 0) followed by the
    /// allowed input variables (columns 1..n), copying values, variable names and scaling
    /// parameters from <paramref name="operatorDataset"/>.
    /// </summary>
    /// <param name="operatorDataset">The full dataset to copy from.</param>
    /// <param name="targetVariable">Column index of the target variable in <paramref name="operatorDataset"/>.</param>
    /// <param name="operatorAllowedFeatures">Column indexes of the input variables in <paramref name="operatorDataset"/>.</param>
    /// <returns>The reduced dataset.</returns>
    private Dataset CreateNewDataset(Dataset operatorDataset, int targetVariable, ItemList<IntData> operatorAllowedFeatures) {
      int columns = (operatorAllowedFeatures.Count() + 1);
      int rows = operatorDataset.Rows;

      // sourceColumns[c] is the column of operatorDataset that ends up in column c of the new dataset.
      // Resolved once here instead of once per cell.
      int[] sourceColumns = new int[columns];
      sourceColumns[0] = targetVariable;
      for (int column = 1; column < columns; column++) {
        sourceColumns[column] = operatorAllowedFeatures[column - 1].Data;
      }

      // Values are laid out row by row: index = row * columns + column.
      double[] values = new double[rows * columns];
      for (int row = 0; row < rows; row++) {
        int rowOffset = row * columns;
        for (int column = 0; column < columns; column++) {
          values[rowOffset + column] = operatorDataset.GetValue(row, sourceColumns[column]);
        }
      }

      // NOTE(review): the order Columns/Rows -> Samples -> names/scaling is kept exactly as before;
      // presumably the Dataset setters depend on it - confirm before reordering.
      Dataset ds = new Dataset();
      ds.Columns = columns;
      ds.Rows = rows;
      ds.Name = operatorDataset.Name;
      ds.Samples = values;

      double[] sourceScalingFactor = operatorDataset.ScalingFactor;
      double[] sourceScalingOffset = operatorDataset.ScalingOffset;
      double[] scalingFactor = new double[columns];
      double[] scalingOffset = new double[columns];
      for (int column = 0; column < columns; column++) {
        int sourceColumn = sourceColumns[column];
        ds.SetVariableName(column, operatorDataset.GetVariableName(sourceColumn));
        scalingFactor[column] = sourceScalingFactor[sourceColumn];
        scalingOffset[column] = sourceScalingOffset[sourceColumn];
      }
      ds.ScalingOffset = scalingOffset;
      ds.ScalingFactor = scalingFactor;
      return ds;
    }

    /// <summary>
    /// Adds a clone of the value of the operator variable <paramref name="variableName"/>
    /// to <paramref name="scope"/> under the same name.
    /// </summary>
    /// <param name="variableName">Name of the operator variable to copy.</param>
    /// <param name="scope">The scope that receives the copy.</param>
    private void AddVariableToScope(string variableName, IScope scope) {
      scope.AddVariable(new Variable(variableName, (IItem)GetVariable(variableName).Value.Clone()));
    }

    /// <summary>
    /// Registers a variable info of kind <see cref="VariableKind.New"/>, marks it as local and
    /// creates the corresponding operator variable holding <paramref name="initialValue"/>.
    /// </summary>
    /// <param name="name">Formal name of the variable.</param>
    /// <param name="description">Description shown for the variable info.</param>
    /// <param name="dataType">Data type of the variable.</param>
    /// <param name="initialValue">Initial value of the operator-local variable.</param>
    private void RegisterLocalVariable(string name, string description, Type dataType, IItem initialValue) {
      AddVariableInfo(new VariableInfo(name, description, dataType, VariableKind.New));
      GetVariableInfo(name).Local = true;
      AddVariable(new Variable(name, initialValue));
    }
  }
}
Note: See TracBrowser for help on using the repository browser.