
source: trunk/sources/HeuristicLab.GP.StructureIdentification.Classification/3.3/MulticlassModeller.cs @ 2356

Last change on this file since 2356 was 2328, checked in by gkronber, 15 years ago

This is the remaining part of changeset r2327.
Applied the changes in the modeling plugins that are necessary for the new model analyzer (#722):

  • Predictors now have properties for the lower and upper limit of the predicted value.
  • Added views for predictors that show these limits (also added a new view for GeneticProgrammingModel that shows the size and height of the model).
  • Reintroduced TreeEvaluatorInjectors that read a PunishmentFactor and calculate the lower and upper limits for estimated values (the limits are set in the tree evaluators); a sketch of such a calculation follows this list.
  • Added operators to create predictors. Changed the modeling algorithms to use the predictors for the calculation of final model qualities and variable impacts (to be compatible with the new model analyzer, the predictors use a very large PunishmentFactor).
  • Replaced all private implementations of double.IsAlmost with the implementation in HL.Common (see #733, r2324).
  • Implemented the operator SolutionExtractor and moved BestSolutionStorer from HL.Logging to HL.Modeling (fixes #734).
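
The limit calculation itself lives in the TreeEvaluatorInjectors, not in this file. As a rough idea of what such a calculation can look like, the following minimal, standalone C# sketch derives lower and upper estimation limits from the training target range widened by a PunishmentFactor; the class name, the method signature, and the exact formula are assumptions made for illustration and are not taken from the HeuristicLab sources.

using System;
using System.Linq;

// Hypothetical sketch (not the HeuristicLab implementation): estimation limits derived
// from the observed training target range, widened symmetrically by a PunishmentFactor.
public static class EstimationLimitsSketch {
  // Returns the lower and upper limit; a very large punishmentFactor effectively disables clipping.
  public static Tuple<double, double> Calculate(double[] trainingTargets, double punishmentFactor) {
    double min = trainingTargets.Min();   // smallest target value in the training partition
    double max = trainingTargets.Max();   // largest target value in the training partition
    double range = max - min;
    return Tuple.Create(min - punishmentFactor * range, max + punishmentFactor * range);
  }
}

According to the change note above, the actual limits are then set in the tree evaluators.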
File size: 8.1 KB
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.DataAnalysis;

namespace HeuristicLab.GP.StructureIdentification.Classification {
  // Splits a multi-class classification problem into one binary (one-vs-one) sub-problem
  // per pair of class values and stores each sub-problem in a new subscope.
  public class MulticlassModeller : OperatorBase {

    private const string DATASET = "Dataset";
    private const string TARGETVARIABLE = "TargetVariable";
    private const string TARGETCLASSVALUES = "TargetClassValues";
    private const string TRAININGSAMPLESSTART = "TrainingSamplesStart";
    private const string TRAININGSAMPLESEND = "TrainingSamplesEnd";
    private const string VALIDATIONSAMPLESSTART = "ValidationSamplesStart";
    private const string VALIDATIONSAMPLESEND = "ValidationSamplesEnd";
    private const string CLASSAVALUE = "ClassAValue";
    private const string CLASSBVALUE = "ClassBValue";
    private const double EPSILON = 1E-6; // unused; value comparisons rely on double.IsAlmost from HeuristicLab.Common
    public override string Description {
      get { return @"Creates a binary classification sub-problem (class A vs. class B) for each pair of target class values and adds it as a subscope."; }
    }

    public MulticlassModeller()
      : base() {
      AddVariableInfo(new VariableInfo(DATASET, "The original dataset and the new dataset parts in the newly created subscopes", typeof(Dataset), VariableKind.In));
      AddVariableInfo(new VariableInfo(TARGETVARIABLE, "TargetVariable", typeof(IntData), VariableKind.In));
      AddVariableInfo(new VariableInfo(TARGETCLASSVALUES, "Class values of the target variable in the original dataset and in the new dataset parts", typeof(ItemList<DoubleData>), VariableKind.In | VariableKind.New));
      AddVariableInfo(new VariableInfo(CLASSAVALUE, "The original class value of the new class A", typeof(DoubleData), VariableKind.New));
      AddVariableInfo(new VariableInfo(CLASSBVALUE, "The original class value of the new class B", typeof(DoubleData), VariableKind.New));
      AddVariableInfo(new VariableInfo(TRAININGSAMPLESSTART, "The start of training samples in the original dataset and starts of training samples in the new dataset parts", typeof(IntData), VariableKind.In | VariableKind.New));
      AddVariableInfo(new VariableInfo(TRAININGSAMPLESEND, "The end of training samples in the original dataset and ends of training samples in the new dataset parts", typeof(IntData), VariableKind.In | VariableKind.New));
      AddVariableInfo(new VariableInfo(VALIDATIONSAMPLESSTART, "The start of validation samples in the original dataset and starts of validation samples in the new dataset parts", typeof(IntData), VariableKind.In | VariableKind.New));
      AddVariableInfo(new VariableInfo(VALIDATIONSAMPLESEND, "The end of validation samples in the original dataset and ends of validation samples in the new dataset parts", typeof(IntData), VariableKind.In | VariableKind.New));
    }

    public override IOperation Apply(IScope scope) {
      Dataset origDataset = GetVariableValue<Dataset>(DATASET, scope, true);
      int targetVariable = GetVariableValue<IntData>(TARGETVARIABLE, scope, true).Data;
      ItemList<DoubleData> classValues = GetVariableValue<ItemList<DoubleData>>(TARGETCLASSVALUES, scope, true);
      int origTrainingSamplesStart = GetVariableValue<IntData>(TRAININGSAMPLESSTART, scope, true).Data;
      int origTrainingSamplesEnd = GetVariableValue<IntData>(TRAININGSAMPLESEND, scope, true).Data;
      int origValidationSamplesStart = GetVariableValue<IntData>(VALIDATIONSAMPLESSTART, scope, true).Data;
      int origValidationSamplesEnd = GetVariableValue<IntData>(VALIDATIONSAMPLESEND, scope, true).Data;
      // every binary sub-problem uses the class values 0.0 (class A) and 1.0 (class B)
      ItemList<DoubleData> binaryClassValues = new ItemList<DoubleData>();
      binaryClassValues.Add(new DoubleData(0.0));
      binaryClassValues.Add(new DoubleData(1.0));
      // create one sub-problem for each unordered pair of original class values (one-vs-one)
      for (int i = 0; i < classValues.Count - 1; i++) {
        for (int j = i + 1; j < classValues.Count; j++) {
          Dataset dataset = new Dataset();
          dataset.Columns = origDataset.Columns;
          double classAValue = classValues[i].Data;
          double classBValue = classValues[j].Data;
          int trainingSamplesStart;
          int trainingSamplesEnd;
          int validationSamplesStart;
          int validationSamplesEnd;

          // copy all training rows that belong to class A or class B and remap the target to 0.0 / 1.0
          trainingSamplesStart = 0;
          List<double[]> rows = new List<double[]>();
          for (int k = origTrainingSamplesStart; k < origTrainingSamplesEnd; k++) {
            double[] row = new double[dataset.Columns];
            double targetValue = origDataset.GetValue(k, targetVariable);
            if (targetValue.IsAlmost(classAValue)) {
              for (int l = 0; l < row.Length; l++) {
                row[l] = origDataset.GetValue(k, l);
              }
              row[targetVariable] = 0;
              rows.Add(row);
            } else if (targetValue.IsAlmost(classBValue)) {
              for (int l = 0; l < row.Length; l++) {
                row[l] = origDataset.GetValue(k, l);
              }
              row[targetVariable] = 1.0;
              rows.Add(row);
            }
          }
          trainingSamplesEnd = rows.Count;
          validationSamplesStart = rows.Count;
          // copy the matching validation rows in the same way
          for (int k = origValidationSamplesStart; k < origValidationSamplesEnd; k++) {
            double[] row = new double[dataset.Columns];
            double targetValue = origDataset.GetValue(k, targetVariable);
            if (targetValue.IsAlmost(classAValue)) {
              for (int l = 0; l < row.Length; l++) {
                row[l] = origDataset.GetValue(k, l);
              }
              row[targetVariable] = 0;
              rows.Add(row);
            } else if (targetValue.IsAlmost(classBValue)) {
              for (int l = 0; l < row.Length; l++) {
                row[l] = origDataset.GetValue(k, l);
              }
              row[targetVariable] = 1.0;
              rows.Add(row);
            }
          }
          validationSamplesEnd = rows.Count;

          // write the collected rows into the new dataset
          dataset.Rows = rows.Count;
          dataset.Samples = new double[dataset.Rows * dataset.Columns];
          for (int k = 0; k < dataset.Rows; k++) {
            for (int l = 0; l < dataset.Columns; l++) {
              dataset.SetValue(k, l, rows[k][l]);
            }
          }

          // store the sub-problem (dataset, class mapping, and partition bounds) in a new subscope
          Scope childScope = new Scope(classAValue + " vs. " + classBValue);

          childScope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName(TARGETCLASSVALUES), binaryClassValues));
          childScope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName(CLASSAVALUE), new DoubleData(classAValue)));
          childScope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName(CLASSBVALUE), new DoubleData(classBValue)));
          childScope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName(TRAININGSAMPLESSTART), new IntData(trainingSamplesStart)));
          childScope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName(TRAININGSAMPLESEND), new IntData(trainingSamplesEnd)));
          childScope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName(VALIDATIONSAMPLESSTART), new IntData(validationSamplesStart)));
          childScope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName(VALIDATIONSAMPLESEND), new IntData(validationSamplesEnd)));
          childScope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName(DATASET), dataset));
          scope.AddSubScope(childScope);
        }
      }
      return null;
    }
  }
}
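
For readers unfamiliar with the operator, the core of Apply() is a one-vs-one decomposition: for every pair of class values it builds a new dataset that contains only the rows of those two classes, with the target column remapped to 0.0 and 1.0. The following self-contained sketch reproduces that idea with plain arrays instead of the HeuristicLab Dataset and Scope types; the names OneVsOneDemo and Decompose are made up for illustration.

using System;
using System.Collections.Generic;

// Illustrative sketch of the one-vs-one decomposition performed in Apply(),
// using plain arrays instead of HeuristicLab's Dataset/Scope types.
public static class OneVsOneDemo {
  public static IEnumerable<KeyValuePair<string, List<double[]>>> Decompose(double[][] data, int targetColumn, IList<double> classValues) {
    for (int i = 0; i < classValues.Count - 1; i++) {
      for (int j = i + 1; j < classValues.Count; j++) {
        double classA = classValues[i];
        double classB = classValues[j];
        List<double[]> rows = new List<double[]>();
        foreach (double[] row in data) {
          double target = row[targetColumn];
          // exact comparison for brevity; the operator uses double.IsAlmost
          if (target == classA || target == classB) {
            double[] copy = (double[])row.Clone();
            copy[targetColumn] = (target == classA) ? 0.0 : 1.0; // class A -> 0.0, class B -> 1.0
            rows.Add(copy);
          }
        }
        yield return new KeyValuePair<string, List<double[]>>(classA + " vs. " + classB, rows);
      }
    }
  }
}

Unlike the operator, this sketch does not distinguish training and validation partitions; in Apply() the same filtering runs once per partition and the resulting row counts become the partition bounds of the child scope.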