Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.LinearRegression/3.2/LinearRegression.cs @ 2154

Last change on this file since 2154 was 2154, checked in by gkronber, 15 years ago

Added linear regression plugin. #697

File size: 16.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Text;
26using HeuristicLab.Core;
27using System.Xml;
28using System.Diagnostics;
29using HeuristicLab.DataAnalysis;
30using HeuristicLab.Data;
31using HeuristicLab.Operators;
32using HeuristicLab.GP.StructureIdentification;
33using HeuristicLab.Modeling;
34using HeuristicLab.GP;
35
namespace HeuristicLab.LinearRegression {
  /// <summary>
  /// Algorithm item that owns a sequential engine whose operator graph consists of a
  /// global variable injector, a problem injector, the linear regression operator and
  /// a model analyser, in that order.
  /// </summary>
  public class LinearRegression : ItemBase, IEditable, IAlgorithm {

    // Scope variable under which the evaluators and CreateLRModel look up the model.
    private const string ModelVariableName = "LinearRegressionModel";

    // Position of the problem injector inside the main sequential processor
    // (index 0 is the global injector, see CreateAlgorithm).
    private const int ProblemInjectorIndex = 1;

    // Prefixes of the sample partitions; every evaluator is created once per partition
    // and reads "<prefix>SamplesStart" / "<prefix>SamplesEnd".
    private static readonly string[] Partitions = new string[] { "Training", "Validation", "Test" };

    /// <summary>Fixed display name of the algorithm.</summary>
    public string Name { get { return "LinearRegression"; } }

    /// <summary>Description of the algorithm (still a placeholder).</summary>
    public string Description { get { return "TODO"; } }

    private readonly SequentialEngine.SequentialEngine engine;

    /// <summary>The engine that executes the operator graph built in the constructor.</summary>
    public IEngine Engine {
      get { return engine; }
    }

    /// <summary>
    /// Gets or sets the value of the "Dataset" variable of the current problem injector.
    /// </summary>
    public Dataset Dataset {
      get { return ProblemInjector.GetVariableValue<Dataset>("Dataset", null, false); }
      set { ProblemInjector.GetVariable("Dataset").Value = value; }
    }

    /// <summary>
    /// Gets or sets the integer stored in the "TargetVariable" variable of the current
    /// problem injector. CreateLRModel passes this value to Dataset.GetVariableName.
    /// </summary>
    public int TargetVariable {
      get { return ProblemInjector.GetVariableValue<IntData>("TargetVariable", null, false).Data; }
      set { ProblemInjector.GetVariableValue<IntData>("TargetVariable", null, false).Data = value; }
    }

    /// <summary>
    /// Gets or replaces the sub-operator at index 1 of the main sequential processor,
    /// which is where CreateAlgorithm places the problem injector.
    /// </summary>
    public IOperator ProblemInjector {
      get {
        return GetMainOperator().SubOperators[ProblemInjectorIndex];
      }
      set {
        IOperator main = GetMainOperator();
        main.RemoveSubOperator(ProblemInjectorIndex);
        main.AddSubOperator(value, ProblemInjectorIndex);
      }
    }

    /// <summary>
    /// Builds the result model from the variables in the engine's global scope.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Thrown when the engine has not terminated yet.
    /// </exception>
    public IModel Model {
      get {
        if (!engine.Terminated) throw new InvalidOperationException("The algorithm is still running. Wait until the algorithm is terminated to retrieve the result.");
        return CreateLRModel(engine.GlobalScope);
      }
    }

    /// <summary>
    /// Creates the engine and installs the linear regression operator graph as its
    /// initial operator.
    /// </summary>
    public LinearRegression() {
      engine = new SequentialEngine.SequentialEngine();
      CombinedOperator algo = CreateAlgorithm();
      engine.OperatorGraph.AddOperator(algo);
      engine.OperatorGraph.InitialOperator = algo;
    }

    /// <summary>
    /// Builds the combined operator for the whole algorithm. Its initial operator is a
    /// sequential processor with four sub-operators: global injector, problem injector,
    /// linear regression operator and model analyser.
    /// </summary>
    private CombinedOperator CreateAlgorithm() {
      CombinedOperator algo = new CombinedOperator();
      algo.Name = "LinearRegression";
      SequentialProcessor seq = new SequentialProcessor();
      seq.Name = "LinearRegression";

      // The regression itself is computed on the training partition only.
      LinearRegressionOperator lrOperator = new LinearRegressionOperator();
      lrOperator.GetVariableInfo("SamplesStart").ActualName = "TrainingSamplesStart";
      lrOperator.GetVariableInfo("SamplesEnd").ActualName = "TrainingSamplesEnd";

      // Order matters: the ProblemInjector property relies on index 1.
      seq.AddSubOperator(CreateGlobalInjector());
      seq.AddSubOperator(new ProblemInjector());
      seq.AddSubOperator(lrOperator);
      seq.AddSubOperator(CreateModelAnalyser());

      algo.OperatorGraph.InitialOperator = seq;
      algo.OperatorGraph.AddOperator(seq);

      return algo;
    }

    /// <summary>
    /// Creates a variable injector holding the variables "PunishmentFactor",
    /// "TotalEvaluatedNodes", "TreeEvaluator" and "UseEstimatedTargetValue".
    /// </summary>
    private IOperator CreateGlobalInjector() {
      VariableInjector injector = new VariableInjector();
      injector.AddVariable(new HeuristicLab.Core.Variable("PunishmentFactor", new DoubleData(10)));
      injector.AddVariable(new HeuristicLab.Core.Variable("TotalEvaluatedNodes", new DoubleData(0)));
      injector.AddVariable(new HeuristicLab.Core.Variable("TreeEvaluator", new HL2TreeEvaluator()));
      injector.AddVariable(new HeuristicLab.Core.Variable("UseEstimatedTargetValue", new BoolData(false)));

      return injector;
    }

    /// <summary>
    /// Builds the "Model Analyzer" combined operator. For each of MSE, R2, MAPE, MAPRE
    /// and VAF it adds one evaluator per partition (training, validation, test),
    /// followed by the variable quality impact and evaluation impact calculators.
    /// </summary>
    /// <remarks>
    /// The training MSE evaluator is bound to the training sample range. It was
    /// previously bound to the validation range, so "TrainingQuality" duplicated
    /// "ValidationQuality".
    /// </remarks>
    private IOperator CreateModelAnalyser() {
      CombinedOperator modelAnalyser = new CombinedOperator();
      modelAnalyser.Name = "Model Analyzer";
      SequentialProcessor seqProc = new SequentialProcessor();

      // MSE results are stored as "<partition>Quality"; all other measures use their own name.
      AddEvaluators<MeanSquaredErrorEvaluator>(seqProc, "Mse", "MSE", "Quality");
      AddEvaluators<CoefficientOfDeterminationEvaluator>(seqProc, "R2", "R2", "R2");
      AddEvaluators<MeanAbsolutePercentageErrorEvaluator>(seqProc, "Mape", "MAPE", "MAPE");
      AddEvaluators<MeanAbsolutePercentageOfRangeErrorEvaluator>(seqProc, "Mapre", "MAPRE", "MAPRE");
      AddEvaluators<VarianceAccountedForEvaluator>(seqProc, "Vaf", "VAF", "VAF");

      HeuristicLab.Modeling.VariableQualityImpactCalculator qualImpactCalc = new HeuristicLab.GP.StructureIdentification.VariableQualityImpactCalculator();
      qualImpactCalc.GetVariableInfo("FunctionTree").ActualName = ModelVariableName;
      HeuristicLab.GP.StructureIdentification.VariableEvaluationImpactCalculator evalImpactCalc = new HeuristicLab.GP.StructureIdentification.VariableEvaluationImpactCalculator();
      evalImpactCalc.GetVariableInfo("FunctionTree").ActualName = ModelVariableName;
      seqProc.AddSubOperator(qualImpactCalc);
      seqProc.AddSubOperator(evalImpactCalc);

      modelAnalyser.OperatorGraph.InitialOperator = seqProc;
      modelAnalyser.OperatorGraph.AddOperator(seqProc);
      return modelAnalyser;
    }

    /// <summary>
    /// Creates one evaluator of type <typeparamref name="T"/> per partition, wires it to
    /// the regression model and to that partition's sample range, and appends it to
    /// <paramref name="target"/> in the order training, validation, test.
    /// </summary>
    /// <param name="target">Processor that receives the evaluators as sub-operators.</param>
    /// <param name="nameInfix">Middle part of the operator name, e.g. "Mse" gives "TrainingMseEvaluator".</param>
    /// <param name="resultFormalName">Formal name of the evaluator's result variable, e.g. "MSE".</param>
    /// <param name="resultSuffix">Suffix of the actual result name, e.g. "Quality" gives "TrainingQuality".</param>
    private static void AddEvaluators<T>(SequentialProcessor target, string nameInfix, string resultFormalName, string resultSuffix)
      where T : class, IOperator, new() {
      foreach (string partition in Partitions) {
        T evaluator = new T();
        evaluator.Name = partition + nameInfix + "Evaluator";
        evaluator.GetVariableInfo("FunctionTree").ActualName = ModelVariableName;
        evaluator.GetVariableInfo(resultFormalName).ActualName = partition + resultSuffix;
        evaluator.GetVariableInfo("SamplesStart").ActualName = partition + "SamplesStart";
        evaluator.GetVariableInfo("SamplesEnd").ActualName = partition + "SamplesEnd";
        target.AddSubOperator(evaluator);
      }
    }

    /// <summary>
    /// Copies the quality measures, the regression model, the dataset, the target
    /// variable name and the variable impacts from the given scope into a new model.
    /// </summary>
    /// <param name="bestModelScope">Scope holding the result variables of a finished run.</param>
    /// <returns>The populated model.</returns>
    protected internal virtual Model CreateLRModel(IScope bestModelScope) {
      Model model = new Model();
      model.TrainingMeanSquaredError = GetDoubleValue(bestModelScope, "TrainingQuality");
      model.ValidationMeanSquaredError = GetDoubleValue(bestModelScope, "ValidationQuality");
      model.TestMeanSquaredError = GetDoubleValue(bestModelScope, "TestQuality");
      model.TrainingCoefficientOfDetermination = GetDoubleValue(bestModelScope, "TrainingR2");
      model.ValidationCoefficientOfDetermination = GetDoubleValue(bestModelScope, "ValidationR2");
      model.TestCoefficientOfDetermination = GetDoubleValue(bestModelScope, "TestR2");
      model.TrainingMeanAbsolutePercentageError = GetDoubleValue(bestModelScope, "TrainingMAPE");
      model.ValidationMeanAbsolutePercentageError = GetDoubleValue(bestModelScope, "ValidationMAPE");
      model.TestMeanAbsolutePercentageError = GetDoubleValue(bestModelScope, "TestMAPE");
      model.TrainingMeanAbsolutePercentageOfRangeError = GetDoubleValue(bestModelScope, "TrainingMAPRE");
      model.ValidationMeanAbsolutePercentageOfRangeError = GetDoubleValue(bestModelScope, "ValidationMAPRE");
      model.TestMeanAbsolutePercentageOfRangeError = GetDoubleValue(bestModelScope, "TestMAPRE");
      model.TrainingVarianceAccountedFor = GetDoubleValue(bestModelScope, "TrainingVAF");
      model.ValidationVarianceAccountedFor = GetDoubleValue(bestModelScope, "ValidationVAF");
      model.TestVarianceAccountedFor = GetDoubleValue(bestModelScope, "TestVAF");

      model.Data = bestModelScope.GetVariableValue<IFunctionTree>(ModelVariableName, false);
      HeuristicLab.DataAnalysis.Dataset ds = bestModelScope.GetVariableValue<Dataset>("Dataset", true);
      model.Dataset = ds;
      model.TargetVariable = ds.GetVariableName(bestModelScope.GetVariableValue<IntData>("TargetVariable", true).Data);

      // Each impact row is a two-element list: variable name followed by its impact value.
      ItemList evaluationImpacts = bestModelScope.GetVariableValue<ItemList>("VariableEvaluationImpacts", false);
      ItemList qualityImpacts = bestModelScope.GetVariableValue<ItemList>("VariableQualityImpacts", false);
      foreach (ItemList row in evaluationImpacts) {
        model.SetVariableEvaluationImpact(((StringData)row[0]).Data, ((DoubleData)row[1]).Data);
      }
      foreach (ItemList row in qualityImpacts) {
        model.SetVariableQualityImpact(((StringData)row[0]).Data, ((DoubleData)row[1]).Data);
      }

      return model;
    }

    /// <summary>
    /// Reads the DoubleData variable <paramref name="variableName"/> from
    /// <paramref name="scope"/> (second lookup argument false, as in every quality
    /// measure read of this class) and returns its value.
    /// </summary>
    private static double GetDoubleValue(IScope scope, string variableName) {
      return scope.GetVariableValue<DoubleData>(variableName, false).Data;
    }

    /// <summary>
    /// Returns the initial operator of the combined operator that is the engine's
    /// initial operator, i.e. the sequential processor built by CreateAlgorithm.
    /// </summary>
    private IOperator GetMainOperator() {
      CombinedOperator lr = (CombinedOperator)Engine.OperatorGraph.InitialOperator;
      return lr.OperatorGraph.InitialOperator;
    }

    /// <summary>Returns the view created by the underlying engine.</summary>
    public override IView CreateView() {
      return engine.CreateView();
    }

    #region IEditable Members

    /// <summary>Returns the editor created by the underlying engine.</summary>
    public IEditor CreateEditor() {
      return engine.CreateEditor();
    }

    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.