Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.LinearRegression/3.2/LinearRegression.cs @ 2242

Last change on this file since 2242 was 2242, checked in by gkronber, 15 years ago

Changed variable impact calculation operators to use the actual training sub-set instead of the full training set. #717 (SVM regression engine takes a very long time to finish)

File size: 17.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Text;
26using HeuristicLab.Core;
27using System.Xml;
28using System.Diagnostics;
29using HeuristicLab.DataAnalysis;
30using HeuristicLab.Data;
31using HeuristicLab.Operators;
32using HeuristicLab.GP.StructureIdentification;
33using HeuristicLab.Modeling;
34using HeuristicLab.GP;
35using HeuristicLab.Random;
36using HeuristicLab.GP.Interfaces;
37
namespace HeuristicLab.LinearRegression {
  /// <summary>
  /// Linear regression algorithm implemented as an operator graph that is executed by a
  /// sequential engine.
  /// </summary>
  /// <remarks>
  /// The main sequential processor runs, in this order: a random injector, the problem
  /// injector, a global variable injector, a dataset shuffler, the linear regression
  /// operator and a model analyser (quality measures and variable impacts).
  /// </remarks>
  public class LinearRegression : ItemBase, IEditable, IAlgorithm {

    // Position of the problem injector among the sub-operators of the main sequential
    // processor. Must stay in sync with the order in which CreateAlgorithm() adds the
    // sub-operators (index 0 is the random injector).
    private const int ProblemInjectorIndex = 1;

    /// <summary>Display name of the algorithm.</summary>
    public string Name { get { return "LinearRegression"; } }

    /// <summary>Description of the algorithm.</summary>
    // NOTE(review): the returned text is still a placeholder; kept unchanged because it
    // is a runtime string that callers may display.
    public string Description { get { return "TODO"; } }

    // Assigned once in the constructor and never replaced afterwards.
    private readonly SequentialEngine.SequentialEngine engine;

    /// <summary>The engine that executes the operator graph of this algorithm.</summary>
    public IEngine Engine {
      get { return engine; }
    }

    /// <summary>
    /// The dataset stored in the "Dataset" variable of the current problem injector.
    /// </summary>
    public Dataset Dataset {
      get { return ProblemInjector.GetVariableValue<Dataset>("Dataset", null, false); }
      set { ProblemInjector.GetVariable("Dataset").Value = value; }
    }

    /// <summary>
    /// Integer stored in the "TargetVariable" variable of the current problem injector.
    /// <see cref="CreateLRModel"/> resolves it to a variable name through the dataset.
    /// </summary>
    public int TargetVariable {
      get { return ProblemInjector.GetVariableValue<IntData>("TargetVariable", null, false).Data; }
      set { ProblemInjector.GetVariableValue<IntData>("TargetVariable", null, false).Data = value; }
    }

    /// <summary>
    /// The operator at the problem injector position of the main sequential processor.
    /// Setting it replaces the operator at that position.
    /// </summary>
    public IOperator ProblemInjector {
      get {
        IOperator main = GetMainOperator();
        return main.SubOperators[ProblemInjectorIndex];
      }
      set {
        IOperator main = GetMainOperator();
        main.RemoveSubOperator(ProblemInjectorIndex);
        main.AddSubOperator(value, ProblemInjectorIndex);
      }
    }

    /// <summary>
    /// The model built from the variables in the global scope of the engine.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Thrown when the engine has not terminated.
    /// </exception>
    public IModel Model {
      get {
        if (!engine.Terminated) throw new InvalidOperationException("The algorithm is still running. Wait until the algorithm is terminated to retrieve the result.");
        IScope bestModelScope = engine.GlobalScope;
        return CreateLRModel(bestModelScope);
      }
    }

    /// <summary>
    /// Creates the engine and installs the linear regression operator graph as its
    /// initial operator.
    /// </summary>
    public LinearRegression() {
      engine = new SequentialEngine.SequentialEngine();
      CombinedOperator algo = CreateAlgorithm();
      engine.OperatorGraph.AddOperator(algo);
      engine.OperatorGraph.InitialOperator = algo;
    }

    /// <summary>
    /// Builds the combined operator that wraps the main sequential processor of the
    /// algorithm.
    /// </summary>
    /// <returns>The combined operator whose initial operator is the main sequence.</returns>
    private CombinedOperator CreateAlgorithm() {
      CombinedOperator algo = new CombinedOperator();
      SequentialProcessor seq = new SequentialProcessor();
      algo.Name = "LinearRegression";
      seq.Name = "LinearRegression";

      var randomInjector = new RandomInjector();
      randomInjector.Name = "Random Injector";
      IOperator globalInjector = CreateGlobalInjector();
      ProblemInjector problemInjector = new ProblemInjector();
      // Mark the variable as local and provide its value (5000) directly on the injector.
      problemInjector.GetVariableInfo("MaxNumberOfTrainingSamples").Local = true;
      problemInjector.AddVariable(new HeuristicLab.Core.Variable("MaxNumberOfTrainingSamples", new IntData(5000)));

      // The shuffler works on the full training range ...
      IOperator shuffler = new DatasetShuffler();
      shuffler.GetVariableInfo("ShuffleStart").ActualName = "TrainingSamplesStart";
      shuffler.GetVariableInfo("ShuffleEnd").ActualName = "TrainingSamplesEnd";

      // ... whereas the regression itself is bound to the "actual" training range.
      LinearRegressionOperator lrOperator = new LinearRegressionOperator();
      lrOperator.GetVariableInfo("SamplesStart").ActualName = "ActualTrainingSamplesStart";
      lrOperator.GetVariableInfo("SamplesEnd").ActualName = "ActualTrainingSamplesEnd";

      // The order matters: ProblemInjectorIndex refers to the position of problemInjector.
      seq.AddSubOperator(randomInjector);
      seq.AddSubOperator(problemInjector);
      seq.AddSubOperator(globalInjector);
      seq.AddSubOperator(shuffler);
      seq.AddSubOperator(lrOperator);
      seq.AddSubOperator(CreateModelAnalyser());

      algo.OperatorGraph.InitialOperator = seq;
      algo.OperatorGraph.AddOperator(seq);

      return algo;
    }

    /// <summary>
    /// Creates the injector for the variables "PunishmentFactor", "TotalEvaluatedNodes",
    /// "TreeEvaluator" and "UseEstimatedTargetValue".
    /// </summary>
    /// <returns>The configured variable injector.</returns>
    private IOperator CreateGlobalInjector() {
      VariableInjector injector = new VariableInjector();
      injector.AddVariable(new HeuristicLab.Core.Variable("PunishmentFactor", new DoubleData(10)));
      injector.AddVariable(new HeuristicLab.Core.Variable("TotalEvaluatedNodes", new DoubleData(0)));
      injector.AddVariable(new HeuristicLab.Core.Variable("TreeEvaluator", new HL2TreeEvaluator()));
      injector.AddVariable(new HeuristicLab.Core.Variable("UseEstimatedTargetValue", new BoolData(false)));

      return injector;
    }

    /// <summary>
    /// Builds the "Model Analyzer" operator: MSE, R2, MAPE, MAPRE and VAF evaluators for
    /// the training, validation and test sample ranges, followed by the variable quality
    /// impact and variable evaluation impact calculators.
    /// </summary>
    /// <returns>The combined operator wrapping the analysis sequence.</returns>
    private IOperator CreateModelAnalyser() {
      CombinedOperator modelAnalyser = new CombinedOperator();
      modelAnalyser.Name = "Model Analyzer";
      SequentialProcessor seqProc = new SequentialProcessor();

      #region MSE
      seqProc.AddSubOperator(BindToTraining(new MeanSquaredErrorEvaluator { Name = "TrainingMseEvaluator" }, "MSE", "TrainingQuality"));
      seqProc.AddSubOperator(BindToValidation(new MeanSquaredErrorEvaluator { Name = "ValidationMseEvaluator" }, "MSE", "ValidationQuality"));
      seqProc.AddSubOperator(BindToTest(new MeanSquaredErrorEvaluator { Name = "TestMseEvaluator" }, "MSE", "TestQuality"));
      #endregion

      #region R2
      seqProc.AddSubOperator(BindToTraining(new CoefficientOfDeterminationEvaluator { Name = "TrainingR2Evaluator" }, "R2", "TrainingR2"));
      seqProc.AddSubOperator(BindToValidation(new CoefficientOfDeterminationEvaluator { Name = "ValidationR2Evaluator" }, "R2", "ValidationR2"));
      seqProc.AddSubOperator(BindToTest(new CoefficientOfDeterminationEvaluator { Name = "TestR2Evaluator" }, "R2", "TestR2"));
      #endregion

      #region MAPE
      seqProc.AddSubOperator(BindToTraining(new MeanAbsolutePercentageErrorEvaluator { Name = "TrainingMapeEvaluator" }, "MAPE", "TrainingMAPE"));
      seqProc.AddSubOperator(BindToValidation(new MeanAbsolutePercentageErrorEvaluator { Name = "ValidationMapeEvaluator" }, "MAPE", "ValidationMAPE"));
      seqProc.AddSubOperator(BindToTest(new MeanAbsolutePercentageErrorEvaluator { Name = "TestMapeEvaluator" }, "MAPE", "TestMAPE"));
      #endregion

      #region MAPRE
      seqProc.AddSubOperator(BindToTraining(new MeanAbsolutePercentageOfRangeErrorEvaluator { Name = "TrainingMapreEvaluator" }, "MAPRE", "TrainingMAPRE"));
      seqProc.AddSubOperator(BindToValidation(new MeanAbsolutePercentageOfRangeErrorEvaluator { Name = "ValidationMapreEvaluator" }, "MAPRE", "ValidationMAPRE"));
      seqProc.AddSubOperator(BindToTest(new MeanAbsolutePercentageOfRangeErrorEvaluator { Name = "TestMapreEvaluator" }, "MAPRE", "TestMAPRE"));
      #endregion

      #region VAF
      seqProc.AddSubOperator(BindToTraining(new VarianceAccountedForEvaluator { Name = "TrainingVafEvaluator" }, "VAF", "TrainingVAF"));
      seqProc.AddSubOperator(BindToValidation(new VarianceAccountedForEvaluator { Name = "ValidationVafEvaluator" }, "VAF", "ValidationVAF"));
      seqProc.AddSubOperator(BindToTest(new VarianceAccountedForEvaluator { Name = "TestVafEvaluator" }, "VAF", "TestVAF"));
      #endregion

      #region Variable impacts
      // Both impact calculators work on the actual training range of the model.
      HeuristicLab.GP.StructureIdentification.VariableEvaluationImpactCalculator evalImpactCalc = new HeuristicLab.GP.StructureIdentification.VariableEvaluationImpactCalculator();
      evalImpactCalc.GetVariableInfo("TrainingSamplesStart").ActualName = "ActualTrainingSamplesStart";
      evalImpactCalc.GetVariableInfo("TrainingSamplesEnd").ActualName = "ActualTrainingSamplesEnd";
      evalImpactCalc.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel";
      HeuristicLab.Modeling.VariableQualityImpactCalculator qualImpactCalc = new HeuristicLab.GP.StructureIdentification.VariableQualityImpactCalculator();
      qualImpactCalc.GetVariableInfo("TrainingSamplesStart").ActualName = "ActualTrainingSamplesStart";
      qualImpactCalc.GetVariableInfo("TrainingSamplesEnd").ActualName = "ActualTrainingSamplesEnd";
      qualImpactCalc.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel";
      // The quality impact calculator is scheduled before the evaluation impact calculator.
      seqProc.AddSubOperator(qualImpactCalc);
      seqProc.AddSubOperator(evalImpactCalc);
      #endregion

      modelAnalyser.OperatorGraph.InitialOperator = seqProc;
      modelAnalyser.OperatorGraph.AddOperator(seqProc);
      return modelAnalyser;
    }

    // Binds an evaluator to the actual training sample range.
    private static IOperator BindToTraining(IOperator evaluator, string resultVariable, string resultActualName) {
      return BindEvaluator(evaluator, resultVariable, resultActualName, "ActualTrainingSamplesStart", "ActualTrainingSamplesEnd");
    }

    // Binds an evaluator to the validation sample range.
    private static IOperator BindToValidation(IOperator evaluator, string resultVariable, string resultActualName) {
      return BindEvaluator(evaluator, resultVariable, resultActualName, "ValidationSamplesStart", "ValidationSamplesEnd");
    }

    // Binds an evaluator to the test sample range.
    private static IOperator BindToTest(IOperator evaluator, string resultVariable, string resultActualName) {
      return BindEvaluator(evaluator, resultVariable, resultActualName, "TestSamplesStart", "TestSamplesEnd");
    }

    // Points an evaluator at the "LinearRegressionModel" variable, redirects its result
    // variable to resultActualName and sets the sample range it reads; returns the same
    // evaluator instance so that calls can be nested.
    private static IOperator BindEvaluator(IOperator evaluator, string resultVariable, string resultActualName, string samplesStartActualName, string samplesEndActualName) {
      evaluator.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel";
      evaluator.GetVariableInfo(resultVariable).ActualName = resultActualName;
      evaluator.GetVariableInfo("SamplesStart").ActualName = samplesStartActualName;
      evaluator.GetVariableInfo("SamplesEnd").ActualName = samplesEndActualName;
      return evaluator;
    }


    /// <summary>
    /// Creates a model from the result variables found in the given scope: quality
    /// measures, the regression model itself, the dataset, the target variable name and
    /// the variable impacts.
    /// </summary>
    /// <param name="bestModelScope">Scope that holds the result variables of a finished run.</param>
    /// <returns>The populated model.</returns>
    protected internal virtual Model CreateLRModel(IScope bestModelScope) {
      Model model = new Model();
      // Quality measures written by the evaluators of the model analyser.
      model.TrainingMeanSquaredError = bestModelScope.GetVariableValue<DoubleData>("TrainingQuality", false).Data;
      model.ValidationMeanSquaredError = bestModelScope.GetVariableValue<DoubleData>("ValidationQuality", false).Data;
      model.TestMeanSquaredError = bestModelScope.GetVariableValue<DoubleData>("TestQuality", false).Data;
      model.TrainingCoefficientOfDetermination = bestModelScope.GetVariableValue<DoubleData>("TrainingR2", false).Data;
      model.ValidationCoefficientOfDetermination = bestModelScope.GetVariableValue<DoubleData>("ValidationR2", false).Data;
      model.TestCoefficientOfDetermination = bestModelScope.GetVariableValue<DoubleData>("TestR2", false).Data;
      model.TrainingMeanAbsolutePercentageError = bestModelScope.GetVariableValue<DoubleData>("TrainingMAPE", false).Data;
      model.ValidationMeanAbsolutePercentageError = bestModelScope.GetVariableValue<DoubleData>("ValidationMAPE", false).Data;
      model.TestMeanAbsolutePercentageError = bestModelScope.GetVariableValue<DoubleData>("TestMAPE", false).Data;
      model.TrainingMeanAbsolutePercentageOfRangeError = bestModelScope.GetVariableValue<DoubleData>("TrainingMAPRE", false).Data;
      model.ValidationMeanAbsolutePercentageOfRangeError = bestModelScope.GetVariableValue<DoubleData>("ValidationMAPRE", false).Data;
      model.TestMeanAbsolutePercentageOfRangeError = bestModelScope.GetVariableValue<DoubleData>("TestMAPRE", false).Data;
      model.TrainingVarianceAccountedFor = bestModelScope.GetVariableValue<DoubleData>("TrainingVAF", false).Data;
      model.ValidationVarianceAccountedFor = bestModelScope.GetVariableValue<DoubleData>("ValidationVAF", false).Data;
      model.TestVarianceAccountedFor = bestModelScope.GetVariableValue<DoubleData>("TestVAF", false).Data;

      model.Data = bestModelScope.GetVariableValue<IGeneticProgrammingModel>("LinearRegressionModel", false);
      HeuristicLab.DataAnalysis.Dataset ds = bestModelScope.GetVariableValue<Dataset>("Dataset", true);
      model.Dataset = ds;
      // The scope stores the target variable as an integer; the model stores its name.
      model.TargetVariable = ds.GetVariableName(bestModelScope.GetVariableValue<IntData>("TargetVariable", true).Data);

      // Each impact row is read as a pair: [0] variable name, [1] impact value.
      // NOTE(review): a variable that occurs in both impact lists is passed to
      // AddInputVariables twice; presumably the model tolerates duplicates - confirm.
      ItemList evaluationImpacts = bestModelScope.GetVariableValue<ItemList>("VariableEvaluationImpacts", false);
      ItemList qualityImpacts = bestModelScope.GetVariableValue<ItemList>("VariableQualityImpacts", false);
      foreach (ItemList row in evaluationImpacts) {
        string variableName = ((StringData)row[0]).Data;
        double impact = ((DoubleData)row[1]).Data;
        model.SetVariableEvaluationImpact(variableName, impact);
        model.AddInputVariables(variableName);
      }
      foreach (ItemList row in qualityImpacts) {
        string variableName = ((StringData)row[0]).Data;
        double impact = ((DoubleData)row[1]).Data;
        model.SetVariableQualityImpact(variableName, impact);
        model.AddInputVariables(variableName);
      }

      return model;
    }

    /// <summary>
    /// Returns the main sequential processor, i.e. the initial operator of the combined
    /// operator that is installed as the initial operator of the engine.
    /// </summary>
    private IOperator GetMainOperator() {
      CombinedOperator lr = (CombinedOperator)Engine.OperatorGraph.InitialOperator;
      return lr.OperatorGraph.InitialOperator;
    }

    /// <summary>Creates a view by delegating to the engine.</summary>
    public override IView CreateView() {
      return engine.CreateView();
    }

    #region IEditable Members

    /// <summary>Creates an editor by delegating to the engine.</summary>
    public IEditor CreateEditor() {
      return engine.CreateEditor();
    }

    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.