Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/LinearRegression/LinearRegressionSolutionCreator.cs @ 5698

Last change on this file since 5698 was 5275, checked in by gkronber, 14 years ago

Merged changes from trunk to data analysis exploration branch and added fractional distance metric evaluator. #1142

File size: 7.0 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Symbols;
30using HeuristicLab.Operators;
31using HeuristicLab.Optimization;
32using HeuristicLab.Parameters;
33using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
34using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
35
36namespace HeuristicLab.Problems.DataAnalysis.Regression.LinearRegression {
37  /// <summary>
38  /// A base class for operators which evaluates OneMax solutions given in BinaryVector encoding.
39  /// </summary>
40  [Item("LinearRegressionSolutionCreator", "Uses linear regression to create a structure tree.")]
41  [StorableClass]
42  public sealed class LinearRegressionSolutionCreator : SingleSuccessorOperator, ISolutionCreator {
43    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
44    private const string DataAnalysisProblemDataParameterName = "DataAnalysisProblemData";
45    private const string SamplesStartParameterName = "SamplesStart";
46    private const string SamplesEndParameterName = "SamplesEnd";
47
48    [StorableConstructor]
49    private LinearRegressionSolutionCreator(bool deserializing) : base(deserializing) { }
50    private LinearRegressionSolutionCreator(LinearRegressionSolutionCreator original, Cloner cloner) : base(original, cloner) { }
51    public LinearRegressionSolutionCreator() {
52      Parameters.Add(new LookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The resulting solution encoded as a symbolic expression tree."));
53      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The problem data on which the linear regression should be calculated."));
54      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesStartParameterName, "The start of the samples on which the linear regression should be applied."));
55      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesEndParameterName, "The end of the samples on which the linear regression should be applied."));
56    }
57
58    public override IDeepCloneable Clone(Cloner cloner) {
59      return new LinearRegressionSolutionCreator(this, cloner);
60    }
61
62    #region parameter properties
63    public ILookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
64      get { return (ILookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
65    }
66    public SymbolicExpressionTree SymbolicExpressionTree {
67      get { return SymbolicExpressionTreeParameter.ActualValue; }
68      set { SymbolicExpressionTreeParameter.ActualValue = value; }
69    }
70
71    public ILookupParameter<DataAnalysisProblemData> DataAnalysisProblemDataParameter {
72      get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[DataAnalysisProblemDataParameterName]; }
73    }
74    public DataAnalysisProblemData DataAnalysisProblemData {
75      get { return DataAnalysisProblemDataParameter.ActualValue; }
76      set { DataAnalysisProblemDataParameter.ActualValue = value; }
77    }
78
79    public IValueLookupParameter<IntValue> SamplesStartParameter {
80      get { return (IValueLookupParameter<IntValue>)Parameters[SamplesStartParameterName]; }
81    }
82    public IntValue SamplesStart {
83      get { return SamplesStartParameter.ActualValue; }
84      set { SamplesStartParameter.ActualValue = value; }
85    }
86
87    public IValueLookupParameter<IntValue> SamplesEndParameter {
88      get { return (IValueLookupParameter<IntValue>)Parameters[SamplesEndParameterName]; }
89    }
90    public IntValue SamplesEnd {
91      get { return SamplesEndParameter.ActualValue; }
92      set { SamplesEndParameter.ActualValue = value; }
93    }
94    #endregion
95
96
97    public override IOperation Apply() {
98      double rmsError, cvRmsError;
99      SymbolicExpressionTree = CreateSymbolicExpressionTree(DataAnalysisProblemData.Dataset, DataAnalysisProblemData.TargetVariable.Value, DataAnalysisProblemData.InputVariables.CheckedItems.Select(x => x.Value.Value), SamplesStart.Value, SamplesEnd.Value, out rmsError, out cvRmsError);
100      return base.Apply();
101    }
102
103    public static SymbolicExpressionTree CreateSymbolicExpressionTree(Dataset dataset, string targetVariable, IEnumerable<string> allowedInputVariables, int start, int end, out double rmsError, out double cvRmsError) {
104      double[,] inputMatrix = LinearRegressionUtil.PrepareInputMatrix(dataset, targetVariable, allowedInputVariables, start, end);
105
106      alglib.linreg.linearmodel lm = new alglib.linreg.linearmodel();
107      alglib.linreg.lrreport ar = new alglib.linreg.lrreport();
108      int nRows = inputMatrix.GetLength(0);
109      int nFeatures = inputMatrix.GetLength(1) - 1;
110      double[] coefficients = new double[nFeatures + 1]; //last coefficient is for the constant
111
112      int retVal = 1;
113      alglib.linreg.lrbuild(inputMatrix, nRows, nFeatures, ref retVal, lm, ar);
114      if (retVal != 1) throw new ArgumentException("Error in calculation of linear regression model");
115      rmsError = ar.rmserror;
116      cvRmsError = ar.cvrmserror;
117
118      for (int i = 0; i < nFeatures + 1; i++)
119        coefficients[i] = lm.w[i + 4];
120
121      SymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
122      SymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
123      tree.Root.AddSubTree(startNode);
124      SymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
125      startNode.AddSubTree(addition);
126
127      int col = 0;
128      foreach (string column in allowedInputVariables) {
129        VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols.Variable().CreateTreeNode();
130        vNode.VariableName = column;
131        vNode.Weight = coefficients[col];
132        addition.AddSubTree(vNode);
133        col++;
134      }
135
136      ConstantTreeNode cNode = (ConstantTreeNode)new Constant().CreateTreeNode();
137      cNode.Value = coefficients[coefficients.Length - 1];
138      addition.AddSubTree(cNode);
139
140      return tree;
141    }
142  }
143}
Note: See TracBrowser for help on using the repository browser.