Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
05/18/10 23:25:23 (15 years ago)
Author:
mkommend
Message:

updated LinearRegressionSolutionCreator (ticket #1012)

Location:
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3
Files:
1 added
2 edited

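Legend:

Unmodified (line shows both the old and the new line number)
Added (line shows only the new line number)
Removed (line shows only the old line number)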
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/HeuristicLab.Problems.DataAnalysis.Regression-3.3.csproj

    r3842 r3848  
    86 86    <Compile Include="HeuristicLabProblemsDataAnalysisRegressionPlugin.cs" />
    87 87    <Compile Include="LinearRegression\LinearRegressionSolutionCreator.cs" />
       88    <Compile Include="LinearRegression\LinearRegressionUtil.cs" />
    88 89    <Compile Include="Properties\AssemblyInfo.cs" />
    89 90    <Compile Include="SupportVectorRegression\BestSupportVectorRegressionSolutionAnalyzer.cs" />
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/LinearRegression/LinearRegressionSolutionCreator.cs

    r3846 r3848  
    33 33 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Symbols;
    34 34 using HeuristicLab.Parameters;
       35 using HeuristicLab.Data;
    35 36
    36 37 namespace HeuristicLab.Problems.DataAnalysis.Regression.LinearRegression {
     
    4344    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    4445    private const string DataAnalysisProblemDataParameterName = "DataAnalysisProblemData";
     46    private const string SamplesStartParameterName = "SamplesStart";
     47    private const string SamplesEndParameterName = "SamplesEnd";
    4548
    4649    public LinearRegressionSolutionCreator() {
    4750      Parameters.Add(new LookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The resulting solution encoded as a symbolic expression tree."));
    4851      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The problem data on which the linear regression should be calculated."));
     52      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesStartParameterName,"The start of the samples on which the linear regression should be applied."));
     53      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesEndParameterName,"The end of the samples on which the linear regression should be applied."));
    4954    }
    5055    [StorableConstructor]
     
    6974      set { DataAnalysisProblemDataParameter.ActualValue = value; }
    7075    }
     76
     77    public IValueLookupParameter<IntValue> SamplesStartParameter {
     78      get { return (IValueLookupParameter<IntValue>)Parameters[SamplesStartParameterName]; }
     79    }
     80    public IntValue SamplesStart {
     81      get { return SamplesStartParameter.ActualValue; }
     82      set { SamplesStartParameter.ActualValue = value; }
     83    }
     84
     85    public IValueLookupParameter<IntValue> SamplesEndParameter {
     86      get { return (IValueLookupParameter<IntValue>)Parameters[SamplesEndParameterName]; }
     87    }
     88    public IntValue SamplesEnd {
     89      get { return SamplesEndParameter.ActualValue; }
     90      set { SamplesEndParameter.ActualValue = value; }
     91    }
    7192    #endregion
    7293
    7394
    7495    public override IOperation Apply() {
    75       SymbolicExpressionTree = CreateSymbolicExpressionTree(DataAnalysisProblemData);
     96      SymbolicExpressionTree = CreateSymbolicExpressionTree(DataAnalysisProblemData.Dataset,DataAnalysisProblemData.TargetVariable.Value, DataAnalysisProblemData.InputVariables.CheckedItems.Select(x => x.Value.Value), SamplesStart.Value, SamplesEnd.Value);
    7697      return base.Apply();
    7798    }
    7899
    79     public static SymbolicExpressionTree CreateSymbolicExpressionTree(DataAnalysisProblemData problem) {
    80       List<int> allowedRows = CalculateAllowedRows(problem);
    81       double[,] inputMatrix = PrepareInputMatrix(problem, allowedRows);
     100    public static SymbolicExpressionTree CreateSymbolicExpressionTree(Dataset dataset, string targetVariable, IEnumerable<string> allowedInputVariables, int start, int end) {
     101      double[,] inputMatrix = LinearRegressionUtil.PrepareInputMatrix(dataset, targetVariable, allowedInputVariables, start, end);
    82102
    83103      alglib.linreg.linearmodel lm = new alglib.linreg.linearmodel();
    84104      alglib.linreg.lrreport ar = new alglib.linreg.lrreport();
    85105      int nRows = inputMatrix.GetLength(0);
    86       int nFeatures = inputMatrix.GetLength(1) -1;
    87       double[] coefficients = new double[nFeatures +1]; //last coefficient is for the constant
     106      int nFeatures = inputMatrix.GetLength(1) - 1;
     107      double[] coefficients = new double[nFeatures + 1]; //last coefficient is for the constant
    88108
    89109      int retVal = 1;
     
    91111      if (retVal != 1) throw new ArgumentException("Error in calculation of linear regression model");
    92112
    93       for (int i = 0; i < nFeatures+1; i++)
     113      for (int i = 0; i < nFeatures + 1; i++)
    94114        coefficients[i] = lm.w[i + 4];
    95115
    96116      SymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
    97       SymbolicExpressionTreeNode start = new StartSymbol().CreateTreeNode();
    98       tree.Root.AddSubTree(start);
     117      SymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
     118      tree.Root.AddSubTree(startNode);
    99119      SymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
    100       start.AddSubTree(addition);
     120      startNode.AddSubTree(addition);
    101121
    102122      int col = 0;
    103       foreach (string column in problem.InputVariables.CheckedItems.Select(c => c.Value.Value)) {
     123      foreach (string column in allowedInputVariables) {
    104124        VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols.Variable().CreateTreeNode();
    105125        vNode.VariableName = column;
     
    111131      ConstantTreeNode cNode = (ConstantTreeNode)new Constant().CreateTreeNode();
    112132      cNode.Value = coefficients[coefficients.Length - 1];
     133      addition.AddSubTree(cNode);
    113134
    114135      return tree;
    115136    }
    116 
    117     private static List<int> CalculateAllowedRows(DataAnalysisProblemData problem) {
    118       List<int> allowedRows = new List<int>();
    119       bool add = false;
    120 
    121       for (int row = problem.TrainingSamplesStart.Value; row < problem.TrainingSamplesEnd.Value; row++) {
    122         add = true;
    123         foreach (string column in problem.InputVariables.CheckedItems.Select(c => c.Value.Value)) {
    124           double value = problem.Dataset[column, row];
    125           if (double.IsInfinity(value) ||
    126             double.IsNaN(value))
    127             add = false;
    128         }
    129         if (double.IsNaN(problem.Dataset[problem.TargetVariable.Value, row]))
    130           add = false;
    131         if (add)
    132           allowedRows.Add(row);
    133         add = true;
    134       }
    135       return allowedRows;
    136     }
    137     private static double[,] PrepareInputMatrix(DataAnalysisProblemData problem, IList<int> allowedRows) {
    138       double[,] matrix = new double[allowedRows.Count, problem.InputVariables.CheckedItems.Count()+1];
    139       for (int row = 0; row < allowedRows.Count; row++) {
    140         int col = 0;
    141         foreach (string column in problem.InputVariables.CheckedItems.Select(c => c.Value.Value)) {
    142           matrix[row, col] = problem.Dataset[column, row];
    143           col++;
    144         }
    145         matrix[row, problem.InputVariables.CheckedItems.Count()] = problem.Dataset[problem.TargetVariable.Value, row];
    146       }
    147       return matrix;
    148     }
    149137  }
    150138}
Note: See TracChangeset for help on using the changeset viewer.