Free cookie consent management tool by TermsFeed Policy Generator

Changeset 2542


Ignore:
Timestamp:
12/03/09 18:05:27 (14 years ago)
Author:
gkronber
Message:

Added static methods to create linear regression (LR) models. See ticket #811.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.LinearRegression/3.2/LinearRegressionOperator.cs

    r2538 r2542  
    5757      int minTimeOffset = minTimeOffsetData == null ? 0 : minTimeOffsetData.Data;
    5858
    59       List<int> allowedColumns = CalculateAllowedColumns(dataset, targetVariableIndex, start, end);
     59      IFunctionTree tree = CreateModel(dataset, targetVariable, dataset.VariableNames, start, end, minTimeOffset, maxTimeOffset);
     60      scope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName("LinearRegressionModel"), new GeneticProgrammingModel(tree)));
     61      return null;
     62    }
     63
     64    public static IFunctionTree CreateModel(Dataset dataset, string targetVariable, IEnumerable<string> inputVariables, int start, int end) {
     65      return CreateModel(dataset, targetVariable, inputVariables, start, end, 0, 0);
     66    }
     67
     68    public static IFunctionTree CreateModel(Dataset dataset, string targetVariable, IEnumerable<string> inputVariables,
     69        int start, int end,
     70        int minTimeOffset, int maxTimeOffset) {
     71      int targetVariableIndex = dataset.GetVariableIndex(targetVariable);
     72      List<int> allowedColumns = CalculateAllowedColumns(dataset, targetVariableIndex, inputVariables.Select(x => dataset.GetVariableIndex(x)), start, end);
    6073      List<int> allowedRows = CalculateAllowedRows(dataset, targetVariableIndex, allowedColumns, start, end, minTimeOffset, maxTimeOffset);
    6174
     
    6376      double[] targetVector = PrepareTargetVector(dataset, targetVariableIndex, allowedRows);
    6477      double[] coefficients = CalculateCoefficients(inputMatrix, targetVector);
    65       IFunctionTree tree = CreateModel(coefficients, allowedColumns.Select(i => dataset.GetVariableName(i)).ToList(), minTimeOffset, maxTimeOffset);
    66 
    67       scope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName("LinearRegressionModel"), new GeneticProgrammingModel(tree)));
    68       return null;
     78      return CreateModel(coefficients, allowedColumns.Select(i => dataset.GetVariableName(i)).ToList(), minTimeOffset, maxTimeOffset);
    6979    }
    7080
    71     private IFunctionTree CreateModel(double[] coefficients, List<string> allowedVariables, int minTimeOffset, int maxTimeOffset) {
     81    private static IFunctionTree CreateModel(double[] coefficients, List<string> allowedVariables, int minTimeOffset, int maxTimeOffset) {
    7282      IFunctionTree root = new Addition().GetTreeNode();
    7383
     
    90100    }
    91101
    92     private double[] CalculateCoefficients(double[,] inputMatrix, double[] targetVector) {
     102    private static double[] CalculateCoefficients(double[,] inputMatrix, double[] targetVector) {
    93103      int retVal = 0;
    94104      alglib.linreg.linearmodel lm = new alglib.linreg.linearmodel();
     
    115125
    116126    //returns list of valid row indexes (rows without NaN values)
    117     private List<int> CalculateAllowedRows(Dataset dataset, int targetVariable, IList<int> allowedColumns, int start, int end, int minTimeOffset, int maxTimeOffset) {
     127    private static List<int> CalculateAllowedRows(Dataset dataset, int targetVariable, IList<int> allowedColumns, int start, int end, int minTimeOffset, int maxTimeOffset) {
    118128      List<int> allowedRows = new List<int>();
    119129      bool add;
     
    140150
    141151    //returns list of valid column indexes (columns which contain less than 10% NaN (or infinity) values and at least two different values)
    142     private List<int> CalculateAllowedColumns(Dataset dataset, int targetVariable, int start, int end) {
     152    private static List<int> CalculateAllowedColumns(Dataset dataset, int targetVariable, IEnumerable<int> inputVariables, int start, int end) {
    143153      List<int> allowedColumns = new List<int>();
    144154      double n = end - start;
    145       for (int i = 0; i < dataset.Columns; i++) {
    146         double nanRatio = dataset.CountMissingValues(i, start, end) / n;
    147         if (i != targetVariable && nanRatio < 0.1 && dataset.GetRange(i, start, end) > 0.0) {
    148           allowedColumns.Add(i);
     155      foreach (int inputVariable in inputVariables) {// = 0; i < dataset.Columns; i++) {
     156        double nanRatio = dataset.CountMissingValues(inputVariable, start, end) / n;
     157        if (inputVariable != targetVariable && nanRatio < 0.1 && dataset.GetRange(inputVariable, start, end) > 0.0) {
     158          allowedColumns.Add(inputVariable);
    149159        }
    150160      }
     
    152162    }
    153163
    154     private double[,] PrepareInputMatrix(Dataset dataset, List<int> allowedColumns, List<int> allowedRows, int minTimeOffset, int maxTimeOffset) {
     164    private static double[,] PrepareInputMatrix(Dataset dataset, List<int> allowedColumns, List<int> allowedRows, int minTimeOffset, int maxTimeOffset) {
    155165      int rowCount = allowedRows.Count;
    156166      int timeOffsetRange = (maxTimeOffset - minTimeOffset + 1);
     
    167177    }
    168178
    169     private double[] PrepareTargetVector(Dataset dataset, int targetVariable, List<int> allowedRows) {
     179    private static double[] PrepareTargetVector(Dataset dataset, int targetVariable, List<int> allowedRows) {
    170180      int rowCount = allowedRows.Count;
    171181      double[] targetVector = new double[rowCount];
Note: See TracChangeset for help on using the changeset viewer.