Ignore:
Timestamp:
04/04/17 17:52:44 (6 months ago)
Author:
gkronber
Message:

#2650: merged the factors branch into trunk

Location:
trunk/sources
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources

  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression.Views

  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression.Views/3.4/SymbolicRegressionSolutionErrorCharacteristicsCurveView.cs

    r14400 r14826  
    2121
    2222using System;
     23using System.Collections;
    2324using System.Collections.Generic;
     25using System.Diagnostics.Contracts;
    2426using System.Linq;
    2527using HeuristicLab.Algorithms.DataAnalysis;
     
    4648      if (!problemData.TrainingIndices.Any()) return null; // don't create an LR model if the problem does not have a training set (e.g. loaded into an existing model)
    4749
    48       //clear checked inputVariables
    49       foreach (var inputVariable in problemData.InputVariables.CheckedItems) {
    50         problemData.InputVariables.SetItemCheckedState(inputVariable.Value, false);
    51       }
     50      var usedVariables = Content.Model.SymbolicExpressionTree.IterateNodesPostfix()
     51        .OfType<IVariableTreeNode>()
     52        .Select(node => node.VariableName).ToArray();
    5253
    53       //check inputVariables used in the symbolic regression model
    54       var usedVariables =
    55         Content.Model.SymbolicExpressionTree.IterateNodesPostfix().OfType<VariableTreeNode>().Select(
    56           node => node.VariableName).Distinct();
    57       foreach (var variable in usedVariables) {
    58         problemData.InputVariables.SetItemCheckedState(
    59           problemData.InputVariables.First(x => x.Value == variable), true);
    60       }
     54      var usedDoubleVariables = usedVariables
     55        .Where(name => problemData.Dataset.VariableHasType<double>(name))
     56      .Distinct();
    6157
    62       var solution = LinearRegression.CreateLinearRegressionSolution(problemData, out rmse, out cvRmsError);
     58      var usedFactorVariables = usedVariables
     59        .Where(name => problemData.Dataset.VariableHasType<string>(name))
     60        .Distinct();
     61
     62      // gkronber: for binary factors we add a binary (0/1) variable to the new dataset,
     63      // but only if the variable is not already used as a full factor (LR creates binary columns for full factors itself)
     64      var usedBinaryFactors =
     65        Content.Model.SymbolicExpressionTree.IterateNodesPostfix().OfType<BinaryFactorVariableTreeNode>()
     66        .Where(node => !usedFactorVariables.Contains(node.VariableName))
     67        .Select(node => Tuple.Create(node.VariableValue, node.VariableValue));
     68
     69      // create a new problem and dataset
     70      var variableNames =
     71        usedDoubleVariables
     72        .Concat(usedFactorVariables)
     73        .Concat(usedBinaryFactors.Select(t => t.Item1 + "=" + t.Item2))
     74        .Concat(new string[] { problemData.TargetVariable })
     75        .ToArray();
     76      var variableValues =
     77        usedDoubleVariables.Select(name => (IList)problemData.Dataset.GetDoubleValues(name).ToList())
     78        .Concat(usedFactorVariables.Select(name => problemData.Dataset.GetStringValues(name).ToList()))
     79        .Concat(
     80          // create binary variable
     81          usedBinaryFactors.Select(t => problemData.Dataset.GetReadOnlyStringValues(t.Item1).Select(val => val == t.Item2 ? 1.0 : 0.0).ToList())
     82        )
     83        .Concat(new[] { problemData.Dataset.GetDoubleValues(problemData.TargetVariable).ToList() });
     84
     85      var newDs = new Dataset(variableNames, variableValues);
     86      var newProblemData = new RegressionProblemData(newDs, variableNames.Take(variableNames.Length - 1), variableNames.Last());
     87      newProblemData.TrainingPartition.Start = problemData.TrainingPartition.Start;
     88      newProblemData.TrainingPartition.End = problemData.TrainingPartition.End;
     89      newProblemData.TestPartition.Start = problemData.TestPartition.Start;
     90      newProblemData.TestPartition.End = problemData.TestPartition.End;
     91
     92      var solution = LinearRegression.CreateLinearRegressionSolution(newProblemData, out rmse, out cvRmsError);
    6393      solution.Name = "Baseline (linear subset)";
    6494      return solution;
     
    6898    protected override IEnumerable<IRegressionSolution> CreateBaselineSolutions() {
    6999      foreach (var sol in base.CreateBaselineSolutions()) yield return sol;
     100
     101      // the linear regression baseline does not support lagged variables
     102      if (Content.Model.SymbolicExpressionTree.IterateNodesPrefix().OfType<LaggedVariableTreeNode>().Any()) yield break;
     103
    70104      yield return CreateLinearRegressionSolution();
    71105    }
Note: See TracChangeset for help on using the changeset viewer.