Ignore:
Timestamp:
08/05/16 17:34:16 (3 years ago)
Author:
gkronber
Message:

#2650:

  • added weight for FactorVariable (necessary for LR)
  • introduced VariableBase and VariableTreeNodeBase and IVariableSymbol
  • support for factors in LR
  • extended variable impacts in solution view
  • fixed ERC view for regression
  • support for FactorVariable in simplifier
  • improved support for FactorVariable in constants optimizer
  • multiple related changes and small fixes
File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression.Views/3.4/SymbolicRegressionSolutionErrorCharacteristicsCurveView.cs

    r14185 r14238  
    2222using System;
    2323using System.Collections.Generic;
     24using System.Diagnostics.Contracts;
    2425using System.Linq;
    2526using HeuristicLab.Algorithms.DataAnalysis;
     
    4647      if (!problemData.TrainingIndices.Any()) return null; // don't create an LR model if the problem does not have a training set (e.g. loaded into an existing model)
    4748
    48       //clear checked inputVariables
    49       foreach (var inputVariable in problemData.InputVariables.CheckedItems) {
    50         problemData.InputVariables.SetItemCheckedState(inputVariable.Value, false);
    51       }
     49      var usedDoubleVariables =
     50        Content.Model.SymbolicExpressionTree.IterateNodesPostfix()
     51        .OfType<VariableTreeNode>()
     52        .Select(node => node.VariableName)
     53      .Concat(
     54        Content.Model.SymbolicExpressionTree.IterateNodesPostfix()
     55        .OfType<VariableConditionTreeNode>()
     56        .Select(node => node.VariableName)
     57        )
     58      .Distinct();
    5259
    53       //check inputVariables used in the symbolic regression model
    54       var usedVariables =
    55         Content.Model.SymbolicExpressionTree.IterateNodesPostfix().OfType<VariableTreeNode>().Select(
    56           node => node.VariableName).Distinct();
    57       foreach (var variable in usedVariables) {
    58         problemData.InputVariables.SetItemCheckedState(
    59           problemData.InputVariables.First(x => x.Value == variable), true);
    60       }
     60      var usedFactorVariables =
     61        Content.Model.SymbolicExpressionTree.IterateNodesPostfix()
     62        .OfType<FactorVariableTreeNode>()
     63        .Select(node => Tuple.Create(node.VariableName, node.VariableValue))
     64        .Distinct();
    6165
    62       var solution = LinearRegression.CreateLinearRegressionSolution(problemData, out rmse, out cvRmsError);
     66      // create a new problem and dataset
     67      var variableNames =
     68        usedDoubleVariables
     69        .Concat(usedFactorVariables.Select(t => t.Item1 + "=" + t.Item2))
     70        .Concat(new string[] { problemData.TargetVariable })
     71        .ToArray();
     72      var variableValues =
     73        usedDoubleVariables.Select(name => problemData.Dataset.GetDoubleValues(name).ToList())
     74        .Concat(
     75        // create binary variable
     76          usedFactorVariables.Select(t => problemData.Dataset.GetReadOnlyStringValues(t.Item1).Select(val => val == t.Item2 ? 1.0 : 0.0).ToList())
     77        )
     78        .Concat(new[] { problemData.Dataset.GetDoubleValues(problemData.TargetVariable).ToList() });
     79
     80      var newDs = new Dataset(variableNames, variableValues);
     81      var newProblemData = new RegressionProblemData(newDs, variableNames.Take(variableNames.Length - 1), variableNames.Last());
     82      newProblemData.TrainingPartition.Start = problemData.TrainingPartition.Start;
     83      newProblemData.TrainingPartition.End = problemData.TrainingPartition.End;
     84      newProblemData.TestPartition.Start = problemData.TestPartition.Start;
     85      newProblemData.TestPartition.End = problemData.TestPartition.End;
     86
     87      var solution = LinearRegression.CreateLinearRegressionSolution(newProblemData, out rmse, out cvRmsError);
    6388      solution.Name = "Baseline (linear subset)";
    6489      return solution;
     
    6893    protected override IEnumerable<IRegressionSolution> CreateBaselineSolutions() {
              // Yields every baseline solution produced by the base class first, then
              // appends the result of CreateLinearRegressionSolution() unless the
              // model's expression tree contains lagged variables.
    6994      foreach (var sol in base.CreateBaselineSolutions()) yield return sol;
     95
     96      // does not support lagged variables: stop after the base-class baselines if the tree contains any LaggedVariableTreeNode
     97      if (Content.Model.SymbolicExpressionTree.IterateNodesPrefix().OfType<LaggedVariableTreeNode>().Any()) yield break;
     98
              // NOTE(review): the diff fragment preceding this method returns null when the
              // problem has no training indices; presumably that fragment belongs to
              // CreateLinearRegressionSolution, in which case a null element can be yielded
              // here - confirm that consumers of this sequence tolerate null.
    7099      yield return CreateLinearRegressionSolution();
    71100    }
Note: See TracChangeset for help on using the changeset viewer.