source: branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression.Views/3.4/SymbolicRegressionSolutionErrorCharacteristicsCurveView.cs @ 14238

Last change on this file since 14238 was 14238, checked in by gkronber, 3 years ago

#2650:

  • added weight for FactorVariable (necessary for LR)
  • introduced VariableBase and VariableTreeNodeBase and IVariableSymbol
  • support for factors in LR
  • extended variable impacts in solution view
  • fixed ERC view for regression
  • support for FactorVariable in simplifier
  • improved support for FactorVariable in constants optimizer
  • multiple related changes and small fixes
File size: 4.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Diagnostics.Contracts;
25using System.Linq;
26using HeuristicLab.Algorithms.DataAnalysis;
27using HeuristicLab.MainForm;
28using HeuristicLab.Problems.DataAnalysis.Views;
29
namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression.Views {
  /// <summary>
  /// Error characteristics curve view for symbolic regression solutions.
  /// In addition to the baseline solutions of the base view it offers a linear regression
  /// baseline that is trained only on the variables referenced by the symbolic model.
  /// </summary>
  [View("Error Characteristics Curve")]
  [Content(typeof(ISymbolicRegressionSolution))]
  public partial class SymbolicRegressionSolutionErrorCharacteristicsCurveView : RegressionSolutionErrorCharacteristicsCurveView {
    public SymbolicRegressionSolutionErrorCharacteristicsCurveView() {
      InitializeComponent();
    }

    /// <summary>
    /// The displayed content, typed as <see cref="ISymbolicRegressionSolution"/>.
    /// Hides the base property and forwards to it.
    /// </summary>
    public new ISymbolicRegressionSolution Content {
      get { return (ISymbolicRegressionSolution)base.Content; }
      set { base.Content = value; }
    }

    /// <summary>
    /// Creates a linear regression solution that uses only the variables occurring in the
    /// symbolic expression tree of <see cref="Content"/>. Each (factor variable, value) pair
    /// found in the tree is turned into a binary 0/1 column named "variable=value".
    /// </summary>
    /// <returns>
    /// The linear regression solution named "Baseline (linear subset)", or <c>null</c> if the
    /// problem data has no training rows.
    /// </returns>
    /// <exception cref="InvalidOperationException">Thrown if <see cref="Content"/> is null.</exception>
    private IRegressionSolution CreateLinearRegressionSolution() {
      if (Content == null) throw new InvalidOperationException();
      double rmse, cvRmsError;
      var problemData = (IRegressionProblemData)ProblemData.Clone();
      if (!problemData.TrainingIndices.Any()) return null; // don't create an LR model if the problem does not have a training set (e.g. loaded into an existing model)

      // Walk the tree exactly once; all queries below work on this snapshot.
      var treeNodes = Content.Model.SymbolicExpressionTree.IterateNodesPostfix().ToList();

      // Materialized because both the column names and the column values are derived from these
      // sequences; a deferred query would be re-evaluated (and the tree re-traversed) for each use.
      var usedDoubleVariables =
        treeNodes
        .OfType<VariableTreeNode>()
        .Select(node => node.VariableName)
      .Concat(
        treeNodes
        .OfType<VariableConditionTreeNode>()
        .Select(node => node.VariableName)
        )
      .Distinct()
      .ToArray();

      var usedFactorVariables =
        treeNodes
        .OfType<FactorVariableTreeNode>()
        .Select(node => Tuple.Create(node.VariableName, node.VariableValue))
        .Distinct()
        .ToArray();

      // create a new problem and dataset
      // column order: double variables, binary factor columns, target variable (last)
      var variableNames =
        usedDoubleVariables
        .Concat(usedFactorVariables.Select(t => t.Item1 + "=" + t.Item2))
        .Concat(new string[] { problemData.TargetVariable })
        .ToArray();
      var variableValues =
        usedDoubleVariables.Select(name => problemData.Dataset.GetDoubleValues(name).ToList())
        .Concat(
        // create binary variable
          usedFactorVariables.Select(t => problemData.Dataset.GetReadOnlyStringValues(t.Item1).Select(val => val == t.Item2 ? 1.0 : 0.0).ToList())
        )
        .Concat(new[] { problemData.Dataset.GetDoubleValues(problemData.TargetVariable).ToList() });

      var newDs = new Dataset(variableNames, variableValues);
      // all columns except the last one are inputs, the last one is the target
      var newProblemData = new RegressionProblemData(newDs, variableNames.Take(variableNames.Length - 1), variableNames.Last());
      // use the same training / test split as the original problem
      newProblemData.TrainingPartition.Start = problemData.TrainingPartition.Start;
      newProblemData.TrainingPartition.End = problemData.TrainingPartition.End;
      newProblemData.TestPartition.Start = problemData.TestPartition.Start;
      newProblemData.TestPartition.End = problemData.TestPartition.End;

      var solution = LinearRegression.CreateLinearRegressionSolution(newProblemData, out rmse, out cvRmsError);
      solution.Name = "Baseline (linear subset)";
      return solution;
    }


    /// <summary>
    /// Returns the baseline solutions of the base view followed by the linear regression
    /// baseline on the variable subset used by the symbolic model. The additional baseline is
    /// omitted if the model contains lagged variables or if it could not be created.
    /// </summary>
    protected override IEnumerable<IRegressionSolution> CreateBaselineSolutions() {
      foreach (var sol in base.CreateBaselineSolutions()) yield return sol;

      // does not support lagged variables
      if (Content.Model.SymbolicExpressionTree.IterateNodesPrefix().OfType<LaggedVariableTreeNode>().Any()) yield break;

      // CreateLinearRegressionSolution returns null when there is no training set; never hand a null entry to the consumer
      var linearSubsetSolution = CreateLinearRegressionSolution();
      if (linearSubsetSolution != null) yield return linearSubsetSolution;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.