Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2695_dataset-ids/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification.Views/3.4/SolutionComparisonView.cs @ 16824

Last change on this file since 16824 was 14826, checked in by gkronber, 8 years ago

#2650: merged the factors branch into trunk

File size: 5.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections;
24using System.Collections.Generic;
25using System.Linq;
26using HeuristicLab.Algorithms.DataAnalysis;
27using HeuristicLab.MainForm;
28using HeuristicLab.Problems.DataAnalysis.Views.Classification;
29
namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Classification.Views {
  /// <summary>
  /// Solution comparison view for symbolic classification solutions.
  /// In addition to the solutions generated by the base view, it trains OneR and
  /// linear discriminant analysis models on a reduced dataset that only contains
  /// the variables referenced by the symbolic model.
  /// </summary>
  [View("Solution Comparison")]
  [Content(typeof(ISymbolicClassificationSolution))]
  public partial class SolutionComparisonView : ClassificationSolutionComparisonView {

    public SolutionComparisonView() {
      InitializeComponent();
    }

    /// <summary>
    /// Strongly typed accessor for the displayed content (hides the base class property).
    /// </summary>
    public new ISymbolicClassificationSolution Content {
      get { return (ISymbolicClassificationSolution)base.Content; }
      set { base.Content = value; }
    }

    /// <summary>
    /// Generates the solutions that the symbolic solution is compared against.
    /// </summary>
    /// <returns>
    /// The solutions of the base implementation, followed by the "(subset)" OneR and LDA
    /// solutions that could be created. Only the base solutions are returned when the model
    /// contains lagged variables. <c>null</c> is returned when the problem data has no
    /// training indices.
    /// </returns>
    protected override IEnumerable<IClassificationSolution> GenerateClassificationSolutions() {
      var solutionsBase = base.GenerateClassificationSolutions();
      var solutions = new List<IClassificationSolution>();

      var symbolicSolution = Content;
      var tree = symbolicSolution.Model.SymbolicExpressionTree;

      // does not support lagged variables
      if (tree.IterateNodesPrefix().OfType<LaggedVariableTreeNode>().Any()) return solutionsBase;

      var problemData = (IClassificationProblemData)symbolicSolution.ProblemData.Clone();
      // don't create any comparison models if the problem does not have a training set (e.g. loaded into an existing model)
      // NOTE(review): null (not an empty sequence) is kept from the original; callers presumably handle null - confirm.
      if (!problemData.TrainingIndices.Any()) return null;

      var dataset = problemData.Dataset;

      // names of all variables referenced by the model, each listed once (first occurrence order)
      var usedVariables = tree.IterateNodesPostfix()
        .OfType<IVariableTreeNode>()
        .Select(node => node.VariableName)
        .Distinct()
        .ToArray();

      // materialized once, because the lists are used for both the column names and the column values
      var usedDoubleVariables = usedVariables
        .Where(name => dataset.VariableHasType<double>(name))
        .ToArray();

      var usedFactorVariables = usedVariables
        .Where(name => dataset.VariableHasType<string>(name))
        .ToArray();
      var usedFactorVariableSet = new HashSet<string>(usedFactorVariables);

      // gkronber: for binary factors we actually produce a binary variable in the new dataset
      // but only if the variable is not used as a full factor anyway (LR creates binary columns anyway)
      // Item1 = variable name, Item2 = factor value; Item1 is used below to read the column from the dataset.
      // Distinct() prevents duplicate columns when the same (variable, value) pair occurs more than once in the tree.
      var usedBinaryFactors = tree.IterateNodesPostfix()
        .OfType<BinaryFactorVariableTreeNode>()
        .Where(node => !usedFactorVariableSet.Contains(node.VariableName))
        .Select(node => Tuple.Create(node.VariableName, node.VariableValue))
        .Distinct()
        .ToArray();

      // create a new problem and dataset; the target variable is always the last column
      var variableNames =
        usedDoubleVariables
        .Concat(usedFactorVariables)
        .Concat(usedBinaryFactors.Select(t => t.Item1 + "=" + t.Item2))
        .Concat(new string[] { problemData.TargetVariable })
        .ToArray();
      var variableValues =
        usedDoubleVariables.Select(name => (IList)dataset.GetDoubleValues(name).ToList())
        .Concat(usedFactorVariables.Select(name => dataset.GetStringValues(name).ToList()))
        .Concat(
          // create binary variable: 1.0 where the factor has the given value, 0.0 otherwise
          usedBinaryFactors.Select(t => dataset.GetReadOnlyStringValues(t.Item1).Select(val => val == t.Item2 ? 1.0 : 0.0).ToList())
        )
        .Concat(new[] { dataset.GetDoubleValues(problemData.TargetVariable).ToList() });

      var newDs = new Dataset(variableNames, variableValues);
      var newProblemData = new ClassificationProblemData(newDs, variableNames.Take(variableNames.Length - 1), variableNames.Last());
      // reuse the partitions of the original problem data
      newProblemData.TrainingPartition.Start = problemData.TrainingPartition.Start;
      newProblemData.TrainingPartition.End = problemData.TrainingPartition.End;
      newProblemData.TestPartition.Start = problemData.TestPartition.Start;
      newProblemData.TestPartition.End = problemData.TestPartition.End;

      // best effort: a comparison model that cannot be created for this data is silently left out
      try {
        var oneR = OneR.CreateOneRSolution(newProblemData);
        oneR.Name = "OneR Classification Solution (subset)";
        solutions.Add(oneR);
      } catch (NotSupportedException) { } catch (ArgumentException) { }
      try {
        var lda = LinearDiscriminantAnalysis.CreateLinearDiscriminantAnalysisSolution(newProblemData);
        lda.Name = "Linear Discriminant Analysis Solution (subset)";
        solutions.Add(lda);
      } catch (NotSupportedException) { } catch (ArgumentException) { }

      return solutionsBase.Concat(solutions);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.