Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2913_MatlabScriptProblemInstanceProvider/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/Matlab/RegressionMatlabInstanceProvider.cs

Last change on this file was 15958, checked in by rhanghof, 7 years ago

#2913:

  • Enhancements on the RegressionMatlabImportDialog
  • Added a new text formatter SymbolicDataAnalysisExpressionMATLABFunctionFormatter
File size: 3.8 KB
RevLine 
[15912]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.IO;
25using System.Linq;
26using System.Text;
27using System.Threading.Tasks;
28using HeuristicLab.Common;
29using HeuristicLab.Problems.DataAnalysis;
30using HeuristicLab.Problems.Instances.DataAnalysis.Regression.Matlab.Api.Types;
[15958]31using HeuristicLab.Random;
[15912]32
namespace HeuristicLab.Problems.Instances.DataAnalysis.Regression.Matlab {
  /// <summary>
  /// A RegressionInstanceProvider which imports data for a regression problem from a MATLAB script.
  /// The provider exposes no predefined data descriptors; problem data is only produced by
  /// the ImportData overload that takes a <see cref="RegressionMatlabImportType"/>.
  /// </summary>
  public class RegressionMatlabInstanceProvider : RegressionInstanceProvider {
    /// <summary>Display name of this provider.</summary>
    public override string Name {
      get { return "Matlab Script"; }
    }

    /// <summary>Description of this provider (currently empty).</summary>
    public override string Description {
      get { return ""; }
    }

    /// <summary>Link associated with this provider.</summary>
    public override Uri WebLink {
      get { return new Uri("http://dev.heuristiclab.com"); }
    }

    /// <summary>Reference publication of this provider (currently empty).</summary>
    public override string ReferencePublication {
      get { return ""; }
    }

    /// <summary>
    /// Returns the predefined data descriptors of this provider, which is always an empty list.
    /// </summary>
    public override IEnumerable<IDataDescriptor> GetDataDescriptors() {
      return new List<IDataDescriptor>();
    }

    /// <summary>
    /// Not supported by this provider.
    /// </summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public override IRegressionProblemData LoadData(IDataDescriptor descriptor) {
      throw new NotImplementedException();
    }

    /// <summary>Signals that this provider can import data.</summary>
    public override bool CanImportData {
      get { return true; }
    }

    /// <summary>
    /// Returns the dataset stored in <paramref name="type"/>, shuffled first if
    /// <c>type.Shuffle</c> is set.
    /// </summary>
    /// <remarks>
    /// When shuffling is requested, the shuffled dataset is written back to <c>type.Values</c>,
    /// so the passed import type is modified and a repeated call shuffles the data again.
    /// NOTE(review): the generator is created with its default constructor, so the seed is
    /// whatever MersenneTwister picks by default - confirm whether reproducible shuffling is needed.
    /// </remarks>
    private Dataset GetValues(RegressionMatlabImportType type) {
      if (type.Shuffle) {
        type.Values = type.Values.Shuffle(new MersenneTwister());
      }
      return type.Values;
    }

    /// <summary>
    /// Builds regression problem data from the values of a MATLAB import.
    /// </summary>
    /// <param name="path">Path of the imported script; only its file name is used, as the name of the result.</param>
    /// <param name="type">Import settings: the dataset, the target variable, the shuffle flag and the training percentage.</param>
    /// <param name="variableNames">Not used by this method.</param>
    /// <returns>
    /// Problem data whose training partition covers the rows [0, trainingEnd) and whose test
    /// partition covers the rows [trainingEnd, rows). The allowed input variables are the double
    /// variables other than the target variable; if the training partition has at least two rows,
    /// variables whose training values have a range that is not greater than zero are left out.
    /// </returns>
    public IRegressionProblemData ImportData(string path, RegressionMatlabImportType type, IEnumerable<MLVariableInfo> variableNames) {
      var dataset = GetValues(type);
      var targetVar = type.TargetVariable;
      var rows = dataset.Rows;

      // The product is computed as a 64-bit value so that large datasets cannot overflow.
      long scaledRows = (long)rows * type.TrainingPercentage / 100;
      // At least one training row is used, but never more rows than the dataset contains
      // (this also keeps both partitions valid for an empty dataset).
      int trainingPartEnd = (int)Math.Min((long)rows, Math.Max(1L, scaledRows));

      // turn off input variables that are constant in the training partition
      var allowedInputVars = new List<string>();
      if (trainingPartEnd >= 2) {
        var trainingRows = Enumerable.Range(0, trainingPartEnd);
        foreach (var variableName in dataset.DoubleVariables) {
          // The cheap name check comes first so that the target column is never scanned.
          if (variableName == targetVar) {
            continue;
          }
          if (dataset.GetDoubleValues(variableName, trainingRows).Range() > 0) {
            allowedInputVars.Add(variableName);
          }
        }
      } else {
        // With fewer than two training rows a range is meaningless, so every non-target variable is kept.
        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => x != targetVar));
      }

      IRegressionProblemData regressionData = new RegressionProblemData(dataset, allowedInputVars, targetVar);

      regressionData.TrainingPartition.Start = 0;
      regressionData.TrainingPartition.End = trainingPartEnd;
      regressionData.TestPartition.Start = trainingPartEnd;
      regressionData.TestPartition.End = rows;

      regressionData.Name = Path.GetFileName(path);

      return regressionData;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.