Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
04/14/17 08:58:45 (8 years ago)
Author:
gkronber
Message:

#2699: merged changesets from trunk to branch

Location:
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4
Files:
1 deleted
7 edited

Legend:

Unmodified
Added
Removed
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4

    • Property svn:mergeinfo set to (toggle deleted branches)
      /stable/HeuristicLab.Algorithms.DataAnalysis/3.4mergedeligible
      /trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4mergedeligible
      /branches/1721-RandomForestPersistence/HeuristicLab.Algorithms.DataAnalysis/3.410321-10322
      /branches/Benchmarking/sources/HeuristicLab.Algorithms.DataAnalysis/3.46917-7005
      /branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.49070-13099
      /branches/CloningRefactoring/HeuristicLab.Algorithms.DataAnalysis/3.44656-4721
      /branches/DataAnalysis Refactoring/HeuristicLab.Algorithms.DataAnalysis/3.45471-5808
      /branches/DataAnalysis SolutionEnsembles/HeuristicLab.Algorithms.DataAnalysis/3.45815-6180
      /branches/DataAnalysis/HeuristicLab.Algorithms.DataAnalysis/3.44458-4459,​4462,​4464
      /branches/DataPreprocessing/HeuristicLab.Algorithms.DataAnalysis/3.410085-11101
      /branches/GP.Grammar.Editor/HeuristicLab.Algorithms.DataAnalysis/3.46284-6795
      /branches/GP.Symbols (TimeLag, Diff, Integral)/HeuristicLab.Algorithms.DataAnalysis/3.45060
      /branches/HeuristicLab.DatasetRefactor/sources/HeuristicLab.Algorithms.DataAnalysis/3.411570-12508
      /branches/HeuristicLab.Problems.Orienteering/HeuristicLab.Algorithms.DataAnalysis/3.411130-12721
      /branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.413819-14091
      /branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.48116-8789
      /branches/LogResidualEvaluator/HeuristicLab.Algorithms.DataAnalysis/3.410202-10483
      /branches/NET40/sources/HeuristicLab.Algorithms.DataAnalysis/3.45138-5162
      /branches/ParallelEngine/HeuristicLab.Algorithms.DataAnalysis/3.45175-5192
      /branches/ProblemInstancesRegressionAndClassification/HeuristicLab.Algorithms.DataAnalysis/3.47773-7810
      /branches/QAPAlgorithms/HeuristicLab.Algorithms.DataAnalysis/3.46350-6627
      /branches/Restructure trunk solution/HeuristicLab.Algorithms.DataAnalysis/3.46828
      /branches/SpectralKernelForGaussianProcesses/HeuristicLab.Algorithms.DataAnalysis/3.410204-10479
      /branches/SuccessProgressAnalysis/HeuristicLab.Algorithms.DataAnalysis/3.45370-5682
      /branches/Trunk/HeuristicLab.Algorithms.DataAnalysis/3.46829-6865
      /branches/VNS/HeuristicLab.Algorithms.DataAnalysis/3.45594-5752
      /branches/histogram/HeuristicLab.Algorithms.DataAnalysis/3.45959-6341
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearDiscriminantAnalysis.cs

    r14185 r14869  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
     
    3637  /// Linear discriminant analysis classification algorithm.
    3738  /// </summary>
    38   [Item("Linear Discriminant Analysis", "Linear discriminant analysis classification algorithm (wrapper for ALGLIB).")]
     39  [Item("Linear Discriminant Analysis (LDA)", "Linear discriminant analysis classification algorithm (wrapper for ALGLIB).")]
    3940  [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 100)]
    4041  [StorableClass]
     
    5960
    6061    #region Fisher LDA
    61     protected override void Run() {
     62    protected override void Run(CancellationToken cancellationToken) {
    6263      var solution = CreateLinearDiscriminantAnalysisSolution(Problem.ProblemData);
    6364      Results.Add(new Result(LinearDiscriminantAnalysisSolutionResultName, "The linear discriminant analysis.", solution));
     
    7071      IEnumerable<int> rows = problemData.TrainingIndices;
    7172      int nClasses = problemData.ClassNames.Count();
    72       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     73      var doubleVariableNames = allowedInputVariables.Where(dataset.VariableHasType<double>).ToArray();
     74      var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>).ToArray();
     75      double[,] inputMatrix = dataset.ToArray(doubleVariableNames.Concat(new string[] { targetVariable }), rows);
     76
     77      var factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows);
     78      var factorMatrix = dataset.ToArray(factorVariables, rows);
     79
     80      inputMatrix = factorMatrix.HorzCat(inputMatrix);
     81
    7382      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    7483        throw new NotSupportedException("Linear discriminant analysis does not support NaN or infinity values in the input dataset.");
     
    8291      int info;
    8392      double[] w;
    84       alglib.fisherlda(inputMatrix, inputMatrix.GetLength(0), allowedInputVariables.Count(), nClasses, out info, out w);
     93      alglib.fisherlda(inputMatrix, inputMatrix.GetLength(0), inputMatrix.GetLength(1) - 1, nClasses, out info, out w);
    8594      if (info < 1) throw new ArgumentException("Error in calculation of linear discriminant analysis solution");
    8695
    87       ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
    88       ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
    89       tree.Root.AddSubtree(startNode);
    90       ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
    91       startNode.AddSubtree(addition);
     96      var nFactorCoeff = factorMatrix.GetLength(1);
     97      var tree = LinearModelToTreeConverter.CreateTree(factorVariables, w.Take(nFactorCoeff).ToArray(),
     98        doubleVariableNames, w.Skip(nFactorCoeff).Take(doubleVariableNames.Length).ToArray());
    9299
    93       int col = 0;
    94       foreach (string column in allowedInputVariables) {
    95         VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
    96         vNode.VariableName = column;
    97         vNode.Weight = w[col];
    98         addition.AddSubtree(vNode);
    99         col++;
    100       }
    101 
    102       var model = LinearDiscriminantAnalysis.CreateDiscriminantFunctionModel(tree, new SymbolicDataAnalysisExpressionTreeInterpreter(), problemData, rows);
     100      var model = CreateDiscriminantFunctionModel(tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter(), problemData, rows);
    103101      SymbolicDiscriminantFunctionClassificationSolution solution = new SymbolicDiscriminantFunctionClassificationSolution(model, (IClassificationProblemData)problemData.Clone());
    104102
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs

    r14185 r14869  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
    2728using HeuristicLab.Data;
    28 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    2929using HeuristicLab.Optimization;
    3030using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     
    6060
    6161    #region linear regression
    62     protected override void Run() {
     62    protected override void Run(CancellationToken cancellationToken) {
    6363      double rmsError, cvRmsError;
    6464      var solution = CreateLinearRegressionSolution(Problem.ProblemData, out rmsError, out cvRmsError);
     
    7373      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    7474      IEnumerable<int> rows = problemData.TrainingIndices;
    75       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     75      var doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>);
     76      var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>);
     77      var factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows);
     78      double[,] binaryMatrix = dataset.ToArray(factorVariables, rows);
     79      double[,] doubleVarMatrix = dataset.ToArray(doubleVariables.Concat(new string[] { targetVariable }), rows);
     80      var inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix);
     81
    7682      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    7783        throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
     
    9197      alglib.lrunpack(lm, out coefficients, out nFeatures);
    9298
    93       ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
    94       ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
    95       tree.Root.AddSubtree(startNode);
    96       ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
    97       startNode.AddSubtree(addition);
    98 
    99       int col = 0;
    100       foreach (string column in allowedInputVariables) {
    101         VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
    102         vNode.VariableName = column;
    103         vNode.Weight = coefficients[col];
    104         addition.AddSubtree(vNode);
    105         col++;
    106       }
    107 
    108       ConstantTreeNode cNode = (ConstantTreeNode)new Constant().CreateTreeNode();
    109       cNode.Value = coefficients[coefficients.Length - 1];
    110       addition.AddSubtree(cNode);
    111 
    112       SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter()), (IRegressionProblemData)problemData.Clone());
     99      int nFactorCoeff = binaryMatrix.GetLength(1);
     100      int nVarCoeff = doubleVariables.Count();
     101      var tree = LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(),
     102        doubleVariables.ToArray(), coefficients.Skip(nFactorCoeff).Take(nVarCoeff).ToArray(),
     103        @const: coefficients[nFeatures]);
     104     
     105      SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter()), (IRegressionProblemData)problemData.Clone());
    113106      solution.Model.Name = "Linear Regression Model";
    114107      solution.Name = "Linear Regression Solution";
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitClassification.cs

    r14185 r14869  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
     
    5758
    5859    #region logit classification
    59     protected override void Run() {
     60    protected override void Run(CancellationToken cancellationToken) {
    6061      double rmsError, relClassError;
    6162      var solution = CreateLogitClassificationSolution(Problem.ProblemData, out rmsError, out relClassError);
     
    6869      var dataset = problemData.Dataset;
    6970      string targetVariable = problemData.TargetVariable;
    70       IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
     71      var doubleVariableNames = problemData.AllowedInputVariables.Where(dataset.VariableHasType<double>);
     72      var factorVariableNames = problemData.AllowedInputVariables.Where(dataset.VariableHasType<string>);
    7173      IEnumerable<int> rows = problemData.TrainingIndices;
    72       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     74      double[,] inputMatrix = dataset.ToArray(doubleVariableNames.Concat(new string[] { targetVariable }), rows);
     75
     76      var factorVariableValues = dataset.GetFactorVariableValues(factorVariableNames, rows);
     77      var factorMatrix = dataset.ToArray(factorVariableValues, rows);
     78      inputMatrix = factorMatrix.HorzCat(inputMatrix);
     79
    7380      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    7481        throw new NotSupportedException("Multinomial logit classification does not support NaN or infinity values in the input dataset.");
     
    95102      relClassError = alglib.mnlrelclserror(lm, inputMatrix, nRows);
    96103
    97       MultinomialLogitClassificationSolution solution = new MultinomialLogitClassificationSolution(new MultinomialLogitModel(lm, targetVariable, allowedInputVariables, classValues), (IClassificationProblemData)problemData.Clone());
     104      MultinomialLogitClassificationSolution solution = new MultinomialLogitClassificationSolution(new MultinomialLogitModel(lm, targetVariable, doubleVariableNames, factorVariableValues, classValues), (IClassificationProblemData)problemData.Clone());
    98105      return solution;
    99106    }
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitClassificationSolution.cs

    r14185 r14869  
    4343      : base(original, cloner) {
    4444    }
    45     public MultinomialLogitClassificationSolution( MultinomialLogitModel logitModel,IClassificationProblemData problemData)
     45    public MultinomialLogitClassificationSolution(MultinomialLogitModel logitModel, IClassificationProblemData problemData)
    4646      : base(logitModel, problemData) {
    4747    }
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitModel.cs

    r14185 r14869  
    5656    [Storable]
    5757    private double[] classValues;
     58    [Storable]
     59    private List<KeyValuePair<string, IEnumerable<string>>> factorVariables;
     60
    5861    [StorableConstructor]
    5962    private MultinomialLogitModel(bool deserializing)
     
    6871      allowedInputVariables = (string[])original.allowedInputVariables.Clone();
    6972      classValues = (double[])original.classValues.Clone();
     73      this.factorVariables = original.factorVariables.Select(kvp => new KeyValuePair<string, IEnumerable<string>>(kvp.Key, new List<string>(kvp.Value))).ToList();
    7074    }
    71     public MultinomialLogitModel(alglib.logitmodel logitModel, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues)
     75    public MultinomialLogitModel(alglib.logitmodel logitModel, string targetVariable, IEnumerable<string> doubleInputVariables, IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables, double[] classValues)
    7276      : base(targetVariable) {
    7377      this.name = ItemName;
    7478      this.description = ItemDescription;
    7579      this.logitModel = logitModel;
    76       this.allowedInputVariables = allowedInputVariables.ToArray();
     80      this.allowedInputVariables = doubleInputVariables.ToArray();
     81      this.factorVariables = factorVariables.Select(kvp => new KeyValuePair<string, IEnumerable<string>>(kvp.Key, new List<string>(kvp.Value))).ToList();
    7782      this.classValues = (double[])classValues.Clone();
     83    }
     84
     85    [StorableHook(HookType.AfterDeserialization)]
     86    private void AfterDeserialization() {
     87      // BackwardsCompatibility3.3
     88      #region Backwards compatible code, remove with 3.4
     89      factorVariables = new List<KeyValuePair<string, IEnumerable<string>>>();
     90      #endregion
    7891    }
    7992
     
    8396
    8497    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    85       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     98
     99      double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
     100      double[,] factorData = dataset.ToArray(factorVariables, rows);
     101
     102      inputData = factorData.HorzCat(inputData);
    86103
    87104      int n = inputData.GetLength(0);
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/Scaling.cs

    r14185 r14869  
    2929
    3030namespace HeuristicLab.Algorithms.DataAnalysis {
     31  [Obsolete("Use transformation classes in Problems.DataAnalysis instead")]
    3132  [StorableClass]
    3233  [Item(Name = "Scaling", Description = "Contains information about scaling of variables for data-analysis algorithms.")]
Note: See TracChangeset for help on using the changeset viewer.