[6567]  1  #region License Information


 2  /* HeuristicLab


[14185]  3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)


[6567]  4  *


 5  * This file is part of HeuristicLab.


 6  *


 7  * HeuristicLab is free software: you can redistribute it and/or modify


 8  * it under the terms of the GNU General Public License as published by


 9  * the Free Software Foundation, either version 3 of the License, or


 10  * (at your option) any later version.


 11  *


 12  * HeuristicLab is distributed in the hope that it will be useful,


 13  * but WITHOUT ANY WARRANTY; without even the implied warranty of


 14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


 15  * GNU General Public License for more details.


 16  *


 17  * You should have received a copy of the GNU General Public License


 18  * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.


 19  */


 20  #endregion


 21 


 22  using System;


 23  using System.Collections.Generic;


 24  using System.Linq;


 25  using HeuristicLab.Common;


 26  using HeuristicLab.Core;


 27  using HeuristicLab.Data;


 28  using HeuristicLab.Optimization;


 29  using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;


 30  using HeuristicLab.Problems.DataAnalysis;


 31 


 32  namespace HeuristicLab.Algorithms.DataAnalysis {


 33  /// <summary>


 34  /// Multinomial logit regression data analysis algorithm.


 35  /// </summary>


[Item("Multinomial Logit Classification (MNL)", "Multinomial logit classification data analysis algorithm (wrapper for ALGLIB).")]
[Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 180)]
[StorableClass]
public sealed class MultiNomialLogitClassification : FixedDataAnalysisAlgorithm<IClassificationProblem> {
  // Name under which the trained solution is published in the Results collection.
  private const string LogitClassificationModelResultName = "Logit classification solution";

  /// <summary>
  /// Constructor used by the persistence framework when restoring an instance.
  /// </summary>
  [StorableConstructor]
  private MultiNomialLogitClassification(bool deserializing) : base(deserializing) { }

  /// <summary>
  /// Cloning constructor; all state is copied by the base class.
  /// </summary>
  private MultiNomialLogitClassification(MultiNomialLogitClassification original, Cloner cloner)
    : base(original, cloner) {
  }

  /// <summary>
  /// Creates a new algorithm instance that is preconfigured with a fresh <see cref="ClassificationProblem"/>.
  /// </summary>
  public MultiNomialLogitClassification()
    : base() {
    Problem = new ClassificationProblem();
  }

  // Intentionally empty: no fix-ups are performed after deserialization.
  [StorableHook(HookType.AfterDeserialization)]
  private void AfterDeserialization() { }

  /// <summary>
  /// Creates a copy of this algorithm via the cloning constructor.
  /// </summary>
  /// <param name="cloner">The cloner that is passed on to the cloning constructor.</param>
  /// <returns>A new <see cref="MultiNomialLogitClassification"/> instance.</returns>
  public override IDeepCloneable Clone(Cloner cloner) {
    return new MultiNomialLogitClassification(this, cloner);
  }

  #region logit classification
  /// <summary>
  /// Trains a logit classification solution on the problem data of the current problem and adds
  /// the solution, its root mean squared error and its relative classification error to the results.
  /// </summary>
  protected override void Run() {
    double rmsError, relClassError;
    var solution = CreateLogitClassificationSolution(Problem.ProblemData, out rmsError, out relClassError);
    Results.Add(new Result(LogitClassificationModelResultName, "The logit classification solution.", solution));
    Results.Add(new Result("Root mean squared error", "The root of the mean of squared errors of the logit regression solution on the training set.", new DoubleValue(rmsError)));
    Results.Add(new Result("Relative classification error", "Relative classification error on the training set (percentage of misclassified cases).", new PercentValue(relClassError)));
  }

  /// <summary>
  /// Trains a multinomial logit model with ALGLIB on the training rows of the given problem data
  /// and wraps it in a classification solution.
  /// </summary>
  /// <param name="problemData">Supplies the dataset, the target variable, the allowed input variables and the training indices.</param>
  /// <param name="rmsError">Receives the value of <c>alglib.mnlrmserror</c> computed on the training matrix.</param>
  /// <param name="relClassError">Receives the value of <c>alglib.mnlrelclserror</c> computed on the training matrix.</param>
  /// <returns>A solution that holds the trained model and a clone of <paramref name="problemData"/>.</returns>
  /// <exception cref="NotSupportedException">The training matrix contains NaN or infinity values.</exception>
  /// <exception cref="ArgumentException">ALGLIB reported a status code other than 1 for the training run.</exception>
  public static IClassificationSolution CreateLogitClassificationSolution(IClassificationProblemData problemData, out double rmsError, out double relClassError) {
    var dataset = problemData.Dataset;
    string targetVariable = problemData.TargetVariable;
    IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    IEnumerable<int> rows = problemData.TrainingIndices;

    // The target variable is appended as the last column of the matrix handed to ALGLIB.
    double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
      throw new NotSupportedException("Multinomial logit classification does not support NaN or infinity values in the input dataset.");

    int nRows = inputMatrix.GetLength(0);
    // All columns except the trailing target column are features.
    int nFeatures = inputMatrix.GetLength(1) - 1;

    // Class values are collected from the whole dataset (not only the training rows) in ascending order.
    double[] classValues = dataset.GetDoubleValues(targetVariable).Distinct().OrderBy(x => x).ToArray();
    int nClasses = classValues.Length;

    // map original class values to values [0..nClasses-1]
    Dictionary<double, double> classIndices = new Dictionary<double, double>();
    for (int i = 0; i < nClasses; i++) {
      classIndices[classValues[i]] = i;
    }
    for (int row = 0; row < nRows; row++) {
      inputMatrix[row, nFeatures] = classIndices[inputMatrix[row, nFeatures]];
    }

    // The model and the report are produced by ALGLIB through out parameters.
    alglib.logitmodel lm;
    alglib.mnlreport rep;
    int info;
    alglib.mnltrainh(inputMatrix, nRows, nFeatures, nClasses, out info, out lm, out rep);
    if (info != 1) throw new ArgumentException("Error in calculation of logit classification solution");

    rmsError = alglib.mnlrmserror(lm, inputMatrix, nRows);
    relClassError = alglib.mnlrelclserror(lm, inputMatrix, nRows);

    MultinomialLogitClassificationSolution solution = new MultinomialLogitClassificationSolution(new MultinomialLogitModel(lm, targetVariable, allowedInputVariables, classValues), (IClassificationProblemData)problemData.Clone());
    return solution;
  }
  #endregion
}


 102  }

