Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/08/19 14:59:31 (6 years ago)
Author:
pfleck
Message:

#2972 merged trunk into branch

Location:
branches/2972_PDPRowSelect
Files:
9 edited

Legend:

Unmodified
Added
Removed
  • branches/2972_PDPRowSelect

  • branches/2972_PDPRowSelect/HeuristicLab.Algorithms.DataAnalysis

  • branches/2972_PDPRowSelect/HeuristicLab.Algorithms.DataAnalysis/3.4

  • branches/2972_PDPRowSelect/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs

    r16389 r16518  
    4141  [StorableClass]
    4242  public sealed class LinearRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> {
    43     private const string LinearRegressionModelResultName = "Linear regression solution";
     43    private const string SolutionResultName = "Linear regression solution";
     44    private const string ConfidenceSolutionResultName = "Solution with prediction intervals";
    4445
    4546    [StorableConstructor]
     
    6263    protected override void Run(CancellationToken cancellationToken) {
    6364      double rmsError, cvRmsError;
     65      // produce both solutions, to allow symbolic manipulation of LR solutions as well
     66      // as the calculation of prediction intervals.
     67      // There is no clean way to implement the new model class for LR as a symbolic model.
    6468      var solution = CreateSolution(Problem.ProblemData, out rmsError, out cvRmsError);
    65       Results.Add(new Result(LinearRegressionModelResultName, "The linear regression solution.", solution));
     69#pragma warning disable 168, 3021
     70      var symbolicSolution = CreateLinearRegressionSolution(Problem.ProblemData, out rmsError, out cvRmsError);
     71#pragma warning restore 168, 3021
     72      Results.Add(new Result(SolutionResultName, "The linear regression solution.", symbolicSolution));
     73      Results.Add(new Result(ConfidenceSolutionResultName, "Linear regression solution with parameter covariance matrix " +
     74                                                           "and calculation of prediction intervals", solution));
    6675      Results.Add(new Result("Root mean square error", "The root of the mean of squared errors of the linear regression solution on the training set.", new DoubleValue(rmsError)));
    6776      Results.Add(new Result("Estimated root mean square error (cross-validation)", "The estimated root of the mean of squared errors of the linear regression solution via cross validation.", new DoubleValue(cvRmsError)));
     
    8897      double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant
    8998      alglib.lrunpack(lm, out coefficients, out nFeatures);
    90      
    91       int nFactorCoeff = factorVariables.Sum(kvp=>kvp.Value.Count());
     99
     100      int nFactorCoeff = factorVariables.Sum(kvp => kvp.Value.Count());
    92101      int nVarCoeff = doubleVariables.Count();
    93102      var tree = LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(),
     
    132141    }
    133142
    134     private static void PrepareData(IRegressionProblemData problemData, 
    135       out double[,] inputMatrix, 
    136       out IEnumerable<string> doubleVariables, 
     143    private static void PrepareData(IRegressionProblemData problemData,
     144      out double[,] inputMatrix,
     145      out IEnumerable<string> doubleVariables,
    137146      out IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables) {
    138147      var dataset = problemData.Dataset;
  • branches/2972_PDPRowSelect/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegressionModel.cs

    r16415 r16518  
    2222using System;
    2323using System.Collections.Generic;
     24using System.Drawing;
    2425using System.Linq;
    2526using HeuristicLab.Common;
     
    3536  [Item("Linear Regression Model", "Represents a linear regression model.")]
    3637  public sealed class LinearRegressionModel : RegressionModel, IConfidenceRegressionModel {
     38    public static new Image StaticItemImage {
     39      get { return HeuristicLab.Common.Resources.VSImageLibrary.Function; }
     40    }
    3741
    3842    [Storable]
     
    4953      get; private set;
    5054    }
    51    
     55
    5256    public override IEnumerable<string> VariablesUsedForPrediction {
    53       get { return allowedInputVariables.Union(factorVariables.Select(f => f.Key)); }
     57      get { return doubleVariables.Union(factorVariables.Select(f => f.Key)); }
    5458    }
    5559
    5660    [Storable]
    57     private string[] allowedInputVariables;
     61    private string[] doubleVariables;
    5862    [Storable]
    5963    private List<KeyValuePair<string, IEnumerable<string>>> factorVariables;
     64
     65    /// <summary>
     66    /// Enumerable of parameter names used by the model, including the one-hot-encoded values of factor variables and the constant term.
     67    /// </summary>
     68    public IEnumerable<string> ParameterNames {
     69      get {
     70        return factorVariables.SelectMany(kvp => kvp.Value.Select(factorVal => $"{kvp.Key}={factorVal}"))
     71          .Concat(doubleVariables)
     72          .Concat(new[] { "<const>" });
     73      }
     74    }
    6075
    6176    [StorableConstructor]
     
    6984      this.NoiseSigma = original.NoiseSigma;
    7085
    71       allowedInputVariables = (string[])original.allowedInputVariables.Clone();
     86      doubleVariables = (string[])original.doubleVariables.Clone();
    7287      this.factorVariables = original.factorVariables.Select(kvp => new KeyValuePair<string, IEnumerable<string>>(kvp.Key, new List<string>(kvp.Value))).ToList();
    7388    }
     
    7893      this.W = new double[w.Length];
    7994      Array.Copy(w, W, w.Length);
    80       this.C = new double[covariance.GetLength(0),covariance.GetLength(1)];
     95      this.C = new double[covariance.GetLength(0), covariance.GetLength(1)];
    8196      Array.Copy(covariance, C, covariance.Length);
    8297      this.NoiseSigma = noiseSigma;
    83       var stringInputVariables = factorVariables.Select(f => f.Key).Distinct();
    84       this.allowedInputVariables = doubleInputVariables.ToArray();
     98      this.doubleVariables = doubleInputVariables.ToArray();
     99      // clone
    85100      this.factorVariables = factorVariables.Select(kvp => new KeyValuePair<string, IEnumerable<string>>(kvp.Key, new List<string>(kvp.Value))).ToList();
    86101    }
     
    95110
    96111    public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    97       double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
     112      double[,] inputData = dataset.ToArray(doubleVariables, rows);
    98113      double[,] factorData = dataset.ToArray(factorVariables, rows);
    99114
     
    114129
    115130    public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
    116       double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
     131      double[,] inputData = dataset.ToArray(doubleVariables, rows);
    117132      double[,] factorData = dataset.ToArray(factorVariables, rows);
    118133
     
    123138
    124139      double[] d = new double[C.GetLength(0)];
    125      
     140
    126141      for (int row = 0; row < n; row++) {
    127142        for (int column = 0; column < columns; column++) {
    128           d[column] = inputData[row,column];
     143          d[column] = inputData[row, column];
    129144        }
    130145        d[columns] = 1;
    131146
    132147        double var = 0.0;
    133         for(int i=0;i<d.Length;i++) {
    134           for(int j = 0;j<d.Length;j++) {
     148        for (int i = 0; i < d.Length; i++) {
     149          for (int j = 0; j < d.Length; j++) {
    135150            var += d[i] * C[i, j] * d[j];
    136151          }
    137152        }
    138         yield return var + NoiseSigma*NoiseSigma;
     153        yield return var + NoiseSigma * NoiseSigma;
    139154      }
    140155    }
    141 
    142156
    143157    public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
  • branches/2972_PDPRowSelect/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaModel.cs

    r15869 r16518  
    6565
    6666      var ds = ReduceDataset(dataset, rows);
    67       nnModel = new NearestNeighbourModel(ds, Enumerable.Range(0, ds.Rows), k, ds.VariableNames.Last(), ds.VariableNames.Take(transformationMatrix.GetLength(1)), classValues: classValues);
     67      nnModel = new NearestNeighbourModel(ds, Enumerable.Range(0, ds.Rows), k, false, ds.VariableNames.Last(), ds.VariableNames.Take(transformationMatrix.GetLength(1)), classValues: classValues);
    6868    }
    6969
  • branches/2972_PDPRowSelect/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs

    r15583 r16518  
    1 #region License Information
     1#region License Information
    22/* HeuristicLab
    33 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     
    4242    private const string NearestNeighbourClassificationModelResultName = "Nearest neighbour classification solution";
    4343    private const string WeightsParameterName = "Weights";
    44 
     44    private const string SelfMatchParameterName = "SelfMatch";
    4545
    4646    #region parameter properties
    4747    public IFixedValueParameter<IntValue> KParameter {
    4848      get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; }
     49    }
     50    public IFixedValueParameter<BoolValue> SelfMatchParameter {
     51      get { return (IFixedValueParameter<BoolValue>)Parameters[SelfMatchParameterName]; }
    4952    }
    5053    public IValueParameter<DoubleArray> WeightsParameter {
     
    5356    #endregion
    5457    #region properties
     58    public bool SelfMatch {
     59      get { return SelfMatchParameter.Value.Value; }
     60      set { SelfMatchParameter.Value.Value = value; }
     61    }
    5562    public int K {
    5663      get { return KParameter.Value.Value; }
     
    7380    public NearestNeighbourClassification()
    7481      : base() {
     82      Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
    7583      Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3)));
    7684      Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
     
    8391      if (!Parameters.ContainsKey(WeightsParameterName)) {
    8492        Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
     93      }
     94      if (!Parameters.ContainsKey(SelfMatchParameterName)) {
     95        Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
    8596      }
    8697      #endregion
     
    95106      double[] weights = null;
    96107      if (Weights != null) weights = Weights.CloneAsArray();
    97       var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K, weights);
     108      var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K, SelfMatch, weights);
    98109      Results.Add(new Result(NearestNeighbourClassificationModelResultName, "The nearest neighbour classification solution.", solution));
    99110    }
    100111
    101     public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k, double[] weights = null) {
     112    public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
    102113      var problemDataClone = (IClassificationProblemData)problemData.Clone();
    103       return new NearestNeighbourClassificationSolution(Train(problemDataClone, k, weights), problemDataClone);
     114      return new NearestNeighbourClassificationSolution(Train(problemDataClone, k, selfMatch, weights), problemDataClone);
    104115    }
    105116
    106     public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k, double[] weights = null) {
     117    public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
    107118      return new NearestNeighbourModel(problemData.Dataset,
    108119        problemData.TrainingIndices,
    109120        k,
     121        selfMatch,
    110122        problemData.TargetVariable,
    111123        problemData.AllowedInputVariables,
  • branches/2972_PDPRowSelect/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs

    r16243 r16518  
    1 #region License Information
     1#region License Information
    22/* HeuristicLab
    33 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     
    3737
    3838    private readonly object kdTreeLockObject = new object();
     39
    3940    private alglib.nearestneighbor.kdtree kdTree;
    4041    public alglib.nearestneighbor.kdtree KDTree {
     
    4950    }
    5051
    51 
    5252    public override IEnumerable<string> VariablesUsedForPrediction {
    5353      get { return allowedInputVariables; }
     
    6060    [Storable]
    6161    private int k;
     62    [Storable(DefaultValue = false)]
     63    private bool selfMatch;
    6264    [Storable(DefaultValue = null)]
    6365    private double[] weights; // not set for old versions loaded from disk
     
    9799      kdTree.x = (double[])original.kdTree.x.Clone();
    98100      kdTree.xy = (double[,])original.kdTree.xy.Clone();
    99 
     101      selfMatch = original.selfMatch;
    100102      k = original.k;
    101103      isCompatibilityLoaded = original.IsCompatibilityLoaded;
     
    110112        this.classValues = (double[])original.classValues.Clone();
    111113    }
    112     public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null)
     114    public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, bool selfMatch, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null)
    113115      : base(targetVariable) {
    114116      Name = ItemName;
    115117      Description = ItemDescription;
     118      this.selfMatch = selfMatch;
    116119      this.k = k;
    117120      this.allowedInputVariables = allowedInputVariables.ToArray();
     
    132135            .Select(name => {
    133136              var pop = dataset.GetDoubleValues(name, rows).StandardDeviationPop();
    134               return  pop.IsAlmost(0) ? 1.0 : 1.0/pop;
     137              return pop.IsAlmost(0) ? 1.0 : 1.0 / pop;
    135138            })
    136139            .Concat(new double[] { 1.0 }) // no scaling for target variable
     
    201204        int numNeighbours;
    202205        lock (kdTreeLockObject) { // gkronber: the following calls change the kdTree data structure
    203           numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false);
     206          numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch);
    204207          alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists);
    205208          alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours);
    206209        }
    207 
     210        if (selfMatch) {
     211          // weights for neighbours are 1/d.
     212          // override distances (=0) of exact matches using 1% of the distance of the next closest non-self-match neighbour -> selfmatches weigh 100x more than the next closest neighbour.
     213          // if all k neighbours are selfmatches then they all get distance 0.01 (and therefore equal weights).
     214          double minDist = dists[0] + 1;
     215          for (int i = 0; i < numNeighbours; i++) {
     216            if ((minDist > dists[i]) && (dists[i] != 0)) {
     217              minDist = dists[i];
     218            }
     219          }
     220          minDist /= 100.0;
     221          for (int i = 0; i < numNeighbours; i++) {
     222            if (dists[i] == 0) {
     223              dists[i] = minDist;
     224            }
     225          }
     226        }
    208227        double distanceWeightedValue = 0.0;
    209228        double distsSum = 0.0;
     
    238257        lock (kdTreeLockObject) {
    239258          // gkronber: the following calls change the kdTree data structure
    240           numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false);
     259          numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch);
    241260          alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists);
    242261          alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours);
  • branches/2972_PDPRowSelect/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourRegression.cs

    r15583 r16518  
    4141    private const string NearestNeighbourRegressionModelResultName = "Nearest neighbour regression solution";
    4242    private const string WeightsParameterName = "Weights";
     43    private const string SelfMatchParameterName = "SelfMatch";
    4344
    4445    #region parameter properties
     
    4647      get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; }
    4748    }
    48 
     49    public IFixedValueParameter<BoolValue> SelfMatchParameter {
     50      get { return (IFixedValueParameter<BoolValue>)Parameters[SelfMatchParameterName]; }
     51    }
    4952    public IValueParameter<DoubleArray> WeightsParameter {
    5053      get { return (IValueParameter<DoubleArray>)Parameters[WeightsParameterName]; }
     
    5962      }
    6063    }
    61 
     64    public bool SelfMatch {
     65      get { return SelfMatchParameter.Value.Value; }
     66      set { SelfMatchParameter.Value.Value = value; }
     67    }
    6268    public DoubleArray Weights {
    6369      get { return WeightsParameter.Value; }
     
    7581      Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3)));
    7682      Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
     83      Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
    7784      Problem = new RegressionProblem();
    7885    }
     
    8491      if (!Parameters.ContainsKey(WeightsParameterName)) {
    8592        Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
     93      }
     94      if (!Parameters.ContainsKey(SelfMatchParameterName)) {
     95        Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
    8696      }
    8797      #endregion
     
    96106      double[] weights = null;
    97107      if (Weights != null) weights = Weights.CloneAsArray();
    98       var solution = CreateNearestNeighbourRegressionSolution(Problem.ProblemData, K, weights);
     108      var solution = CreateNearestNeighbourRegressionSolution(Problem.ProblemData, K, SelfMatch, weights);
    99109      Results.Add(new Result(NearestNeighbourRegressionModelResultName, "The nearest neighbour regression solution.", solution));
    100110    }
    101111
    102     public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k, double[] weights = null) {
     112    public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
    103113      var clonedProblemData = (IRegressionProblemData)problemData.Clone();
    104       return new NearestNeighbourRegressionSolution(Train(problemData, k, weights), clonedProblemData);
     114      return new NearestNeighbourRegressionSolution(Train(problemData, k, selfMatch, weights), clonedProblemData);
    105115    }
    106116
    107     public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k, double[] weights = null) {
     117    public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
    108118      return new NearestNeighbourModel(problemData.Dataset,
    109119        problemData.TrainingIndices,
    110120        k,
     121        selfMatch,
    111122        problemData.TargetVariable,
    112123        problemData.AllowedInputVariables,
Note: See TracChangeset for help on using the changeset viewer.