Free cookie consent management tool by TermsFeed Policy Generator

Changeset 14836 for branches/TSNE


Ignore:
Timestamp:
04/10/17 15:48:20 (8 years ago)
Author:
gkronber
Message:

#2700 merged changesets from trunk to branch

Location:
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis
Files:
34 edited
2 copied

Legend:

Unmodified
Added
Removed
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis

  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/BaselineClassifiers/OneR.cs

    r14185 r14836  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
    2324using System.Linq;
     25using System.Threading;
    2426using HeuristicLab.Common;
    2527using HeuristicLab.Core;
     
    5860    }
    5961
    60     protected override void Run() {
     62    protected override void Run(CancellationToken cancellationToken) {
    6163      var solution = CreateOneRSolution(Problem.ProblemData, MinBucketSizeParameter.Value.Value);
    6264      Results.Add(new Result("OneR solution", "The 1R classifier.", solution));
     
    6466
    /// <summary>
    /// Creates a 1R classification solution by comparing the best model over real-valued input
    /// variables with the best model over string (factor) input variables.
    /// </summary>
    /// <param name="problemData">Classification problem data; a clone of it is stored in the returned solution.</param>
    /// <param name="minBucketSize">Passed through to the search over real-valued input variables.</param>
    /// <returns>
    /// A <c>OneRClassificationSolution</c> when the interval model is the only candidate or classifies
    /// strictly more training rows correctly; otherwise a <c>OneFactorClassificationSolution</c>.
    /// </returns>
    /// <exception cref="InvalidProgramException">Thrown when neither kind of model could be created.</exception>
    public static IClassificationSolution CreateOneRSolution(IClassificationProblemData problemData, int minBucketSize = 6) {
      var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices);
      var intervalModel = FindBestDoubleVariableModel(problemData, minBucketSize);
      var factorModel = FindBestFactorModel(problemData);

      if (intervalModel == null && factorModel == null) {
        throw new InvalidProgramException("Could not create OneR solution");
      }

      bool preferIntervalModel;
      if (intervalModel == null) {
        // only the factor model exists
        preferIntervalModel = false;
      } else if (factorModel == null) {
        // only the interval model exists
        preferIntervalModel = true;
      } else {
        // both exist: compare the number of correctly classified training rows
        var intervalEstimates = intervalModel.GetEstimatedClassValues(problemData.Dataset, problemData.TrainingIndices);
        var intervalHits = targetValues.Zip(intervalEstimates, (actual, predicted) => actual.IsAlmost(predicted) ? 1 : 0).Sum();

        var factorEstimates = factorModel.GetEstimatedClassValues(problemData.Dataset, problemData.TrainingIndices);
        var factorHits = targetValues.Zip(factorEstimates, (actual, predicted) => actual.IsAlmost(predicted) ? 1 : 0).Sum();

        // a tie is resolved in favor of the factor model
        preferIntervalModel = intervalHits > factorHits;
      }

      var clonedProblemData = (IClassificationProblemData)problemData.Clone();
      if (preferIntervalModel) {
        return new OneRClassificationSolution(intervalModel, clonedProblemData);
      }
      return new OneFactorClassificationSolution(factorModel, clonedProblemData);
    }
     89
     90    private static OneRClassificationModel FindBestDoubleVariableModel(IClassificationProblemData problemData, int minBucketSize = 6) {
    6691      var bestClassified = 0;
    6792      List<Split> bestSplits = null;
     
    7095      var classValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices);
    7196
    72       foreach (var variable in problemData.AllowedInputVariables) {
     97      var allowedInputVariables = problemData.AllowedInputVariables.Where(problemData.Dataset.VariableHasType<double>);
     98
     99      if (!allowedInputVariables.Any()) return null;
     100
     101      foreach (var variable in allowedInputVariables) {
    73102        var inputValues = problemData.Dataset.GetDoubleValues(variable, problemData.TrainingIndices);
    74103        var samples = inputValues.Zip(classValues, (i, v) => new Sample(i, v)).OrderBy(s => s.inputValue);
    75104
    76         var missingValuesDistribution = samples.Where(s => double.IsNaN(s.inputValue)).GroupBy(s => s.classValue).ToDictionary(s => s.Key, s => s.Count()).MaxItems(s => s.Value).FirstOrDefault();
     105        var missingValuesDistribution = samples
     106          .Where(s => double.IsNaN(s.inputValue)).GroupBy(s => s.classValue)
     107          .ToDictionary(s => s.Key, s => s.Count())
     108          .MaxItems(s => s.Value)
     109          .FirstOrDefault();
    77110
    78111        //calculate class distributions for all distinct inputValues
     
    119152          while (sample.inputValue >= splits[splitIndex].thresholdValue)
    120153            splitIndex++;
    121           correctClassified += sample.classValue == splits[splitIndex].classValue ? 1 : 0;
     154          correctClassified += sample.classValue.IsAlmost(splits[splitIndex].classValue) ? 1 : 0;
    122155        }
    123156        correctClassified += missingValuesDistribution.Value;
     
    133166      //remove neighboring splits with the same class value
    134167      for (int i = 0; i < bestSplits.Count - 1; i++) {
    135         if (bestSplits[i].classValue == bestSplits[i + 1].classValue) {
     168        if (bestSplits[i].classValue.IsAlmost(bestSplits[i + 1].classValue)) {
    136169          bestSplits.Remove(bestSplits[i]);
    137170          i--;
     
    139172      }
    140173
    141       var model = new OneRClassificationModel(problemData.TargetVariable, bestVariable, bestSplits.Select(s => s.thresholdValue).ToArray(), bestSplits.Select(s => s.classValue).ToArray(), bestMissingValuesClass);
    142       var solution = new OneRClassificationSolution(model, (IClassificationProblemData)problemData.Clone());
    143 
    144       return solution;
     174      var model = new OneRClassificationModel(problemData.TargetVariable, bestVariable,
     175        bestSplits.Select(s => s.thresholdValue).ToArray(),
     176        bestSplits.Select(s => s.classValue).ToArray(), bestMissingValuesClass);
     177
     178      return model;
     179    }
     180    private static OneFactorClassificationModel FindBestFactorModel(IClassificationProblemData problemData) {
     181      var classValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices);
     182      var defaultClass = FindMostFrequentClassValue(classValues);
     183      // only select string variables
     184      var allowedInputVariables = problemData.AllowedInputVariables.Where(problemData.Dataset.VariableHasType<string>);
     185
     186      if (!allowedInputVariables.Any()) return null;
     187
     188      OneFactorClassificationModel bestModel = null;
     189      var bestModelNumCorrect = 0;
     190
     191      foreach (var variable in allowedInputVariables) {
     192        var variableValues = problemData.Dataset.GetStringValues(variable, problemData.TrainingIndices);
     193        var groupedClassValues = variableValues
     194          .Zip(classValues, (v, c) => new KeyValuePair<string, double>(v, c))
     195          .GroupBy(kvp => kvp.Key)
     196          .ToDictionary(g => g.Key, g => FindMostFrequentClassValue(g.Select(kvp => kvp.Value)));
     197
     198        var model = new OneFactorClassificationModel(problemData.TargetVariable, variable,
     199          groupedClassValues.Select(kvp => kvp.Key).ToArray(), groupedClassValues.Select(kvp => kvp.Value).ToArray(), defaultClass);
     200
     201        var modelEstimatedValues = model.GetEstimatedClassValues(problemData.Dataset, problemData.TrainingIndices);
     202        var modelNumCorrect = classValues.Zip(modelEstimatedValues, (a, b) => a.IsAlmost(b)).Count(e => e);
     203        if (modelNumCorrect > bestModelNumCorrect) {
     204          bestModelNumCorrect = modelNumCorrect;
     205          bestModel = model;
     206        }
     207      }
     208
     209      return bestModel;
     210    }
     211
     212    private static double FindMostFrequentClassValue(IEnumerable<double> classValues) {
     213      return classValues.GroupBy(c => c).OrderByDescending(g => g.Count()).Select(g => g.Key).First();
    145214    }
    146215
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/BaselineClassifiers/OneRClassificationModel.cs

    r14185 r14836  
    3131  [StorableClass]
    3232  [Item("OneR Classification Model", "A model that uses intervals for one variable to determine the class.")]
    33   public class OneRClassificationModel : ClassificationModel {
     33  public sealed class OneRClassificationModel : ClassificationModel {
    3434    public override IEnumerable<string> VariablesUsedForPrediction {
    3535      get { return new[] { Variable }; }
     
    3737
    3838    [Storable]
    39     protected string variable;
     39    private string variable;
    4040    public string Variable {
    4141      get { return variable; }
     
    4343
    4444    [Storable]
    45     protected double[] splits;
     45    private double[] splits;
    4646    public double[] Splits {
    4747      get { return splits; }
     
    4949
    5050    [Storable]
    51     protected double[] classes;
     51    private double[] classes;
    5252    public double[] Classes {
    5353      get { return classes; }
     
    5555
    5656    [Storable]
    57     protected double missingValuesClass;
     57    private double missingValuesClass;
    5858    public double MissingValuesClass {
    5959      get { return missingValuesClass; }
     
    6161
    6262    [StorableConstructor]
    63     protected OneRClassificationModel(bool deserializing) : base(deserializing) { }
    64     protected OneRClassificationModel(OneRClassificationModel original, Cloner cloner)
     63    private OneRClassificationModel(bool deserializing) : base(deserializing) { }
     64    private OneRClassificationModel(OneRClassificationModel original, Cloner cloner)
    6565      : base(original, cloner) {
    6666      this.variable = (string)original.variable;
    6767      this.splits = (double[])original.splits.Clone();
    6868      this.classes = (double[])original.classes.Clone();
     69      this.missingValuesClass = original.missingValuesClass;
    6970    }
    7071    public override IDeepCloneable Clone(Cloner cloner) { return new OneRClassificationModel(this, cloner); }
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/BaselineClassifiers/OneRClassificationSolution.cs

    r14185 r14836  
    2828  [StorableClass]
    2929  [Item(Name = "OneR Classification Solution", Description = "Represents a OneR classification solution which uses only a single feature with potentially multiple thresholds for class prediction.")]
    30   public class OneRClassificationSolution : ClassificationSolution {
     30  public sealed class OneRClassificationSolution : ClassificationSolution {
    3131    public new OneRClassificationModel Model {
    3232      get { return (OneRClassificationModel)base.Model; }
     
    3535
    3636    [StorableConstructor]
    37     protected OneRClassificationSolution(bool deserializing) : base(deserializing) { }
    38     protected OneRClassificationSolution(OneRClassificationSolution original, Cloner cloner) : base(original, cloner) { }
     37    private OneRClassificationSolution(bool deserializing) : base(deserializing) { }
     38    private OneRClassificationSolution(OneRClassificationSolution original, Cloner cloner) : base(original, cloner) { }
    3939    public OneRClassificationSolution(OneRClassificationModel model, IClassificationProblemData problemData)
    4040      : base(model, problemData) {
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/BaselineClassifiers/ZeroR.cs

    r14185 r14836  
    2121
    2222using System.Linq;
     23using System.Threading;
    2324using HeuristicLab.Common;
    2425using HeuristicLab.Core;
     
    4950    }
    5051
    51     protected override void Run() {
     52    protected override void Run(CancellationToken cancellationToken) {
    5253      var solution = CreateZeroRSolution(Problem.ProblemData);
    5354      Results.Add(new Result("ZeroR solution", "The simplest possible classifier, ZeroR always predicts the majority class.", solution));
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/CrossValidation.cs

    r14185 r14836  
    451451      var aggregatedResults = new List<IResult>();
    452452      foreach (KeyValuePair<string, List<IClassificationSolution>> solutions in resultSolutions) {
    453         // clone manually to correctly clone references between cloned root objects
    454         Cloner cloner = new Cloner();
    455         var problemDataClone = (IClassificationProblemData)cloner.Clone(Problem.ProblemData);
     453        // at least one algorithm (GBT with logistic regression loss) produces a classification solution even though the original problem is a regression problem.
     454        var targetVariable = solutions.Value.First().ProblemData.TargetVariable;
     455        var problemDataClone = new ClassificationProblemData(Problem.ProblemData.Dataset,
     456          Problem.ProblemData.AllowedInputVariables, targetVariable);
    456457        // set partitions of problem data clone correctly
    457458        problemDataClone.TrainingPartition.Start = SamplesStart.Value; problemDataClone.TrainingPartition.End = SamplesEnd.Value;
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/FixedDataAnalysisAlgorithm.cs

    r14185 r14836  
    2121
    2222using System;
    23 using System.Threading;
    24 using System.Threading.Tasks;
    2523using HeuristicLab.Common;
    2624using HeuristicLab.Optimization;
     
    3028namespace HeuristicLab.Algorithms.DataAnalysis {
    3129  [StorableClass]
    32   public abstract class FixedDataAnalysisAlgorithm<T> : Algorithm,
    33     IDataAnalysisAlgorithm<T>,
    34     IStorableContent
    35     where T : class, IDataAnalysisProblem {
    36     public string Filename { get; set; }
    37 
     30  public abstract class FixedDataAnalysisAlgorithm<T> : BasicAlgorithm where T : class, IDataAnalysisProblem {
    3831    #region Properties
    3932    public override Type ProblemType {
     
    4437      set { base.Problem = value; }
    4538    }
    46     [Storable]
    47     private ResultCollection results;
    48     public override ResultCollection Results {
    49       get { return results; }
    50     }
    5139    #endregion
    5240
    53     private DateTime lastUpdateTime;
     41    public override bool SupportsPause { get { return false; } }
    5442
    5543    [StorableConstructor]
    5644    protected FixedDataAnalysisAlgorithm(bool deserializing) : base(deserializing) { }
    57     protected FixedDataAnalysisAlgorithm(FixedDataAnalysisAlgorithm<T> original, Cloner cloner)
    58       : base(original, cloner) {
    59       results = cloner.Clone(original.Results);
    60     }
    61     public FixedDataAnalysisAlgorithm()
    62       : base() {
    63       results = new ResultCollection();
    64     }
    65 
    66     public override void Prepare() {
    67       if (Problem != null) base.Prepare();
    68       results.Clear();
    69       OnPrepared();
    70     }
    71 
    72     public override void Start() {
    73       base.Start();
    74       var cancellationTokenSource = new CancellationTokenSource();
    75 
    76       OnStarted();
    77       Task task = Task.Factory.StartNew(Run, cancellationTokenSource.Token, cancellationTokenSource.Token);
    78       task.ContinueWith(t => {
    79         try {
    80           t.Wait();
    81         }
    82         catch (AggregateException ex) {
    83           try {
    84             ex.Flatten().Handle(x => x is OperationCanceledException);
    85           }
    86           catch (AggregateException remaining) {
    87             if (remaining.InnerExceptions.Count == 1) OnExceptionOccurred(remaining.InnerExceptions[0]);
    88             else OnExceptionOccurred(remaining);
    89           }
    90         }
    91         cancellationTokenSource.Dispose();
    92         cancellationTokenSource = null;
    93         OnStopped();
    94       });
    95     }
    96     private void Run(object state) {
    97       CancellationToken cancellationToken = (CancellationToken)state;
    98       lastUpdateTime = DateTime.UtcNow;
    99       System.Timers.Timer timer = new System.Timers.Timer(250);
    100       timer.AutoReset = true;
    101       timer.Elapsed += new System.Timers.ElapsedEventHandler(timer_Elapsed);
    102       timer.Start();
    103       try {
    104         Run();
    105       }
    106       finally {
    107         timer.Elapsed -= new System.Timers.ElapsedEventHandler(timer_Elapsed);
    108         timer.Stop();
    109         ExecutionTime += DateTime.UtcNow - lastUpdateTime;
    110       }
    111 
    112       cancellationToken.ThrowIfCancellationRequested();
    113     }
    114     protected abstract void Run();
    115     #region Events
    116     protected override void OnProblemChanged() {
    117       Problem.Reset += new EventHandler(Problem_Reset);
    118       base.OnProblemChanged();
    119     }
    120     private void timer_Elapsed(object sender, System.Timers.ElapsedEventArgs e) {
    121       System.Timers.Timer timer = (System.Timers.Timer)sender;
    122       timer.Enabled = false;
    123       DateTime now = DateTime.UtcNow;
    124       ExecutionTime += now - lastUpdateTime;
    125       lastUpdateTime = now;
    126       timer.Enabled = true;
    127     }
    128     #endregion
     45    protected FixedDataAnalysisAlgorithm(FixedDataAnalysisAlgorithm<T> original, Cloner cloner) : base(original, cloner) { }
     46    public FixedDataAnalysisAlgorithm() : base() { }
    12947
    13048  }
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/GBM/GradientBoostingRegressionAlgorithm.cs

    r14558 r14836  
    4444  [StorableClass]
    4545  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 350)]
    46   public class GradientBoostingRegressionAlgorithm : BasicAlgorithm, IDataAnalysisAlgorithm<IRegressionProblem> {
    47     public override Type ProblemType {
    48       get { return typeof(IRegressionProblem); }
    49     }
    50 
    51     public new IRegressionProblem Problem {
    52       get { return (IRegressionProblem)base.Problem; }
    53       set { base.Problem = value; }
    54     }
    55     public override bool SupportsPause
    56     {
    57       get { return false; }
    58     }
     46  public class GradientBoostingRegressionAlgorithm : FixedDataAnalysisAlgorithm<IRegressionProblem> {
    5947
    6048    #region ParameterNames
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessBase.cs

    r14185 r14836  
    2121#endregion
    2222
     23using System.Linq;
    2324using HeuristicLab.Algorithms.GradientDescent;
    2425using HeuristicLab.Common;
     
    119120
    120121      // necessary for BFGS
    121       Parameters.Add(new ValueParameter<BoolValue>("Maximization", new BoolValue(false)));
    122       Parameters["Maximization"].Hidden = true;
     122      Parameters.Add(new FixedValueParameter<BoolValue>("Maximization (BFGS)", new BoolValue(false)));
     123      Parameters["Maximization (BFGS)"].Hidden = true;
    123124
    124125      var randomCreator = new HeuristicLab.Random.RandomCreator();
     
    164165      modelCreator.Successor = updateResults;
    165166
     167      updateResults.MaximizationParameter.ActualName = "Maximization (BFGS)";
    166168      updateResults.StateParameter.ActualName = bfgsInitializer.StateParameter.Name;
    167169      updateResults.QualityParameter.ActualName = NegativeLogLikelihoodParameterName;
     
    197199      // BackwardsCompatibility3.4
    198200      #region Backwards compatible code, remove with 3.5
    199       if (!Parameters.ContainsKey("Maximization")) {
    200         Parameters.Add(new ValueParameter<BoolValue>("Maximization", new BoolValue(false)));
    201         Parameters["Maximization"].Hidden = true;
     201      if (Parameters.ContainsKey("Maximization")) {
     202        Parameters.Remove("Maximization");
     203      }
     204
     205      if (!Parameters.ContainsKey("Maximization (BFGS)")) {
     206        Parameters.Add(new FixedValueParameter<BoolValue>("Maximization (BFGS)", new BoolValue(false)));
     207        Parameters["Maximization (BFGS)"].Hidden = true;
     208        OperatorGraph.Operators.OfType<LbfgsUpdateResults>().First().MaximizationParameter.ActualName = "Maximization BFGS";
    202209      }
    203210
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessClassificationModelCreator.cs

    r14185 r14836  
    6767        HyperparameterGradientsParameter.ActualValue = new RealVector(model.HyperparameterGradients);
    6868        return base.Apply();
    69       } catch (ArgumentException) { } catch (alglib.alglibexception) { }
     69      } catch (ArgumentException) {
     70      } catch (alglib.alglibexception) {
     71      }
    7072      NegativeLogLikelihoodParameter.ActualValue = new DoubleValue(1E300);
    7173      HyperparameterGradientsParameter.ActualValue = new RealVector(Hyperparameter.Count());
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesAlgorithm.cs

    r14558 r14836  
    3838  [StorableClass]
    3939  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 125)]
    40   public class GradientBoostedTreesAlgorithm : BasicAlgorithm, IDataAnalysisAlgorithm<IRegressionProblem> {
    41     public override Type ProblemType
    42     {
    43       get { return typeof(IRegressionProblem); }
    44     }
    45     public new IRegressionProblem Problem
    46     {
    47       get { return (IRegressionProblem)base.Problem; }
    48       set { base.Problem = value; }
    49     }
    50     public override bool SupportsPause
    51     {
    52       get { return false; }
    53     }
    54 
     40  public class GradientBoostedTreesAlgorithm : FixedDataAnalysisAlgorithm<IRegressionProblem> {
    5541    #region ParameterNames
    5642    private const string IterationsParameterName = "Iterations";
     
    289275          var classificationProblemData = new ClassificationProblemData(problemData.Dataset,
    290276            problemData.AllowedInputVariables, problemData.TargetVariable, problemData.Transformations);
    291           classificationModel.RecalculateModelParameters(classificationProblemData, classificationProblemData.TrainingIndices);
     277          classificationProblemData.TrainingPartition.Start = Problem.ProblemData.TrainingPartition.Start;
     278          classificationProblemData.TrainingPartition.End = Problem.ProblemData.TrainingPartition.End;
     279          classificationProblemData.TestPartition.Start = Problem.ProblemData.TestPartition.Start;
     280          classificationProblemData.TestPartition.End = Problem.ProblemData.TestPartition.End;
     281
     282          classificationModel.SetThresholdsAndClassValues(new double[] { double.NegativeInfinity, 0.0 }, new []{ 0.0, 1.0 });
     283
    292284
    293285          var classificationSolution = new DiscriminantFunctionClassificationSolution(classificationModel, classificationProblemData);
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesAlgorithmStatic.cs

    r14185 r14836  
    148148    // for custom stepping & termination
    /// <summary>
    /// Creates the state object used to run gradient boosting step by step.
    /// </summary>
    /// <remarks>
    /// All arguments are passed unchanged to the <c>GbmState</c> constructor after the input
    /// variables have been validated.
    /// </remarks>
    /// <exception cref="NotSupportedException">
    /// Thrown when at least one allowed input variable is not of type double.
    /// </exception>
    public static IGbmState CreateGbmState(IRegressionProblemData problemData, ILossFunction lossFunction, uint randSeed, int maxSize = 3, double r = 0.66, double m = 0.5, double nu = 0.01) {
      // check input variables. Only double variables are allowed.
      var unsupportedInputs =
        from inputName in problemData.AllowedInputVariables
        where !problemData.Dataset.VariableHasType<double>(inputName)
        select inputName;

      if (unsupportedInputs.Any()) {
        var message = "Gradient tree boosting only supports real-valued variables. Unsupported inputs: " + string.Join(", ", unsupportedInputs);
        throw new NotSupportedException(message);
      }

      return new GbmState(problemData, lossFunction, randSeed, maxSize, r, m, nu);
    }
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

    r14787 r14836  
    215215  </ItemGroup>
    216216  <ItemGroup>
     217    <Compile Include="BaselineClassifiers\OneFactorClassificationModel.cs" />
     218    <Compile Include="BaselineClassifiers\OneFactorClassificationSolution.cs" />
    217219    <Compile Include="BaselineClassifiers\OneR.cs" />
    218220    <Compile Include="BaselineClassifiers\OneRClassificationModel.cs" />
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/AlglibUtil.cs

    r14185 r14836  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
    2324using System.Linq;
     
    2728  public static class AlglibUtil {
    2829    public static double[,] PrepareInputMatrix(IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows) {
    29       List<string> variablesList = variables.ToList();
     30      // check input variables. Only double variables are allowed.
     31      var invalidInputs =
     32        variables.Where(name => !dataset.VariableHasType<double>(name));
     33      if (invalidInputs.Any())
     34        throw new NotSupportedException("Unsupported inputs: " + string.Join(", ", invalidInputs));
     35
    3036      List<int> rowsList = rows.ToList();
    31 
    32       double[,] matrix = new double[rowsList.Count, variablesList.Count];
     37      double[,] matrix = new double[rowsList.Count, variables.Count()];
    3338
    3439      int col = 0;
     
    4550      return matrix;
    4651    }
     52
    4753    public static double[,] PrepareAndScaleInputMatrix(IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows, Scaling scaling) {
     54      // check input variables. Only double variables are allowed.
     55      var invalidInputs =
     56        variables.Where(name => !dataset.VariableHasType<double>(name));
     57      if (invalidInputs.Any())
     58        throw new NotSupportedException("Unsupported inputs: " + string.Join(", ", invalidInputs));
     59
    4860      List<string> variablesList = variables.ToList();
    4961      List<int> rowsList = rows.ToList();
     
    6476      return matrix;
    6577    }
     78
     79    /// <summary>
     80    /// Prepares a binary data matrix from a number of factors and specified factor values
     81    /// </summary>
     82    /// <param name="dataset">A dataset that contains the variable values</param>
     83    /// <param name="factorVariables">An enumerable of categorical variables (factors). For each variable an enumerable of values must be specified.</param>
     84    /// <param name="rows">An enumerable of row indices for the dataset</param>
     85    /// <returns></returns>
     86    /// <remarks>Factor variables (categorical variables) are split up into multiple binary variables one for each specified value.</remarks>
     87    public static double[,] PrepareInputMatrix(
     88      IDataset dataset,
     89      IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables,
     90      IEnumerable<int> rows) {
     91      // check input variables. Only string variables are allowed.
     92      var invalidInputs =
     93        factorVariables.Select(kvp => kvp.Key).Where(name => !dataset.VariableHasType<string>(name));
     94      if (invalidInputs.Any())
     95        throw new NotSupportedException("Unsupported inputs: " + string.Join(", ", invalidInputs));
     96
     97      int numBinaryColumns = factorVariables.Sum(kvp => kvp.Value.Count());
     98
     99      List<int> rowsList = rows.ToList();
     100      double[,] matrix = new double[rowsList.Count, numBinaryColumns];
     101
     102      int col = 0;
     103      foreach (var kvp in factorVariables) {
     104        var varName = kvp.Key;
     105        var cats = kvp.Value;
     106        if (!cats.Any()) continue;
     107        foreach (var cat in cats) {
     108          var values = dataset.GetStringValues(varName, rows);
     109          int row = 0;
     110          foreach (var value in values) {
     111            matrix[row, col] = value == cat ? 1 : 0;
     112            row++;
     113          }
     114          col++;
     115        }
     116      }
     117      return matrix;
     118    }
     119
     120    public static IEnumerable<KeyValuePair<string, IEnumerable<string>>> GetFactorVariableValues(IDataset ds, IEnumerable<string> factorVariables, IEnumerable<int> rows) {
     121      return from factor in factorVariables
     122             let distinctValues = ds.GetStringValues(factor, rows).Distinct().ToArray()
     123             // 1 distinct value => skip (constant)
     124             // 2 distinct values => only take one of the two values
     125             // >=3 distinct values => create a binary value for each value
     126             let reducedValues = distinctValues.Length <= 2
     127               ? distinctValues.Take(distinctValues.Length - 1)
     128               : distinctValues
     129             select new KeyValuePair<string, IEnumerable<string>>(factor, reducedValues);
     130    }
    66131  }
    67132}
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearDiscriminantAnalysis.cs

    r14185 r14836  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
     
    3637  /// Linear discriminant analysis classification algorithm.
    3738  /// </summary>
    38   [Item("Linear Discriminant Analysis", "Linear discriminant analysis classification algorithm (wrapper for ALGLIB).")]
     39  [Item("Linear Discriminant Analysis (LDA)", "Linear discriminant analysis classification algorithm (wrapper for ALGLIB).")]
    3940  [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 100)]
    4041  [StorableClass]
     
    5960
    6061    #region Fisher LDA
    61     protected override void Run() {
     62    protected override void Run(CancellationToken cancellationToken) {
    6263      var solution = CreateLinearDiscriminantAnalysisSolution(Problem.ProblemData);
    6364      Results.Add(new Result(LinearDiscriminantAnalysisSolutionResultName, "The linear discriminant analysis.", solution));
     
    7071      IEnumerable<int> rows = problemData.TrainingIndices;
    7172      int nClasses = problemData.ClassNames.Count();
    72       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     73      var doubleVariableNames = allowedInputVariables.Where(dataset.VariableHasType<double>).ToArray();
     74      var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>).ToArray();
     75      double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, doubleVariableNames.Concat(new string[] { targetVariable }), rows);
     76
     77      var factorVariables = AlglibUtil.GetFactorVariableValues(dataset, factorVariableNames, rows);
     78      double[,] factorMatrix = AlglibUtil.PrepareInputMatrix(dataset, factorVariables, rows);
     79
     80      inputMatrix = factorMatrix.HorzCat(inputMatrix);
     81
    7382      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    7483        throw new NotSupportedException("Linear discriminant analysis does not support NaN or infinity values in the input dataset.");
     
    8291      int info;
    8392      double[] w;
    84       alglib.fisherlda(inputMatrix, inputMatrix.GetLength(0), allowedInputVariables.Count(), nClasses, out info, out w);
     93      alglib.fisherlda(inputMatrix, inputMatrix.GetLength(0), inputMatrix.GetLength(1) - 1, nClasses, out info, out w);
    8594      if (info < 1) throw new ArgumentException("Error in calculation of linear discriminant analysis solution");
    8695
     
    92101
    93102      int col = 0;
    94       foreach (string column in allowedInputVariables) {
     103      foreach (var kvp in factorVariables) {
     104        var varName = kvp.Key;
     105        foreach (var cat in kvp.Value) {
     106          BinaryFactorVariableTreeNode vNode =
     107            (BinaryFactorVariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.BinaryFactorVariable().CreateTreeNode();
     108          vNode.VariableName = varName;
     109          vNode.VariableValue = cat;
     110          vNode.Weight = w[col];
     111          addition.AddSubtree(vNode);
     112          col++;
     113        }
     114      }
     115      foreach (string column in doubleVariableNames) {
    95116        VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
    96117        vNode.VariableName = column;
     
    100121      }
    101122
    102       var model = LinearDiscriminantAnalysis.CreateDiscriminantFunctionModel(tree, new SymbolicDataAnalysisExpressionTreeInterpreter(), problemData, rows);
     123      var model = CreateDiscriminantFunctionModel(tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter(), problemData, rows);
    103124      SymbolicDiscriminantFunctionClassificationSolution solution = new SymbolicDiscriminantFunctionClassificationSolution(model, (IClassificationProblemData)problemData.Clone());
    104125
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs

    r14185 r14836  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
     
    6061
    6162    #region linear regression
    62     protected override void Run() {
     63    protected override void Run(CancellationToken cancellationToken) {
    6364      double rmsError, cvRmsError;
    6465      var solution = CreateLinearRegressionSolution(Problem.ProblemData, out rmsError, out cvRmsError);
     
    7374      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    7475      IEnumerable<int> rows = problemData.TrainingIndices;
    75       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     76      var doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>);
     77      var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>);
     78      var factorVariables = AlglibUtil.GetFactorVariableValues(dataset, factorVariableNames, rows);
     79      double[,] binaryMatrix = AlglibUtil.PrepareInputMatrix(dataset, factorVariables, rows);
     80      double[,] doubleVarMatrix = AlglibUtil.PrepareInputMatrix(dataset, doubleVariables.Concat(new string[] { targetVariable }), rows);
     81      var inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix);
     82
    7683      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    7784        throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
     
    98105
    99106      int col = 0;
    100       foreach (string column in allowedInputVariables) {
     107      foreach (var kvp in factorVariables) {
     108        var varName = kvp.Key;
     109        foreach (var cat in kvp.Value) {
     110          BinaryFactorVariableTreeNode vNode =
     111            (BinaryFactorVariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.BinaryFactorVariable().CreateTreeNode();
     112          vNode.VariableName = varName;
     113          vNode.VariableValue = cat;
     114          vNode.Weight = coefficients[col];
     115          addition.AddSubtree(vNode);
     116          col++;
     117        }
     118      }
     119      foreach (string column in doubleVariables) {
    101120        VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
    102121        vNode.VariableName = column;
     
    110129      addition.AddSubtree(cNode);
    111130
    112       SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter()), (IRegressionProblemData)problemData.Clone());
     131      SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter()), (IRegressionProblemData)problemData.Clone());
    113132      solution.Model.Name = "Linear Regression Model";
    114133      solution.Name = "Linear Regression Solution";
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitClassification.cs

    r14185 r14836  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
     
    5758
    5859    #region logit classification
    59     protected override void Run() {
     60    protected override void Run(CancellationToken cancellationToken) {
    6061      double rmsError, relClassError;
    6162      var solution = CreateLogitClassificationSolution(Problem.ProblemData, out rmsError, out relClassError);
     
    6869      var dataset = problemData.Dataset;
    6970      string targetVariable = problemData.TargetVariable;
    70       IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
     71      var doubleVariableNames = problemData.AllowedInputVariables.Where(dataset.VariableHasType<double>);
     72      var factorVariableNames = problemData.AllowedInputVariables.Where(dataset.VariableHasType<string>);
    7173      IEnumerable<int> rows = problemData.TrainingIndices;
    72       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     74      double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, doubleVariableNames.Concat(new string[] { targetVariable }), rows);
     75
     76      var factorVariableValues = AlglibUtil.GetFactorVariableValues(dataset, factorVariableNames, rows);
     77      var factorMatrix = AlglibUtil.PrepareInputMatrix(dataset, factorVariableValues, rows);
     78      inputMatrix = factorMatrix.HorzCat(inputMatrix);
     79
    7380      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    7481        throw new NotSupportedException("Multinomial logit classification does not support NaN or infinity values in the input dataset.");
     
    95102      relClassError = alglib.mnlrelclserror(lm, inputMatrix, nRows);
    96103
    97       MultinomialLogitClassificationSolution solution = new MultinomialLogitClassificationSolution(new MultinomialLogitModel(lm, targetVariable, allowedInputVariables, classValues), (IClassificationProblemData)problemData.Clone());
     104      MultinomialLogitClassificationSolution solution = new MultinomialLogitClassificationSolution(new MultinomialLogitModel(lm, targetVariable, doubleVariableNames, factorVariableValues, classValues), (IClassificationProblemData)problemData.Clone());
    98105      return solution;
    99106    }
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitClassificationSolution.cs

    r14185 r14836  
    4343      : base(original, cloner) {
    4444    }
    45     public MultinomialLogitClassificationSolution( MultinomialLogitModel logitModel,IClassificationProblemData problemData)
     45    public MultinomialLogitClassificationSolution(MultinomialLogitModel logitModel, IClassificationProblemData problemData)
    4646      : base(logitModel, problemData) {
    4747    }
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitModel.cs

    r14185 r14836  
    5656    [Storable]
    5757    private double[] classValues;
     58    [Storable]
     59    private List<KeyValuePair<string, IEnumerable<string>>> factorVariables;
     60
    5861    [StorableConstructor]
    5962    private MultinomialLogitModel(bool deserializing)
     
    6871      allowedInputVariables = (string[])original.allowedInputVariables.Clone();
    6972      classValues = (double[])original.classValues.Clone();
     73      this.factorVariables = original.factorVariables.Select(kvp => new KeyValuePair<string, IEnumerable<string>>(kvp.Key, new List<string>(kvp.Value))).ToList();
    7074    }
    71     public MultinomialLogitModel(alglib.logitmodel logitModel, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues)
     75    public MultinomialLogitModel(alglib.logitmodel logitModel, string targetVariable, IEnumerable<string> doubleInputVariables, IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables, double[] classValues)
    7276      : base(targetVariable) {
    7377      this.name = ItemName;
    7478      this.description = ItemDescription;
    7579      this.logitModel = logitModel;
    76       this.allowedInputVariables = allowedInputVariables.ToArray();
     80      this.allowedInputVariables = doubleInputVariables.ToArray();
     81      this.factorVariables = factorVariables.Select(kvp => new KeyValuePair<string, IEnumerable<string>>(kvp.Key, new List<string>(kvp.Value))).ToList();
    7782      this.classValues = (double[])classValues.Clone();
     83    }
     84
     85    [StorableHook(HookType.AfterDeserialization)]
     86    private void AfterDeserialization() {
     87      // BackwardsCompatibility3.3
     88      #region Backwards compatible code, remove with 3.4
     89      factorVariables = new List<KeyValuePair<string, IEnumerable<string>>>();
     90      #endregion
    7891    }
    7992
     
    8396
    8497    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
     98
    8599      double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     100      double[,] factorData = AlglibUtil.PrepareInputMatrix(dataset, factorVariables, rows);
     101
     102      inputData = factorData.HorzCat(inputData);
    86103
    87104      int n = inputData.GetLength(0);
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/MctsSymbolicRegressionAlgorithm.cs

    r14558 r14836  
    3737  [StorableClass]
    3838  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 250)]
    39   public class MctsSymbolicRegressionAlgorithm : BasicAlgorithm {
    40     public override Type ProblemType
    41     {
    42       get { return typeof(IRegressionProblem); }
    43     }
    44     public new IRegressionProblem Problem
    45     {
    46       get { return (IRegressionProblem)base.Problem; }
    47       set { base.Problem = value; }
    48     }
     39  public class MctsSymbolicRegressionAlgorithm : FixedDataAnalysisAlgorithm<IRegressionProblem> {
    4940    public override bool SupportsPause
    5041    {
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs

    r14235 r14836  
    2222using System;
    2323using System.Linq;
     24using System.Threading;
    2425using HeuristicLab.Common;
    2526using HeuristicLab.Core;
     
    9192
    9293    #region nearest neighbour
    93     protected override void Run() {
     94    protected override void Run(CancellationToken cancellationToken) {
    9495      double[] weights = null;
    9596      if (Weights != null) weights = Weights.CloneAsArray();
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs

    r14322 r14836  
    144144      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    145145        throw new NotSupportedException(
    146           "Nearest neighbour classification does not support NaN or infinity values in the input dataset.");
     146          "Nearest neighbour model does not support NaN or infinity values in the input dataset.");
    147147
    148148      this.kdTree = new alglib.nearestneighbor.kdtree();
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourRegression.cs

    r14235 r14836  
    2121
    2222using System;
     23using System.Threading;
    2324using HeuristicLab.Common;
    2425using HeuristicLab.Core;
     
    9293
    9394    #region nearest neighbour
    94     protected override void Run() {
     95    protected override void Run(CancellationToken cancellationToken) {
    9596      double[] weights = null;
    9697      if (Weights != null) weights = Weights.CloneAsArray();
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkClassification.cs

    r14185 r14836  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
     
    168169
    169170    #region neural network
    170     protected override void Run() {
     171    protected override void Run(CancellationToken cancellationToken) {
    171172      double rmsError, avgRelError, relClassError;
    172173      var solution = CreateNeuralNetworkClassificationSolution(Problem.ProblemData, HiddenLayers, NodesInFirstHiddenLayer, NodesInSecondHiddenLayer, Decay, Restarts, out rmsError, out avgRelError, out relClassError);
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleClassification.cs

    r14185 r14836  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
     
    154155
    155156    #region neural network ensemble
    156     protected override void Run() {
     157    protected override void Run(CancellationToken cancellationToken) {
    157158      double rmsError, avgRelError, relClassError;
    158159      var solution = CreateNeuralNetworkEnsembleClassificationSolution(Problem.ProblemData, EnsembleSize, HiddenLayers, NodesInFirstHiddenLayer, NodesInSecondHiddenLayer, Decay, Restarts, out rmsError, out avgRelError, out relClassError);
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleRegression.cs

    r14185 r14836  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
     
    154155
    155156    #region neural network ensemble
    156     protected override void Run() {
     157    protected override void Run(CancellationToken cancellationToken) {
    157158      double rmsError, avgRelError;
    158159      var solution = CreateNeuralNetworkEnsembleRegressionSolution(Problem.ProblemData, EnsembleSize, HiddenLayers, NodesInFirstHiddenLayer, NodesInSecondHiddenLayer, Decay, Restarts, out rmsError, out avgRelError);
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkRegression.cs

    r14185 r14836  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
     
    170171
    171172    #region neural network
    172     protected override void Run() {
     173    protected override void Run(CancellationToken cancellationToken) {
    173174      double rmsError, avgRelError;
    174175      var solution = CreateNeuralNetworkRegressionSolution(Problem.ProblemData, HiddenLayers, NodesInFirstHiddenLayer, NodesInSecondHiddenLayer, Decay, Restarts, out rmsError, out avgRelError);
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/NonlinearRegression/NonlinearRegression.cs

    r14319 r14836  
    2121
    2222using System;
     23using System.Collections.Generic;
    2324using System.Linq;
     25using System.Threading;
    2426using HeuristicLab.Analysis;
    2527using HeuristicLab.Common;
     
    157159
    158160    #region nonlinear regression
    159     protected override void Run() {
     161    protected override void Run(CancellationToken cancellationToken) {
    160162      IRegressionSolution bestSolution = null;
    161163      if (InitializeParametersRandomly) {
     
    207209      var parser = new InfixExpressionParser();
    208210      var tree = parser.Parse(modelStructure);
     211      // parser handles double and string variables equally by creating a VariableTreeNode
     212      // post-process to replace VariableTreeNodes by FactorVariableTreeNodes for all string variables
     213      var factorSymbol = new FactorVariable();
     214      factorSymbol.VariableNames =
     215        problemData.AllowedInputVariables.Where(name => problemData.Dataset.VariableHasType<string>(name));
     216      factorSymbol.AllVariableNames = factorSymbol.VariableNames;
     217      factorSymbol.VariableValues =
     218        factorSymbol.VariableNames.Select(name =>
     219        new KeyValuePair<string, Dictionary<string, int>>(name,
     220        problemData.Dataset.GetReadOnlyStringValues(name).Distinct()
     221        .Select((n, i) => Tuple.Create(n, i))
     222        .ToDictionary(tup => tup.Item1, tup => tup.Item2)));
     223
     224      foreach (var parent in tree.IterateNodesPrefix().ToArray()) {
     225        for (int i = 0; i < parent.SubtreeCount; i++) {
     226          var varChild = parent.GetSubtree(i) as VariableTreeNode;
     227          var factorVarChild = parent.GetSubtree(i) as FactorVariableTreeNode;
     228          if (varChild != null && factorSymbol.VariableNames.Contains(varChild.VariableName)) {
     229            parent.RemoveSubtree(i);
     230            var factorTreeNode = (FactorVariableTreeNode)factorSymbol.CreateTreeNode();
     231            factorTreeNode.VariableName = varChild.VariableName;
     232            factorTreeNode.Weights =
     233              factorTreeNode.Symbol.GetVariableValues(factorTreeNode.VariableName).Select(_ => 1.0).ToArray();
     234            // weight = 1.0 for each value
     235            parent.InsertSubtree(i, factorTreeNode);
     236          } else if (factorVarChild != null && factorSymbol.VariableNames.Contains(factorVarChild.VariableName)) {
     237            if (factorSymbol.GetVariableValues(factorVarChild.VariableName).Count() != factorVarChild.Weights.Length)
     238              throw new ArgumentException(
     239                string.Format("Factor variable {0} needs exactly {1} weights",
     240                factorVarChild.VariableName,
     241                factorSymbol.GetVariableValues(factorVarChild.VariableName).Count()));
     242            parent.RemoveSubtree(i);
     243            var factorTreeNode = (FactorVariableTreeNode)factorSymbol.CreateTreeNode();
     244            factorTreeNode.VariableName = factorVarChild.VariableName;
     245            factorTreeNode.Weights = factorVarChild.Weights;
     246            parent.InsertSubtree(i, factorTreeNode);
     247          }
     248        }
     249      }
    209250
    210251      if (!SymbolicRegressionConstantOptimizationEvaluator.CanOptimizeConstants(tree)) throw new ArgumentException("The optimizer does not support the specified model structure.");
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestClassification.cs

    r14185 r14836  
    2020#endregion
    2121
     22using System.Threading;
    2223using HeuristicLab.Common;
    2324using HeuristicLab.Core;
     
    132133
    133134    #region random forest
    134     protected override void Run() {
     135    protected override void Run(CancellationToken cancellationToken) {
    135136      double rmsError, relClassificationError, outOfBagRmsError, outOfBagRelClassificationError;
    136137      if (SetSeedRandomly) Seed = new System.Random().Next();
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestRegression.cs

    r14185 r14836  
    2020#endregion
    2121
     22using System.Threading;
    2223using HeuristicLab.Common;
    2324using HeuristicLab.Core;
     
    131132
    132133    #region random forest
    133     protected override void Run() {
     134    protected override void Run(CancellationToken cancellationToken) {
    134135      double rmsError, avgRelError, outOfBagRmsError, outOfBagAvgRelError;
    135136      if (SetSeedRandomly) Seed = new System.Random().Next();
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorClassification.cs

    r14185 r14836  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
     
    143144
    144145    #region support vector classification
    145     protected override void Run() {
     146    protected override void Run(CancellationToken cancellationToken) {
    146147      IClassificationProblemData problemData = Problem.ProblemData;
    147148      IEnumerable<string> selectedInputVariables = problemData.AllowedInputVariables;
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorRegression.cs

    r14185 r14836  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
     
    151152
    152153    #region support vector regression
    153     protected override void Run() {
     154    protected override void Run(CancellationToken cancellationToken) {
    154155      IRegressionProblemData problemData = Problem.ProblemData;
    155156      IEnumerable<string> selectedInputVariables = problemData.AllowedInputVariables;
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TimeSeries/AutoregressiveModeling.cs

    r14185 r14836  
    2222using System;
    2323using System.Linq;
     24using System.Threading;
    2425using HeuristicLab.Common;
    2526using HeuristicLab.Core;
     
    6364    }
    6465
    65     protected override void Run() {
     66    protected override void Run(CancellationToken cancellationToken) {
    6667      double rmsError, cvRmsError;
    6768      var solution = CreateAutoRegressiveSolution(Problem.ProblemData, TimeOffset, out rmsError, out cvRmsError);
  • branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClustering.cs

    r14185 r14836  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
     
    7778
    7879    #region k-Means clustering
    79     protected override void Run() {
     80    protected override void Run(CancellationToken cancellationToken) {
    8081      var solution = CreateKMeansSolution(Problem.ProblemData, K.Value, Restarts.Value);
    8182      Results.Add(new Result(KMeansSolutionResultName, "The k-Means clustering solution.", solution));
Note: See TracChangeset for help on using the changeset viewer.