Changeset 14330


Ignore:
Timestamp:
10/13/16 19:47:41 (3 years ago)
Author:
gkronber
Message:

#2650 Merged r14282:14322 from trunk to branch (fixing conflicts)

Location:
branches/symbreg-factors-2650
Files:
53 edited

Legend:

Unmodified
Added
Removed
  • branches/symbreg-factors-2650

  • branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis

  • branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesModelSurrogate.cs

    r14239 r14330  
    2121#endregion
    2222
     23using System;
    2324using System.Collections.Generic;
     25using System.Diagnostics.Eventing.Reader;
    2426using System.Linq;
    2527using HeuristicLab.Common;
     
    3638  public sealed class GradientBoostedTreesModelSurrogate : RegressionModel, IGradientBoostedTreesModel {
    3739    // don't store the actual model!
    38     private IGradientBoostedTreesModel actualModel; // the actual model is only recalculated when necessary
     40    // the actual model is only recalculated when necessary
     41    private readonly Lazy<IGradientBoostedTreesModel> actualModel;
     42    private IGradientBoostedTreesModel ActualModel {
     43      get { return actualModel.Value; }
     44    }
    3945
    4046    [Storable]
     
    5763
    5864    public override IEnumerable<string> VariablesUsedForPrediction {
    59       get
    60       {
    61         lock (actualModel) { if (actualModel == null) actualModel = RecalculateModel(); }
    62         return actualModel.Models.SelectMany(x => x.VariablesUsedForPrediction).Distinct().OrderBy(x => x);
     65      get {
     66        return ActualModel.Models.SelectMany(x => x.VariablesUsedForPrediction).Distinct().OrderBy(x => x);
    6367      }
    6468    }
    6569
    6670    [StorableConstructor]
    67     private GradientBoostedTreesModelSurrogate(bool deserializing) : base(deserializing) { }
     71    private GradientBoostedTreesModelSurrogate(bool deserializing)
     72      : base(deserializing) {
     73      actualModel = new Lazy<IGradientBoostedTreesModel>(() => RecalculateModel());
     74    }
    6875
    6976    private GradientBoostedTreesModelSurrogate(GradientBoostedTreesModelSurrogate original, Cloner cloner)
    7077      : base(original, cloner) {
    71       if (original.actualModel != null) this.actualModel = cloner.Clone(original.actualModel);
     78      IGradientBoostedTreesModel clonedModel = null;
     79      if (original.ActualModel != null) clonedModel = cloner.Clone(original.ActualModel);
     80      actualModel = new Lazy<IGradientBoostedTreesModel>(CreateLazyInitFunc(clonedModel)); // only capture clonedModel in the closure
    7281
    7382      this.trainingProblemData = cloner.Clone(original.trainingProblemData);
     
    7988      this.m = original.m;
    8089      this.nu = original.nu;
     90    }
     91
     92    private Func<IGradientBoostedTreesModel> CreateLazyInitFunc(IGradientBoostedTreesModel clonedModel) {
     93      return () => {
     94        return clonedModel == null ? RecalculateModel() : clonedModel;
     95      };
    8196    }
    8297
     
    100115      IGradientBoostedTreesModel model)
    101116      : this(trainingProblemData, seed, lossFunction, iterations, maxSize, r, m, nu) {
    102       this.actualModel = model;
     117      actualModel = new Lazy<IGradientBoostedTreesModel>(() => model);
    103118    }
    104119
     
    109124    // forward message to actual model (recalculate model first if necessary)
    110125    public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    111       lock (actualModel) { if (actualModel == null) actualModel = RecalculateModel(); }
    112       return actualModel.GetEstimatedValues(dataset, rows);
     126      return ActualModel.GetEstimatedValues(dataset, rows);
    113127    }
    114128
     
    123137    public IEnumerable<IRegressionModel> Models {
    124138      get {
    125         lock(actualModel) { if (actualModel == null) actualModel = RecalculateModel();}
    126         return actualModel.Models;
     139        return ActualModel.Models;
    127140      }
    128141    }
     
    130143    public IEnumerable<double> Weights {
    131144      get {
    132         lock(actualModel) { if (actualModel == null) actualModel = RecalculateModel();}
    133         return actualModel.Weights;
     145        return ActualModel.Weights;
    134146      }
    135147    }
  • branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs

    r14239 r14330  
    3636  public sealed class NearestNeighbourModel : ClassificationModel, INearestNeighbourModel {
    3737
     38    private readonly object kdTreeLockObject = new object();
    3839    private alglib.nearestneighbor.kdtree kdTree;
    3940    public alglib.nearestneighbor.kdtree KDTree {
     
    4748      }
    4849    }
     50
    4951
    5052    public override IEnumerable<string> VariablesUsedForPrediction {
     
    201203        }
    202204        int numNeighbours;
    203         lock (kdTree) { // gkronber: the following calls change the kdTree data structure
     205        lock (kdTreeLockObject) { // gkronber: the following calls change the kdTree data structure
    204206          numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false);
    205207          alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists);
     
    237239        }
    238240        int numNeighbours;
    239         lock (kdTree) {
     241        lock (kdTreeLockObject) {
    240242          // gkronber: the following calls change the kdTree data structure
    241243          numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false);
  • branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/NonlinearRegression/NonlinearRegression.cs

    r14277 r14330  
    2121
    2222using System;
    23 using System.Collections.Generic;
    2423using System.Linq;
     24using HeuristicLab.Analysis;
    2525using HeuristicLab.Common;
    2626using HeuristicLab.Core;
    2727using HeuristicLab.Data;
     28using HeuristicLab.Optimization;
    2829using HeuristicLab.Parameters;
    2930using HeuristicLab.Optimization;
     
    4849    private const string SetSeedRandomlyParameterName = "SetSeedRandomly";
    4950    private const string SeedParameterName = "Seed";
     51    private const string InitParamsRandomlyParameterName = "InitializeParametersRandomly";
    5052
    5153    public IFixedValueParameter<StringValue> ModelStructureParameter {
     
    6668    public IFixedValueParameter<IntValue> RestartsParameter {
    6769      get { return (IFixedValueParameter<IntValue>)Parameters[RestartsParameterName]; }
     70    }
     71
     72    public IFixedValueParameter<BoolValue> InitParametersRandomlyParameter {
     73      get { return (IFixedValueParameter<BoolValue>)Parameters[InitParamsRandomlyParameterName]; }
    6874    }
    6975
     
    9197      get { return SetSeedRandomlyParameter.Value.Value; }
    9298      set { SetSeedRandomlyParameter.Value.Value = value; }
     99    }
     100
     101    public bool InitializeParametersRandomly {
     102      get { return InitParametersRandomlyParameter.Value.Value; }
     103      set { InitParametersRandomlyParameter.Value.Value = value; }
    93104    }
    94105
     
    103114      Parameters.Add(new FixedValueParameter<StringValue>(ModelStructureParameterName, "The function for which the parameters must be fit (only numeric constants are tuned).", new StringValue("1.0 * x*x + 0.0")));
    104115      Parameters.Add(new FixedValueParameter<IntValue>(IterationsParameterName, "The maximum number of iterations for constants optimization.", new IntValue(200)));
    105       Parameters.Add(new FixedValueParameter<IntValue>(RestartsParameterName, "The number of independent random restarts", new IntValue(10)));
     116      Parameters.Add(new FixedValueParameter<IntValue>(RestartsParameterName, "The number of independent random restarts (>0)", new IntValue(10)));
    106117      Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The PRNG seed value.", new IntValue()));
    107118      Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "Switch to determine if the random number seed should be initialized randomly.", new BoolValue(true)));
     119      Parameters.Add(new FixedValueParameter<BoolValue>(InitParamsRandomlyParameterName, "Switch to determine if the real-valued model parameters should be initialized randomly in each restart.", new BoolValue(false)));
     120
     121      SetParameterHiddenState();
     122
     123      InitParametersRandomlyParameter.Value.ValueChanged += (sender, args) => {
     124        SetParameterHiddenState();
     125      };
     126    }
     127
     128    private void SetParameterHiddenState() {
     129      var hide = !InitializeParametersRandomly;
     130      RestartsParameter.Hidden = hide;
     131      SeedParameter.Hidden = hide;
     132      SetSeedRandomlyParameter.Hidden = hide;
    108133    }
    109134
     
    118143      if (!Parameters.ContainsKey(SetSeedRandomlyParameterName))
    119144        Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "Switch to determine if the random number seed should be initialized randomly.", new BoolValue(true)));
     145      if (!Parameters.ContainsKey(InitParamsRandomlyParameterName))
     146        Parameters.Add(new FixedValueParameter<BoolValue>(InitParamsRandomlyParameterName, "Switch to determine if the numeric parameters of the model should be initialized randomly.", new BoolValue(false)));
     147
     148      SetParameterHiddenState();
     149      InitParametersRandomlyParameter.Value.ValueChanged += (sender, args) => {
     150        SetParameterHiddenState();
     151      };
    120152      #endregion
    121153    }
     
    127159    #region nonlinear regression
    128160    protected override void Run() {
    129       if (SetSeedRandomly) Seed = (new System.Random()).Next();
    130       var rand = new MersenneTwister((uint)Seed);
    131161      IRegressionSolution bestSolution = null;
    132       for (int r = 0; r < Restarts; r++) {
    133         var solution = CreateRegressionSolution(Problem.ProblemData, ModelStructure, Iterations, rand);
    134         if (bestSolution == null || solution.TrainingRootMeanSquaredError < bestSolution.TrainingRootMeanSquaredError) {
    135           bestSolution = solution;
     162      if (InitializeParametersRandomly) {
     163        var qualityTable = new DataTable("RMSE table");
     164        qualityTable.VisualProperties.YAxisLogScale = true;
     165        var trainRMSERow = new DataRow("RMSE (train)");
     166        trainRMSERow.VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;
     167        var testRMSERow = new DataRow("RMSE test");
     168        testRMSERow.VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;
     169
     170        qualityTable.Rows.Add(trainRMSERow);
     171        qualityTable.Rows.Add(testRMSERow);
     172        Results.Add(new Result(qualityTable.Name, qualityTable.Name + " for all restarts", qualityTable));
     173        if (SetSeedRandomly) Seed = (new System.Random()).Next();
     174        var rand = new MersenneTwister((uint)Seed);
     175        bestSolution = CreateRegressionSolution(Problem.ProblemData, ModelStructure, Iterations, rand);
     176        trainRMSERow.Values.Add(bestSolution.TrainingRootMeanSquaredError);
     177        testRMSERow.Values.Add(bestSolution.TestRootMeanSquaredError);
     178        for (int r = 0; r < Restarts; r++) {
     179          var solution = CreateRegressionSolution(Problem.ProblemData, ModelStructure, Iterations, rand);
     180          trainRMSERow.Values.Add(solution.TrainingRootMeanSquaredError);
     181          testRMSERow.Values.Add(solution.TestRootMeanSquaredError);
     182          if (solution.TrainingRootMeanSquaredError < bestSolution.TrainingRootMeanSquaredError) {
     183            bestSolution = solution;
     184          }
    136185        }
     186      } else {
     187        bestSolution = CreateRegressionSolution(Problem.ProblemData, ModelStructure, Iterations);
    137188      }
    138189
     
    148199    /// The starting point for the numeric constants is initialized randomly if a random number generator is specified (~N(0,1)). Otherwise the user specified constants are
    149200    /// used as a starting point.
    150     /// </summary>
     201    /// </summary>-
    151202    /// <param name="problemData">Training and test data</param>
    152203    /// <param name="modelStructure">The function as infix expression</param>
     
    154205    /// <param name="random">Optional random number generator for random initialization of numeric constants.</param>
    155206    /// <returns></returns>
    156     public static ISymbolicRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData, string modelStructure, int maxIterations, IRandom random = null) {
     207    public static ISymbolicRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData, string modelStructure, int maxIterations, IRandom rand = null) {
    157208      var parser = new InfixExpressionParser();
    158209      var tree = parser.Parse(modelStructure);
     
    183234
    184235      // initialize constants randomly
    185       if (random != null) {
     236      if (rand != null) {
    186237        foreach (var node in tree.IterateNodesPrefix().OfType<ConstantTreeNode>()) {
    187           node.Value = NormalDistributedRandom.NextDouble(random, 0, 1);
     238          double f = Math.Exp(NormalDistributedRandom.NextDouble(rand, 0, 1));
     239          double s = rand.NextDouble() < 0.5 ? -1 : 1;
     240          node.Value = s * node.Value * f;
    188241        }
    189242      }
  • branches/symbreg-factors-2650/HeuristicLab.Data.Views

  • branches/symbreg-factors-2650/HeuristicLab.Data.Views/3.3

  • branches/symbreg-factors-2650/HeuristicLab.Data.Views/3.3/StringConvertibleArrayView.Designer.cs

    r14185 r14330  
    6565      // lengthTextBox
    6666      //
    67       this.lengthTextBox.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
    68                   | System.Windows.Forms.AnchorStyles.Right)));
     67      this.lengthTextBox.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left) 
     68            | System.Windows.Forms.AnchorStyles.Right)));
    6969      this.lengthTextBox.Location = new System.Drawing.Point(52, 0);
    7070      this.lengthTextBox.Name = "lengthTextBox";
     
    7979      this.dataGridView.AllowUserToAddRows = false;
    8080      this.dataGridView.AllowUserToDeleteRows = false;
    81       this.dataGridView.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
    82                   | System.Windows.Forms.AnchorStyles.Left)
    83                   | System.Windows.Forms.AnchorStyles.Right)));
    84       this.dataGridView.ClipboardCopyMode = System.Windows.Forms.DataGridViewClipboardCopyMode.EnableWithoutHeaderText;
     81      this.dataGridView.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom) 
     82            | System.Windows.Forms.AnchorStyles.Left)
     83            | System.Windows.Forms.AnchorStyles.Right)));
     84      this.dataGridView.ClipboardCopyMode = System.Windows.Forms.DataGridViewClipboardCopyMode.Disable;
    8585      this.dataGridView.ColumnHeadersHeightSizeMode = System.Windows.Forms.DataGridViewColumnHeadersHeightSizeMode.AutoSize;
    8686      this.dataGridView.ColumnHeadersVisible = false;
     
    101101      // StringConvertibleArrayView
    102102      //
    103       this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
    104103      this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Inherit;
    105104      this.Controls.Add(this.dataGridView);
  • branches/symbreg-factors-2650/HeuristicLab.Data.Views/3.3/StringConvertibleArrayView.cs

    r14185 r14330  
    2323using System.ComponentModel;
    2424using System.Drawing;
     25using System.Linq;
    2526using System.Text;
    2627using System.Windows.Forms;
     
    203204        DataGridViewCell cell = dataGridView[column.Index, i];
    204205        if (cell.Selected) {
     206          s.Append(Content.ElementNames.ElementAt(i));
     207          s.Append("\t");
    205208          s.Append(Content.GetValue(i));
    206209          s.Append(Environment.NewLine);
  • branches/symbreg-factors-2650/HeuristicLab.MainForm.WindowsForms/3.3/Controls/ProgressView.cs

    r14185 r14330  
    141141
    142142    private void UpdateProgressValue() {
    143       if (InvokeRequired) Invoke((Action)UpdateProgressValue);
    144       else {
     143      // prevent problems with object disposal and invoke as suggested by http://stackoverflow.com/a/18647091
     144      if (!IsHandleCreated) return;
     145      if (InvokeRequired) {
     146        try {
     147          Invoke((Action)UpdateProgressValue);
     148        }
     149        catch (InvalidOperationException) {
     150          // swallow ObjectDisposedException
     151          // which might occur if the invoke call is executed after or while the control is disposing
     152        }
     153      } else {
    145154        if (content != null) {
    146155          double progressValue = content.ProgressValue;
     
    149158          } else {
    150159            progressBar.Style = ProgressBarStyle.Blocks;
    151             progressBar.Value = (int)Math.Round(progressBar.Minimum + progressValue * (progressBar.Maximum - progressBar.Minimum));
     160            progressBar.Value =
     161              (int)Math.Round(progressBar.Minimum + progressValue * (progressBar.Maximum - progressBar.Minimum));
    152162          }
    153163        }
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis

  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic

  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification

  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SymbolicClassificationModel.cs

    r14185 r14330  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
    2324using HeuristicLab.Common;
     
    3435  public abstract class SymbolicClassificationModel : SymbolicDataAnalysisModel, ISymbolicClassificationModel {
    3536    [Storable]
    36     private readonly string targetVariable;
     37    private string targetVariable;
    3738    public string TargetVariable {
    3839      get { return targetVariable; }
     40      set {
     41        if (string.IsNullOrEmpty(value) || targetVariable == value) return;
     42        targetVariable = value;
     43        OnTargetVariableChanged(this, EventArgs.Empty);
     44      }
    3945    }
    4046
    4147    [StorableConstructor]
    42     protected SymbolicClassificationModel(bool deserializing) : base(deserializing) { }
     48    protected SymbolicClassificationModel(bool deserializing)
     49      : base(deserializing) {
     50      targetVariable = string.Empty;
     51    }
    4352
    4453    protected SymbolicClassificationModel(SymbolicClassificationModel original, Cloner cloner)
     
    6473      Scale(problemData, problemData.TargetVariable);
    6574    }
     75
     76    #region events
     77    public event EventHandler TargetVariableChanged;
     78    private void OnTargetVariableChanged(object sender, EventArgs args) {
     79      var changed = TargetVariableChanged;
     80      if (changed != null)
     81        changed(sender, args);
     82    }
     83    #endregion
    6684  }
    6785}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression

  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4

  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SymbolicRegressionModel.cs

    r14185 r14330  
    3535  public class SymbolicRegressionModel : SymbolicDataAnalysisModel, ISymbolicRegressionModel {
    3636    [Storable]
    37     private readonly string targetVariable;
     37    private string targetVariable;
    3838    public string TargetVariable {
    3939      get { return targetVariable; }
     40      set {
     41        if (string.IsNullOrEmpty(value) || targetVariable == value) return;
     42        targetVariable = value;
     43        OnTargetVariableChanged(this, EventArgs.Empty);
     44      }
    4045    }
    4146
    4247    [StorableConstructor]
    43     protected SymbolicRegressionModel(bool deserializing) : base(deserializing) { }
     48    protected SymbolicRegressionModel(bool deserializing)
     49      : base(deserializing) {
     50      targetVariable = string.Empty;
     51    }
    4452
    4553    protected SymbolicRegressionModel(SymbolicRegressionModel original, Cloner cloner)
     
    7482      Scale(problemData, problemData.TargetVariable);
    7583    }
     84
     85    #region events
     86    public event EventHandler TargetVariableChanged;
     87    private void OnTargetVariableChanged(object sender, EventArgs args) {
     88      var changed = TargetVariableChanged;
     89      if (changed != null)
     90        changed(sender, args);
     91    }
     92    #endregion
    7693  }
    7794}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Importer/InfixExpressionParser.cs

    r14251 r14330  
    226226          pos++;
    227227          yield return new Token { TokenType = TokenType.Eq, strVal = "=" };
     228        } else {
     229          throw new ArgumentException("Invalid character: " + str[pos]);
    228230        }
    229231      }
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interpreter/SymbolicDataAnalysisExpressionTreeLinearInterpreter.cs

    r14249 r14330  
    3838    private const string EvaluatedSolutionsParameterName = "EvaluatedSolutions";
    3939
    40     private SymbolicDataAnalysisExpressionTreeInterpreter interpreter;
     40    private readonly SymbolicDataAnalysisExpressionTreeInterpreter interpreter;
    4141
    4242    public override bool CanChangeName {
     
    7272    private SymbolicDataAnalysisExpressionTreeLinearInterpreter(bool deserializing)
    7373      : base(deserializing) {
     74      interpreter = new SymbolicDataAnalysisExpressionTreeInterpreter();
    7475    }
    7576
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Views

  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Views/3.4/FeatureCorrelation/TimeframeFeatureCorrelationCalculator.cs

    r13938 r14330  
    7272      }
    7373
     74      var var1 = dataset.GetDoubleValues(variable, indices).ToArray();
     75
    7476      for (int i = 0; i < length; i++) {
    7577        for (int j = start; j <= frames; j++) {
     
    8082          }
    8183
    82           IEnumerable<double> var1 = dataset.GetDoubleValues(variable, indices);
    8384          IEnumerable<double> var2 = dataset.GetDoubleValues(doubleVariableNames[i], indices);
    8485
    85           var valuesInFrame = var1.Take(j);
    86           var help = var1.Skip(j).ToList();
    87           help.AddRange(valuesInFrame);
    88           var1 = help;
    89 
    9086          var error = OnlineCalculatorError.None;
    91           elements[i, j] = calc.Calculate(var1, var2, out error);
     87          elements[i, j] = calc.Calculate(var1.Skip(j), var2.Take(var1.Length-j), out error);
    9288
    9389          if (!error.Equals(OnlineCalculatorError.None)) {
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionTargetResponseGradientView.cs

    r14277 r14330  
    125125
    126126      if (sharedFixedVariables != null)
    127         sharedFixedVariables.ItemChanged += SharedFixedVariables_ItemChanged;
     127        sharedFixedVariables.ItemChanged -= SharedFixedVariables_ItemChanged;
    128128
    129129      sharedFixedVariables = new ModifiableDataset(doubleVariables.Concat(factorVariables), doubleVariableValues.Concat(factorVariableValues));
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationModel.cs

    r13992 r14330  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
    2324using HeuristicLab.Common;
     
    3334    public string TargetVariable {
    3435      get { return targetVariable; }
    35       protected set { targetVariable = value; }
     36      set {
     37        if (string.IsNullOrEmpty(value) || targetVariable == value) return;
     38        targetVariable = value;
     39        OnTargetVariableChanged(this, EventArgs.Empty);
     40      }
    3641    }
    3742
    38     protected ClassificationModel(bool deserializing) : base(deserializing) { }
     43    protected ClassificationModel(bool deserializing)
     44      : base(deserializing) {
     45      targetVariable = string.Empty;
     46    }
    3947    protected ClassificationModel(ClassificationModel original, Cloner cloner)
    4048      : base(original, cloner) {
     
    5563    }
    5664
    57     [StorableHook(HookType.AfterDeserialization)]
    58     private void AfterDeserialization() {
    59       // BackwardsCompatibility3.3
    60       #region Backwards compatible code, remove with 3.4
    61       targetVariable = string.Empty;
    62       #endregion
    63     }
    64 
    6565    public abstract IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows);
    6666    public abstract IClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData);
     67
     68    #region events
     69    public event EventHandler TargetVariableChanged;
     70    private void OnTargetVariableChanged(object sender, EventArgs args) {
     71      var changed = TargetVariableChanged;
     72      if (changed != null)
     73        changed(sender, args);
     74    }
     75    #endregion
    6776  }
    6877}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationSolutionBase.cs

    r14185 r14330  
    8888    [StorableHook(HookType.AfterDeserialization)]
    8989    private void AfterDeserialization() {
     90      if (string.IsNullOrEmpty(Model.TargetVariable))
     91        Model.TargetVariable = this.ProblemData.TargetVariable;
     92
    9093      if (!this.ContainsKey(TrainingNormalizedGiniCoefficientResultName))
    9194        Add(new Result(TrainingNormalizedGiniCoefficientResultName, "Normalized Gini coefficient of the model on the training partition.", new DoubleValue()));
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionModel.cs

    r13992 r14330  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
    2324using HeuristicLab.Common;
     
    3334    public string TargetVariable {
    3435      get { return targetVariable; }
    35       protected set { targetVariable = value; }
     36      set {
     37        if (string.IsNullOrEmpty(value) || targetVariable == value) return;
     38        targetVariable = value;
     39        OnTargetVariableChanged(this, EventArgs.Empty);
     40      }
    3641    }
    3742
    38     protected RegressionModel(bool deserializing) : base(deserializing) { }
     43    protected RegressionModel(bool deserializing)
     44      : base(deserializing) {
     45      targetVariable = string.Empty;
     46    }
    3947
    4048    protected RegressionModel(RegressionModel original, Cloner cloner)
     
    5664    }
    5765
    58     [StorableHook(HookType.AfterDeserialization)]
    59     private void AfterDeserialization() {
    60       // BackwardsCompatibility3.3
    61       #region Backwards compatible code, remove with 3.4
    62       targetVariable = string.Empty;
    63       #endregion
    64     }
    6566    public abstract IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows);
    6667    public abstract IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData);
     68
     69    #region events
     70    public event EventHandler TargetVariableChanged;
     71    private void OnTargetVariableChanged(object sender, EventArgs args) {
     72      var changed = TargetVariableChanged;
     73      if (changed != null)
     74        changed(sender, args);
     75    }
     76    #endregion
    6777  }
    6878}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolutionBase.cs

    r14185 r14330  
    176176    [StorableHook(HookType.AfterDeserialization)]
    177177    private void AfterDeserialization() {
     178      if (string.IsNullOrEmpty(Model.TargetVariable))
     179        Model.TargetVariable = this.ProblemData.TargetVariable;
     180
    178181      // BackwardsCompatibility3.4
    179182      #region Backwards compatible code, remove with 3.5
     
    230233
    231234      double trainingR = OnlinePearsonsRCalculator.Calculate(originalTrainingValues, estimatedTrainingValues, out errorState);
    232       TrainingRSquared = errorState == OnlineCalculatorError.None ? trainingR*trainingR : double.NaN;
     235      TrainingRSquared = errorState == OnlineCalculatorError.None ? trainingR * trainingR : double.NaN;
    233236      double testR = OnlinePearsonsRCalculator.Calculate(originalTestValues, estimatedTestValues, out errorState);
    234       TestRSquared = errorState == OnlineCalculatorError.None ? testR*testR : double.NaN;
     237      TestRSquared = errorState == OnlineCalculatorError.None ? testR * testR : double.NaN;
    235238
    236239      double trainingRelError = OnlineMeanAbsolutePercentageErrorCalculator.Calculate(originalTrainingValues, estimatedTrainingValues, out errorState);
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/Classification/IClassificationModel.cs

    r14239 r14330  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
     24
    2325namespace HeuristicLab.Problems.DataAnalysis {
    2426  /// <summary>
     
    2931    IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows);
    3032    IClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData);
    31     string TargetVariable { get; }
     33    string TargetVariable { get; set; }
     34    event EventHandler TargetVariableChanged;
    3235  }
    3336}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/Regression/IRegressionModel.cs

    r14239 r14330  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
    2324
     
    3031    IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows);
    3132    IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData);
    32     string TargetVariable { get; }
     33    string TargetVariable { get; set; }
     34    event EventHandler TargetVariableChanged;
    3335  }
    3436}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/ClassificationPerformanceMeasuresCalculator.cs

    r14185 r14330  
    2323using System.Collections.Generic;
    2424using HeuristicLab.Common;
    25 using HeuristicLab.Problems.DataAnalysis.OnlineCalculators;
    2625
    2726namespace HeuristicLab.Problems.DataAnalysis {
    28   public class ClassificationPerformanceMeasuresCalculator {
     27  public class ClassificationPerformanceMeasuresCalculator : IDeepCloneable {
    2928
    3029    public ClassificationPerformanceMeasuresCalculator(string positiveClassName, double positiveClassValue) {
     
    3231      this.positiveClassValue = positiveClassValue;
    3332      Reset();
     33    }
     34
     35    protected ClassificationPerformanceMeasuresCalculator(ClassificationPerformanceMeasuresCalculator original, Cloner cloner = null) {
     36      positiveClassName = original.positiveClassName;
     37      positiveClassValue = original.positiveClassValue;
     38      truePositiveCount = original.truePositiveCount;
     39      falsePositiveCount = original.falsePositiveCount;
     40      trueNegativeCount = original.trueNegativeCount;
     41      falseNegativeCount = original.falseNegativeCount;
     42      errorState = original.errorState;
    3443    }
    3544
     
    138147      errorState = ErrorState;
    139148    }
     149
     150    // IDeepCloneable interface members
     151    public object Clone() {
     152      return new ClassificationPerformanceMeasuresCalculator(this);
     153    }
     154
     155    public IDeepCloneable Clone(Cloner cloner) {
     156      var clone = cloner.GetClone(this);
     157      if (clone == null) {
     158        clone = new ClassificationPerformanceMeasuresCalculator(this);
     159        cloner.RegisterClonedObject(this, clone);
     160      }
     161      return clone;
     162    }
    140163  }
    141164}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlineAccuracyCalculator.cs

    r14185 r14330  
    2525
    2626namespace HeuristicLab.Problems.DataAnalysis {
    27   public class OnlineAccuracyCalculator : IOnlineCalculator {
     27  public class OnlineAccuracyCalculator : IOnlineCalculator, IDeepCloneable {
    2828
    2929    private int correctlyClassified;
     
    3737    public OnlineAccuracyCalculator() {
    3838      Reset();
     39    }
     40
     41    // protected copy constructor used internally by the Clone() methods
     42    protected OnlineAccuracyCalculator(OnlineAccuracyCalculator other, Cloner cloner = null) {
     43      correctlyClassified = other.correctlyClassified;
     44      n = other.n;
     45      errorState = other.errorState;
    3946    }
    4047
     
    9097      }
    9198    }
     99
     100    // IDeepCloneable interface members
     101    public object Clone() {
     102      return new OnlineAccuracyCalculator(this);
     103    }
     104
     105    public IDeepCloneable Clone(Cloner cloner) {
     106      var clone = cloner.GetClone(this);
     107      if (clone == null) {
     108        clone = new OnlineAccuracyCalculator(this);
     109        cloner.RegisterClonedObject(this, clone);
     110      }
     111      return clone;
     112    }
    92113  }
    93114}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlineBoundedMeanSquaredErrorCalculator.cs

    r14185 r14330  
    2222using System;
    2323using System.Collections.Generic;
     24using HeuristicLab.Common;
    2425
    2526namespace HeuristicLab.Problems.DataAnalysis {
    26   public class OnlineBoundedMeanSquaredErrorCalculator : IOnlineCalculator {
     27  public class OnlineBoundedMeanSquaredErrorCalculator : IOnlineCalculator, IDeepCloneable {
    2728
    2829    private double errorSum;
     
    3839
    3940
    40     public OnlineBoundedMeanSquaredErrorCalculator(double lowerBound, double upperbound) {
     41    public OnlineBoundedMeanSquaredErrorCalculator(double lowerBound, double upperBound) {
    4142      LowerBound = lowerBound;
    42       UpperBound = upperbound;
     43      UpperBound = upperBound;
    4344      Reset();
     45    }
     46
     47    protected OnlineBoundedMeanSquaredErrorCalculator(OnlineBoundedMeanSquaredErrorCalculator original, Cloner cloner = null) {
     48      LowerBound = original.LowerBound;
     49      UpperBound = original.UpperBound;
     50      n = original.n;
     51      errorSum = original.errorSum;
     52      errorState = original.ErrorState;
    4453    }
    4554
     
    96105      }
    97106    }
     107
     108    // IDeepCloneable interface members
     109    public object Clone() {
     110      return new OnlineBoundedMeanSquaredErrorCalculator(this);
     111    }
     112
     113    public IDeepCloneable Clone(Cloner cloner) {
     114      var clone = cloner.GetClone(this);
     115      if (clone == null) {
     116        clone = new OnlineBoundedMeanSquaredErrorCalculator(this);
     117        cloner.RegisterClonedObject(this, clone);
     118      }
     119      return clone;
     120    }
    98121  }
    99122}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlineCovarianceCalculator.cs

    r14185 r14330  
    2222using System;
    2323using System.Collections.Generic;
     24using HeuristicLab.Common;
    2425
    2526namespace HeuristicLab.Problems.DataAnalysis {
    26   public class OnlineCovarianceCalculator : IOnlineCalculator {
     27  public class OnlineCovarianceCalculator : IOnlineCalculator, IDeepCloneable {
    2728
    2829    private double xMean, yMean, Cn;
     
    3637    public OnlineCovarianceCalculator() {
    3738      Reset();
     39    }
     40
     41    protected OnlineCovarianceCalculator(OnlineCovarianceCalculator other, Cloner cloner = null) {
     42      Cn = other.Cn;
     43      xMean = other.xMean;
     44      yMean = other.yMean;
     45      n = other.n;
     46      errorState = other.errorState;
    3847    }
    3948
     
    94103      }
    95104    }
     105
     106    // IDeepCloneable interface members
     107    public object Clone() {
     108      return new OnlineCovarianceCalculator(this);
     109    }
     110
     111    public IDeepCloneable Clone(Cloner cloner) {
     112      var clone = cloner.GetClone(this);
     113      if (clone == null) {
     114        clone = new OnlineCovarianceCalculator(this);
     115        cloner.RegisterClonedObject(this, clone);
     116      }
     117      return clone;
     118    }
    96119  }
    97120}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlineDirectionalSymmetryCalculator.cs

    r14185 r14330  
    2626
    2727namespace HeuristicLab.Problems.DataAnalysis {
    28   public class OnlineDirectionalSymmetryCalculator : IOnlineTimeSeriesCalculator {
     28  public class OnlineDirectionalSymmetryCalculator : IOnlineTimeSeriesCalculator, IDeepCloneable {
    2929    private int n;
    3030    private int nCorrect;
     
    3939    public OnlineDirectionalSymmetryCalculator() {
    4040      Reset();
     41    }
     42
     43    protected OnlineDirectionalSymmetryCalculator(OnlineDirectionalSymmetryCalculator other, Cloner cloner = null) {
     44      n = other.n;
     45      nCorrect = other.nCorrect;
     46      errorState = other.errorState;
    4147    }
    4248
     
    94100      return dsCalculator.DirectionalSymmetry;
    95101    }
    96    
     102
    97103    public static double Calculate(IEnumerable<double> startValues, IEnumerable<IEnumerable<double>> actualContinuations, IEnumerable<IEnumerable<double>> predictedContinuations, out OnlineCalculatorError errorState) {
    98104      IEnumerator<double> startValueEnumerator = startValues.GetEnumerator();
     
    116122      }
    117123    }
     124
     125    // IDeepCloneable interface members
     126    public object Clone() {
     127      return new OnlineDirectionalSymmetryCalculator(this);
     128    }
     129
     130    public IDeepCloneable Clone(Cloner cloner) {
     131      var clone = cloner.GetClone(this);
     132      if (clone == null) {
     133        clone = new OnlineDirectionalSymmetryCalculator(this);
     134        cloner.RegisterClonedObject(this, clone);
     135      }
     136      return clone;
     137    }
    118138  }
    119139}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlineLinearScalingParameterCalculator.cs

    r14185 r14330  
    2525
    2626namespace HeuristicLab.Problems.DataAnalysis {
    27   public class OnlineLinearScalingParameterCalculator {
     27  public class OnlineLinearScalingParameterCalculator : IDeepCloneable {
    2828
    2929    /// <summary>
     
    6565      Reset();
    6666    }
     67
     68    protected OnlineLinearScalingParameterCalculator(OnlineLinearScalingParameterCalculator other, Cloner cloner) {
     69      targetMeanCalculator = (OnlineMeanAndVarianceCalculator)other.targetMeanCalculator.Clone(cloner);
     70      originalMeanAndVarianceCalculator = (OnlineMeanAndVarianceCalculator)other.originalMeanAndVarianceCalculator.Clone(cloner);
     71      originalTargetCovarianceCalculator = (OnlineCovarianceCalculator)other.originalTargetCovarianceCalculator.Clone(cloner);
     72      // do not reset the calculators here
     73    }
     74
    6775
    6876    public void Reset() {
     
    117125      }
    118126    }
     127
     128    // IDeepCloneable interface members
     129    public object Clone() {
     130      var cloner = new Cloner();
     131      return new OnlineLinearScalingParameterCalculator(this, cloner);
     132    }
     133
     134    public IDeepCloneable Clone(Cloner cloner) {
     135      var clone = cloner.GetClone(this);
     136      if (clone == null) {
     137        clone = new OnlineLinearScalingParameterCalculator(this, cloner);
     138        cloner.RegisterClonedObject(this, clone);
     139      }
     140      return clone;
     141    }
    119142  }
    120143}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlineMaxAbsoluteErrorCalculator.cs

    r14185 r14330  
    2222using System;
    2323using System.Collections.Generic;
     24using HeuristicLab.Common;
    2425
    2526namespace HeuristicLab.Problems.DataAnalysis {
    26   public class OnlineMaxAbsoluteErrorCalculator : IOnlineCalculator {
     27  public class OnlineMaxAbsoluteErrorCalculator : IOnlineCalculator, IDeepCloneable {
    2728
    2829    private double mae;
     
    3637    public OnlineMaxAbsoluteErrorCalculator() {
    3738      Reset();
     39    }
     40
     41    protected OnlineMaxAbsoluteErrorCalculator(OnlineMaxAbsoluteErrorCalculator other, Cloner cloner = null) {
     42      mae = other.mae;
     43      n = other.n;
     44      errorState = other.errorState;
    3845    }
    3946
     
    8895      }
    8996    }
     97
     98    // IDeepCloneable interface members
     99    public object Clone() {
     100      return new OnlineMaxAbsoluteErrorCalculator(this);
     101    }
     102
     103    public IDeepCloneable Clone(Cloner cloner) {
     104      var clone = cloner.GetClone(this);
     105      if (clone == null) {
     106        clone = new OnlineMaxAbsoluteErrorCalculator(this);
     107        cloner.RegisterClonedObject(this, clone);
     108      }
     109      return clone;
     110    }
    90111  }
    91112}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlineMeanAbsoluteErrorCalculator.cs

    r14185 r14330  
    2222using System;
    2323using System.Collections.Generic;
     24using HeuristicLab.Common;
    2425
    2526namespace HeuristicLab.Problems.DataAnalysis {
    26   public class OnlineMeanAbsoluteErrorCalculator : IOnlineCalculator {
     27  public class OnlineMeanAbsoluteErrorCalculator : IOnlineCalculator, IDeepCloneable {
    2728
    2829    private double sae;
     
    3637    public OnlineMeanAbsoluteErrorCalculator() {
    3738      Reset();
     39    }
     40
     41    protected OnlineMeanAbsoluteErrorCalculator(OnlineMeanAbsoluteErrorCalculator other, Cloner cloner = null) {
     42      sae = other.sae;
     43      n = other.n;
     44      errorState = other.errorState;
    3845    }
    3946
     
    8794      }
    8895    }
     96
     97    // IDeepCloneable interface members
     98    public object Clone() {
     99      return new OnlineMeanAbsoluteErrorCalculator(this);
     100    }
     101
     102    public IDeepCloneable Clone(Cloner cloner) {
     103      var clone = cloner.GetClone(this);
     104      if (clone == null) {
     105        clone = new OnlineMeanAbsoluteErrorCalculator(this);
     106        cloner.RegisterClonedObject(this, clone);
     107      }
     108      return clone;
     109    }
    89110  }
    90111}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlineMeanAbsolutePercentageErrorCalculator.cs

    r14185 r14330  
    2525
    2626namespace HeuristicLab.Problems.DataAnalysis {
    27   public class OnlineMeanAbsolutePercentageErrorCalculator : IOnlineCalculator {
     27  public class OnlineMeanAbsolutePercentageErrorCalculator : IOnlineCalculator, IDeepCloneable {
    2828
    2929    private double sre;
     
    3737    public OnlineMeanAbsolutePercentageErrorCalculator() {
    3838      Reset();
     39    }
     40
     41    protected OnlineMeanAbsolutePercentageErrorCalculator(OnlineMeanAbsolutePercentageErrorCalculator other, Cloner cloner = null) {
     42      sre = other.sre;
     43      n = other.n;
     44      errorState = other.errorState;
    3945    }
    4046
     
    8995      }
    9096    }
     97
     98    // IDeepCloneable interface members
     99    public object Clone() {
     100      return new OnlineMeanAbsolutePercentageErrorCalculator(this);
     101    }
     102
     103    public IDeepCloneable Clone(Cloner cloner) {
     104      var clone = cloner.GetClone(this);
     105      if (clone == null) {
     106        clone = new OnlineMeanAbsolutePercentageErrorCalculator(this);
     107        cloner.RegisterClonedObject(this, clone);
     108      }
     109      return clone;
     110    }
    91111  }
    92112}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlineMeanAndVarianceCalculator.cs

    r14185 r14330  
    2121
    2222using System.Collections.Generic;
     23using HeuristicLab.Common;
    2324
    2425namespace HeuristicLab.Problems.DataAnalysis {
    25   public class OnlineMeanAndVarianceCalculator {
     26  public class OnlineMeanAndVarianceCalculator : IDeepCloneable {
    2627
    2728    private double m_oldM, m_newM, m_oldS, m_newS;
     
    6667    }
    6768
     69    protected OnlineMeanAndVarianceCalculator(OnlineMeanAndVarianceCalculator other, Cloner cloner = null) {
     70      m_oldS = other.m_oldS;
     71      m_oldM = other.m_oldM;
     72      m_newS = other.m_newS;
     73      m_newM = other.m_newM;
     74      n = other.n;
     75      errorState = other.errorState;
     76      varianceErrorState = other.varianceErrorState;
     77    }
     78
    6879    public void Reset() {
    6980      n = 0;
     
    7586      if (double.IsNaN(x) || double.IsInfinity(x) || x > 1E13 || x < -1E13 || (errorState & OnlineCalculatorError.InvalidValueAdded) > 0) {
    7687        errorState = errorState | OnlineCalculatorError.InvalidValueAdded;
    77         varianceErrorState = errorState | OnlineCalculatorError.InvalidValueAdded;
     88        varianceErrorState = varianceErrorState | OnlineCalculatorError.InvalidValueAdded;
    7889      } else {
    7990        n++;
     
    106117      varianceErrorState = meanAndVarianceCalculator.VarianceErrorState;
    107118    }
     119
     120    // IDeepCloneable members
     121    public object Clone() {
     122      return new OnlineMeanAndVarianceCalculator(this);
     123    }
     124
     125    public IDeepCloneable Clone(Cloner cloner) {
     126      var clone = cloner.GetClone(this);
     127      if (clone == null) {
     128        clone = new OnlineMeanAndVarianceCalculator(this);
     129        cloner.RegisterClonedObject(this, clone);
     130      }
     131      return clone;
     132    }
    108133  }
    109134}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlineMeanErrorCalculator.cs

    r14185 r14330  
    2222using System;
    2323using System.Collections.Generic;
     24using HeuristicLab.Common;
    2425
    2526namespace HeuristicLab.Problems.DataAnalysis {
    26   public class OnlineMeanErrorCalculator : IOnlineCalculator {
     27  public class OnlineMeanErrorCalculator : IOnlineCalculator, IDeepCloneable {
    2728
    2829    private readonly OnlineMeanAndVarianceCalculator meanAndVarianceCalculator;
     
    3435      meanAndVarianceCalculator = new OnlineMeanAndVarianceCalculator();
    3536      Reset();
     37    }
     38
     39    protected OnlineMeanErrorCalculator(OnlineMeanErrorCalculator other, Cloner cloner) {
     40      meanAndVarianceCalculator = (OnlineMeanAndVarianceCalculator)other.meanAndVarianceCalculator.Clone(cloner);
    3641    }
    3742
     
    7479      }
    7580    }
     81
     82    // IDeepCloneable members
     83    public object Clone() {
     84      var cloner = new Cloner();
     85      return new OnlineMeanErrorCalculator(this, cloner);
     86    }
     87
     88    public IDeepCloneable Clone(Cloner cloner) {
     89      var clone = cloner.GetClone(this);
     90      if (clone == null) {
     91        clone = new OnlineMeanErrorCalculator(this, cloner);
     92        cloner.RegisterClonedObject(this, clone);
     93      }
     94      return clone;
     95    }
    7696  }
    7797}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlineMeanSquaredErrorCalculator.cs

    r14185 r14330  
    2222using System;
    2323using System.Collections.Generic;
     24using HeuristicLab.Common;
    2425
    2526namespace HeuristicLab.Problems.DataAnalysis {
    26   public class OnlineMeanSquaredErrorCalculator : IOnlineCalculator {
     27  public class OnlineMeanSquaredErrorCalculator : IOnlineCalculator, IDeepCloneable {
    2728
    2829    private double sse;
     
    3637    public OnlineMeanSquaredErrorCalculator() {
    3738      Reset();
     39    }
     40
     41    protected OnlineMeanSquaredErrorCalculator(OnlineMeanSquaredErrorCalculator other, Cloner cloner = null) {
     42      sse = other.sse;
     43      n = other.n;
     44      errorState = other.errorState;
    3845    }
    3946
     
    8794      }
    8895    }
     96
     97    // IDeepCloneable members
     98    public object Clone() {
     99      return new OnlineMeanSquaredErrorCalculator(this);
     100    }
     101
     102    public IDeepCloneable Clone(Cloner cloner) {
     103      var clone = cloner.GetClone(this);
     104      if (clone == null) {
     105        clone = new OnlineMeanSquaredErrorCalculator(this);
     106        cloner.RegisterClonedObject(this, clone);
     107      }
     108      return clone;
     109    }
    89110  }
    90111}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlineNormalizedMeanSquaredErrorCalculator.cs

    r14226 r14330  
    2222using System;
    2323using System.Collections.Generic;
     24using HeuristicLab.Common;
    2425
    2526namespace HeuristicLab.Problems.DataAnalysis {
    26   public class OnlineNormalizedMeanSquaredErrorCalculator : IOnlineCalculator {
     27  public class OnlineNormalizedMeanSquaredErrorCalculator : IOnlineCalculator, IDeepCloneable {
    2728    private OnlineMeanAndVarianceCalculator meanSquaredErrorCalculator;
    2829    private OnlineMeanAndVarianceCalculator originalVarianceCalculator;
     
    4041      originalVarianceCalculator = new OnlineMeanAndVarianceCalculator();
    4142      Reset();
     43    }
     44
     45    protected OnlineNormalizedMeanSquaredErrorCalculator(OnlineNormalizedMeanSquaredErrorCalculator other, Cloner cloner) {
     46      meanSquaredErrorCalculator = (OnlineMeanAndVarianceCalculator)other.meanSquaredErrorCalculator.Clone(cloner);
     47      originalVarianceCalculator = (OnlineMeanAndVarianceCalculator)other.originalVarianceCalculator.Clone(cloner);
    4248    }
    4349
     
    9298      }
    9399    }
     100
     101    // IDeepCloneable members
     102    public object Clone() {
     103      var cloner = new Cloner();
     104      return new OnlineNormalizedMeanSquaredErrorCalculator(this, cloner);
     105    }
     106
     107    public IDeepCloneable Clone(Cloner cloner) {
     108      var clone = cloner.GetClone(this);
     109      if (clone == null) {
     110        clone = new OnlineNormalizedMeanSquaredErrorCalculator(this, cloner);
     111        cloner.RegisterClonedObject(this, clone);
     112      }
     113      return clone;
     114    }
    94115  }
    95116}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlinePearsonsRCalculator.cs

    r14185 r14330  
    2525
    2626namespace HeuristicLab.Problems.DataAnalysis {
    27   public class OnlinePearsonsRCalculator : IOnlineCalculator {
     27  public class OnlinePearsonsRCalculator : IOnlineCalculator, IDeepCloneable {
    2828    private OnlineCovarianceCalculator covCalculator = new OnlineCovarianceCalculator();
    2929    private OnlineMeanAndVarianceCalculator sxCalculator = new OnlineMeanAndVarianceCalculator();
     
    4646
    4747    public OnlinePearsonsRCalculator() { }
     48
     49    protected OnlinePearsonsRCalculator(OnlinePearsonsRCalculator other, Cloner cloner) {
     50      covCalculator = (OnlineCovarianceCalculator)other.covCalculator.Clone(cloner);
     51      sxCalculator = (OnlineMeanAndVarianceCalculator)other.sxCalculator.Clone(cloner);
     52      syCalculator = (OnlineMeanAndVarianceCalculator)other.syCalculator.Clone(cloner);
     53    }
    4854
    4955    #region IOnlineCalculator Members
     
    9197      }
    9298    }
     99
     100    // IDeepCloneable members
     101    public object Clone() {
     102      var cloner = new Cloner();
     103      return new OnlinePearsonsRCalculator(this, cloner);
     104    }
     105
     106    public IDeepCloneable Clone(Cloner cloner) {
     107      var clone = cloner.GetClone(this);
     108      if (clone == null) {
     109        clone = new OnlinePearsonsRCalculator(this, cloner);
     110        cloner.RegisterClonedObject(this, clone);
     111      }
     112      return clone;
     113    }
    93114  }
    94115}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlinePearsonsRSquaredCalculator.cs

    r14185 r14330  
    2626namespace HeuristicLab.Problems.DataAnalysis {
    2727  [Obsolete("Use OnlinePearsonsRCalculator directly")]
    28   public class OnlinePearsonsRSquaredCalculator : IOnlineCalculator {
     28  public class OnlinePearsonsRSquaredCalculator : IOnlineCalculator, IDeepCloneable {
    2929    private readonly OnlinePearsonsRCalculator rCalculator = new OnlinePearsonsRCalculator();
    3030
     
    3737
    3838    public OnlinePearsonsRSquaredCalculator() { }
     39
     40    protected OnlinePearsonsRSquaredCalculator(OnlinePearsonsRSquaredCalculator other, Cloner cloner) {
     41      this.rCalculator = (OnlinePearsonsRCalculator)other.rCalculator.Clone(cloner);
     42    }
    3943
    4044    #region IOnlineCalculator Members
     
    5963      return r * r;
    6064    }
     65
     66    // IDeepCloneable members
     67    public object Clone() {
     68      var cloner = new Cloner();
     69      return new OnlinePearsonsRSquaredCalculator(this, cloner);
     70    }
     71
     72    public IDeepCloneable Clone(Cloner cloner) {
     73      var clone = cloner.GetClone(this);
     74      if (clone == null) {
     75        clone = new OnlinePearsonsRSquaredCalculator(this, cloner);
     76        cloner.RegisterClonedObject(this, clone);
     77      }
     78      return clone;
     79    }
    6180  }
    6281}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlineTheilsUStatisticCalculator.cs

    r14185 r14330  
    2222using System;
    2323using System.Collections.Generic;
     24using HeuristicLab.Common;
    2425
    2526namespace HeuristicLab.Problems.DataAnalysis {
    26   public class OnlineTheilsUStatisticCalculator : IOnlineTimeSeriesCalculator {
     27  public class OnlineTheilsUStatisticCalculator : IOnlineTimeSeriesCalculator, IDeepCloneable {
    2728    private OnlineMeanAndVarianceCalculator squaredErrorMeanCalculator;
    2829    private OnlineMeanAndVarianceCalculator unbiasedEstimatorMeanCalculator;
     
    4344      unbiasedEstimatorMeanCalculator = new OnlineMeanAndVarianceCalculator();
    4445      Reset();
     46    }
     47
     48    protected OnlineTheilsUStatisticCalculator(OnlineTheilsUStatisticCalculator other, Cloner cloner) {
     49      squaredErrorMeanCalculator = (OnlineMeanAndVarianceCalculator)other.squaredErrorMeanCalculator.Clone(cloner);
     50      unbiasedEstimatorMeanCalculator = (OnlineMeanAndVarianceCalculator)other.unbiasedEstimatorMeanCalculator.Clone(cloner);
    4551    }
    4652
     
    125131      }
    126132    }
     133
     134    // IDeepCloneable members
     135    public object Clone() {
     136      var cloner = new Cloner();
     137      return new OnlineTheilsUStatisticCalculator(this, cloner);
     138    }
     139
     140    public IDeepCloneable Clone(Cloner cloner) {
     141      var clone = cloner.GetClone(this);
     142      if (clone == null) {
     143        clone = new OnlineTheilsUStatisticCalculator(this, cloner);
     144        cloner.RegisterClonedObject(this, clone);
     145      }
     146      return clone;
     147    }
    127148  }
    128149}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlineWeightedDirectionalSymmetryCalculator.cs

    r14185 r14330  
    2626
    2727namespace HeuristicLab.Problems.DataAnalysis {
    28   public class OnlineWeightedDirectionalSymmetryCalculator : IOnlineTimeSeriesCalculator {
     28  public class OnlineWeightedDirectionalSymmetryCalculator : IOnlineTimeSeriesCalculator, IDeepCloneable {
    2929    private int n;
    3030    private double correctSum;
     
    4040    public OnlineWeightedDirectionalSymmetryCalculator() {
    4141      Reset();
     42    }
     43
     44    protected OnlineWeightedDirectionalSymmetryCalculator(OnlineWeightedDirectionalSymmetryCalculator other, Cloner cloner = null) {
     45      n = other.n;
     46      correctSum = other.correctSum;
     47      incorrectSum = other.incorrectSum;
     48      errorState = other.errorState;
    4249    }
    4350
     
    118125      }
    119126    }
     127
     128    // IDeepCloneable members
     129    public object Clone() {
     130      return new OnlineWeightedDirectionalSymmetryCalculator(this);
     131    }
     132
     133    public IDeepCloneable Clone(Cloner cloner) {
     134      var clone = cloner.GetClone(this);
     135      if (clone == null) {
     136        clone = new OnlineWeightedDirectionalSymmetryCalculator(this);
     137        cloner.RegisterClonedObject(this, clone);
     138      }
     139      return clone;
     140    }
    120141  }
    121142}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.Instances.DataAnalysis

  • branches/symbreg-factors-2650/HeuristicLab.Problems.Instances.DataAnalysis.Views

  • branches/symbreg-factors-2650/HeuristicLab.Problems.Instances.DataAnalysis.Views/3.3/DataAnalysisImportTypeDialog.cs

    r14185 r14330  
    158158      }
    159159      catch (Exception ex) {
    160         if (ex is IOException || ex is InvalidOperationException || ex is ArgumentException || ex is TableFileParser.DataFormatException) {
     160        if (ex is IOException || ex is InvalidOperationException || ex is ArgumentException) {
    161161          OkButton.Enabled = false;
    162162          ErrorTextBox.Text = ex.Message;
  • branches/symbreg-factors-2650/HeuristicLab.Problems.Instances.DataAnalysis.Views/3.3/RegressionInstanceProviderView.cs

    r14185 r14330  
    5252
    5353          try {
    54             var progress = mainForm.AddOperationProgressToContent(activeView.Content, "Loading problem instance.");
     54            var progress = mainForm.AddOperationProgressToContent(activeView.Content,
     55              "Loading problem instance.");
    5556
    56             Content.ProgressChanged += (o, args) => { progress.ProgressValue = args.ProgressPercentage / 100.0; };
     57            Content.ProgressChanged +=
     58              (o, args) => { progress.ProgressValue = args.ProgressPercentage / 100.0; };
    5759
    58             instance = Content.ImportData(importTypeDialog.Path, importTypeDialog.ImportType, importTypeDialog.CSVFormat);
    59           } catch (IOException ex) {
     60            instance = Content.ImportData(importTypeDialog.Path, importTypeDialog.ImportType,
     61              importTypeDialog.CSVFormat);
     62          } catch (Exception ex) {
    6063            ErrorWhileParsing(ex);
     64            return;
     65          } finally {
    6166            mainForm.RemoveOperationProgressFromContent(activeView.Content);
    62             return;
    6367          }
     68
    6469          try {
    6570            GenericConsumer.Load(instance);
    66           } catch (IOException ex) {
     71          } catch (Exception ex) {
    6772            ErrorWhileLoading(ex, importTypeDialog.Path);
    6873          } finally {
  • branches/symbreg-factors-2650/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/VariableNetworks/VariableNetwork.cs

    r14277 r14330  
    2626using HeuristicLab.Common;
    2727using HeuristicLab.Core;
     28using HeuristicLab.Problems.DataAnalysis;
    2829using HeuristicLab.Random;
    2930
     
    195196      int nl = xs.Length;
    196197      int nRows = xs.First().Count;
    197       double[,] K = new double[nRows, nRows];
    198 
    199       // sample length-scales
     198
     199      // sample u iid ~ N(0, 1)
     200      var u = Enumerable.Range(0, nRows).Select(_ => NormalDistributedRandom.NextDouble(random, 0, 1)).ToArray();
     201
     202      // sample actual length-scales
    200203      var l = Enumerable.Range(0, nl)
    201204        .Select(_ => random.NextDouble() * 2 + 0.5)
    202205        .ToArray();
    203       // calculate covariance matrix
     206
     207      double[,] K = CalculateCovariance(xs, l);
     208
     209      // decompose
     210      alglib.trfac.spdmatrixcholesky(ref K, nRows, false);
     211
     212
     213      // calc y = Lu
     214      var y = new double[u.Length];
     215      alglib.ablas.rmatrixmv(nRows, nRows, K, 0, 0, 0, u, 0, ref y, 0);
     216
     217      // calculate relevance by removing dimensions
     218      relevance = CalculateRelevance(y, u, xs, l);
     219
     220
     221      // calculate variable relevance
     222      // as per Rasmussen and Williams "Gaussian Processes for Machine Learning" page 106:
     223      // ,,For the squared exponential covariance function [...] the l1, ..., lD hyperparameters
     224      // play the role of characteristic length scales [...]. Such a covariance function implements
     225      // automatic relevance determination (ARD) [Neal, 1996], since the inverse of the length-scale
     226      // determines how relevant an input is: if the length-scale has a very large value, the covariance
     227      // will become almost independent of that input, effectively removing it from inference.''
     228      // relevance = l.Select(li => 1.0 / li).ToArray();
     229
     230      return y;
     231    }
     232
     233    // calculate variable relevance based on removal of variables
     234    //  1) to remove a variable we set its length scale to infinity (no relation of the variable value to the target)
     235    //  2) calculate MSE of the original target values (y) to the updated targets y' (after variable removal)
     236    //  3) relevance is larger if MSE(y,y') is large
     237    //  4) scale impacts so that the most important variable has impact = 1
     238    private double[] CalculateRelevance(double[] y, double[] u, List<double>[] xs, double[] l) {
     239      int nRows = xs.First().Count;
     240      var changedL = new double[l.Length];
     241      var relevance = new double[l.Length];
     242      for (int i = 0; i < l.Length; i++) {
     243        Array.Copy(l, changedL, changedL.Length);
     244        changedL[i] = double.MaxValue;
     245        var changedK = CalculateCovariance(xs, changedL);
     246
     247        var yChanged = new double[u.Length];
     248        alglib.ablas.rmatrixmv(nRows, nRows, changedK, 0, 0, 0, u, 0, ref yChanged, 0);
     249
     250        OnlineCalculatorError error;
     251        var mse = OnlineMeanSquaredErrorCalculator.Calculate(y, yChanged, out error);
     252        if (error != OnlineCalculatorError.None) mse = double.MaxValue;
     253        relevance[i] = mse;
     254      }
     255      // scale so that max relevance is 1.0
     256      var maxRel = relevance.Max();
     257      for (int i = 0; i < relevance.Length; i++) relevance[i] /= maxRel;
     258      return relevance;
     259    }
     260
     261    private double[,] CalculateCovariance(List<double>[] xs, double[] l) {
     262      int nRows = xs.First().Count;
     263      double[,] K = new double[nRows, nRows];
    204264      for (int r = 0; r < nRows; r++) {
    205265        double[] xi = xs.Select(x => x[r]).ToArray();
     
    213273        }
    214274      }
    215 
    216275      // add a small diagonal matrix for numeric stability
    217276      for (int i = 0; i < nRows; i++) {
     
    219278      }
    220279
    221       // decompose
    222       alglib.trfac.spdmatrixcholesky(ref K, nRows, false);
    223 
    224       // sample u iid ~ N(0, 1)
    225       var u = Enumerable.Range(0, nRows).Select(_ => NormalDistributedRandom.NextDouble(random, 0, 1)).ToArray();
    226 
    227       // calc y = Lu
    228       var y = new double[u.Length];
    229       alglib.ablas.rmatrixmv(nRows, nRows, K, 0, 0, 0, u, 0, ref y, 0);
    230 
    231       // calculate variable relevance
    232       // as per Rasmussen and Williams "Gaussian Processes for Machine Learning" page 106:
    233       // ,,For the squared exponential covariance function [...] the l1, ..., lD hyperparameters
    234       // play the role of characteristic length scales [...]. Such a covariance function implements
    235       // automatic relevance determination (ARD) [Neal, 1996], since the inverse of the length-scale
    236       // determines how relevant an input is: if the length-scale has a very large value, the covariance
    237       // will become almost independent of that input, effectively removing it from inference.''
    238       relevance = l.Select(li => 1.0 / li).ToArray();
    239 
    240       return y;
     280      return K;
    241281    }
    242282  }
  • branches/symbreg-factors-2650/HeuristicLab.Problems.Instances.DataAnalysis/3.3/TableFileParser.cs

    r14185 r14330  
    198198    /// <param name="columnNamesInFirstLine"></param>
    199199    public void Parse(Stream stream, NumberFormatInfo numberFormat, DateTimeFormatInfo dateTimeFormatInfo, char separator, bool columnNamesInFirstLine, int lineLimit = -1) {
    200       using (StreamReader reader = new StreamReader(stream, Encoding)) {
     200      if (lineLimit > 0) estimatedNumberOfLines = lineLimit;
     201
     202      using (var reader = new StreamReader(stream)) {
    201203        tokenizer = new Tokenizer(reader, numberFormat, dateTimeFormatInfo, separator);
     204        var strValues = new List<List<string>>();
    202205        values = new List<IList>();
    203         if (lineLimit > 0) estimatedNumberOfLines = lineLimit;
    204 
    205         if (columnNamesInFirstLine) {
    206           ParseVariableNames();
    207           if (!tokenizer.HasNext())
    208             Error(
    209               "Couldn't parse data values. Probably because of incorrect number format (the parser expects english number format with a '.' as decimal separator).",
    210               "", tokenizer.CurrentLineNumber);
    211         }
    212 
    213 
    214         // read values... start in first row
     206        Prepare(columnNamesInFirstLine, strValues);
     207
    215208        int nLinesParsed = 0;
    216209        int colIdx = 0;
    217         int numValuesInFirstRow = columnNamesInFirstLine ? variableNames.Count : -1; // number of variables or inizialize based on first row of values (-1)
    218210        while (tokenizer.HasNext() && (lineLimit < 0 || nLinesParsed < lineLimit)) {
    219211          if (tokenizer.PeekType() == TokenTypeEnum.NewLine) {
     
    221213
    222214            // all rows have to have the same number of values
    223             // the first row defines how many samples are needed
    224             if (numValuesInFirstRow < 0) numValuesInFirstRow = values.Count; // set to number of colums in the first row
    225             else if (colIdx > 0 && numValuesInFirstRow != colIdx) { // read at least one value in the row (support for skipping empty lines)
    226               Error("The first row of the dataset has " + numValuesInFirstRow + " columns." + Environment.NewLine +
     215            // the first row defines how many elements are needed
     216            if (colIdx > 0 && values.Count != colIdx) {
     217              // read at least one value in the row (support for skipping empty lines)
     218              Error("The first row of the dataset has " + values.Count + " columns." + Environment.NewLine +
    227219                    "Line " + tokenizer.CurrentLineNumber + " has " + colIdx + " columns.", "",
    228                     tokenizer.CurrentLineNumber);
     220                tokenizer.CurrentLineNumber);
    229221            }
    230222            OnReport(tokenizer.BytesRead);
     
    234226          } else {
    235227            // read one value
    236             TokenTypeEnum type; string strVal; double dblVal; DateTime dateTimeVal;
     228            TokenTypeEnum type;
     229            string strVal;
     230            double dblVal;
     231            DateTime dateTimeVal;
    237232            tokenizer.Next(out type, out strVal, out dblVal, out dateTimeVal);
    238233
    239             // initialize columns on the first row (fixing data types as presented in the first row...)
    240             if (nLinesParsed == 0) {
    241               values.Add(CreateList(type, estimatedNumberOfLines));
    242             } else if (colIdx == values.Count) {
    243               Error("The first row of the dataset has " + numValuesInFirstRow + " columns." + Environment.NewLine +
     234            if (colIdx == values.Count) {
     235              Error("The first row of the dataset has " + values.Count + " columns." + Environment.NewLine +
    244236                    "Line " + tokenizer.CurrentLineNumber + " has more columns.", "",
    245237                tokenizer.CurrentLineNumber);
    246238            }
    247239            if (!IsColumnTypeCompatible(values[colIdx], type)) {
    248               values[colIdx] = ConvertToStringColumn(values[colIdx]);
     240              values[colIdx] = strValues[colIdx];
    249241            }
     242
    250243            // add the value to the column
    251             AddValue(type, values[colIdx++], strVal, dblVal, dateTimeVal);
     244            AddValue(type, values[colIdx], strVal, dblVal, dateTimeVal);
     245            if (!(values[colIdx] is List<string>)) { // optimization: don't store the string values in another list if the column is list<string>
     246              strValues[colIdx].Add(strVal);
     247            }
     248            colIdx++;
    252249          }
    253250        }
    254 
    255         if (!values.Any() || values.First().Count == 0)
    256           Error("Couldn't parse data values. Probably because of incorrect number format " +
    257                 "(the parser expects english number format with a '.' as decimal separator).", "", tokenizer.CurrentLineNumber);
    258       }
     251      }
     252
     253      if (!values.Any() || values.First().Count == 0)
     254        Error("Couldn't parse data values. Probably because of incorrect number format " +
     255              "(the parser expects english number format with a '.' as decimal separator).", "", tokenizer.CurrentLineNumber);
    259256
    260257      this.rows = values.First().Count;
     
    277274      // for large files we created a lot of memory pressure, cannot hurt to run GC.Collect here (TableFileParser is called seldom on user interaction)
    278275      GC.Collect(2, GCCollectionMode.Forced);
     276    }
     277
     278    private void Prepare(bool columnNamesInFirstLine, List<List<string>> strValues) {
     279      if (columnNamesInFirstLine) {
     280        ParseVariableNames();
     281        if (!tokenizer.HasNext())
     282          Error(
     283            "Couldn't parse data values. Probably because of incorrect number format (the parser expects english number format with a '.' as decimal separator).",
     284            "", tokenizer.CurrentLineNumber);
     285      }
     286      // read first line to determine types and allocate specific lists
     287      // read values... start in first row
     288      int colIdx = 0;
     289      while (tokenizer.PeekType() != TokenTypeEnum.NewLine) {
     290        // read one value
     291        TokenTypeEnum type; string strVal; double dblVal; DateTime dateTimeVal;
     292        tokenizer.Next(out type, out strVal, out dblVal, out dateTimeVal);
     293
     294        // initialize column
     295        values.Add(CreateList(type, estimatedNumberOfLines));
     296        if (type == TokenTypeEnum.String)
     297          strValues.Add(new List<string>(0)); // optimization: don't store the string values in another list if the column is list<string>
     298        else
     299          strValues.Add(new List<string>(estimatedNumberOfLines));
     300
     301        AddValue(type, values[colIdx], strVal, dblVal, dateTimeVal);
     302        if (type != TokenTypeEnum.String)
     303          strValues[colIdx].Add(strVal);
     304        colIdx++;
     305      }
     306      tokenizer.Skip(); // skip newline
    279307    }
    280308
     
    530558                type = TokenTypeEnum.Double;
    531559                doubleVals[i] = doubleVal;
    532               } else if (DateTime.TryParse(tok, dateTimeFormatInfo, DateTimeStyles.None, out dateTimeValue)) {
     560              } else if (DateTime.TryParse(tok, dateTimeFormatInfo, DateTimeStyles.NoCurrentDateDefault, out dateTimeValue)
     561                && dateTimeValue.Year > 1 && dateTimeValue.Month > 1 && dateTimeValue.Day > 1 // if no date is given it is returned as 1.1.0001 -> don't allow this
     562                ) {
    533563                type = TokenTypeEnum.DateTime;
    534564                dateTimeVals[i] = dateTimeValue;
     
    606636
    607637    private void Error(string message, string token, int lineNumber) {
    608       throw new DataFormatException("Error while parsing.\n" + message, token, lineNumber);
     638      throw new IOException(string.Format("Error while parsing. {0} (token: {1} lineNumber: {2}).", message, token, lineNumber));
    609639    }
    610640    #endregion
    611 
    612     [Serializable]
    613     public class DataFormatException : Exception {
    614       private int line;
    615       public int Line {
    616         get { return line; }
    617       }
    618       private string token;
    619       public string Token {
    620         get { return token; }
    621       }
    622       public DataFormatException(string message, string token, int line)
    623         : base(message + "\nToken: " + token + " (line: " + line + ")") {
    624         this.token = token;
    625         this.line = line;
    626       }
    627 
    628       public DataFormatException(SerializationInfo info, StreamingContext context) : base(info, context) { }
    629     }
    630641  }
    631642}
  • branches/symbreg-factors-2650/HeuristicLab.Tests

  • branches/symbreg-factors-2650/HeuristicLab.Tests/HeuristicLab.Problems.Instances.DataAnalysis-3.3/TableFileParserTest.cs

    r14185 r14330  
    2424using System.Globalization;
    2525using System.IO;
     26using System.Text;
    2627using Microsoft.VisualStudio.TestTools.UnitTesting;
    2728
     
    4748        Assert.AreEqual(4, parser.Columns);
    4849        Assert.AreEqual(parser.Values[3][0], 3.14);
    49       } finally {
     50      }
     51      finally {
    5052        File.Delete(tempFileName);
    5153      }
     
    7173        Assert.AreEqual(4, parser.Columns);
    7274        Assert.AreEqual(parser.Values[3][0], 3.14);
    73       } finally {
     75      }
     76      finally {
    7477        File.Delete(tempFileName);
    7578      }
     
    9497        Assert.AreEqual(4, parser.Columns);
    9598        Assert.AreEqual(parser.Values[3][0], 3.14);
    96       } finally {
     99      }
     100      finally {
    97101        File.Delete(tempFileName);
    98102      }
     
    118122        Assert.AreEqual(4, parser.Columns);
    119123        Assert.AreEqual(parser.Values[3][0], 3.14);
    120       } finally {
     124      }
     125      finally {
    121126        File.Delete(tempFileName);
    122127      }
     
    141146        Assert.AreEqual(4, parser.Columns);
    142147        Assert.AreEqual((double)parser.Values[3][0], 3);
    143       } finally {
     148      }
     149      finally {
    144150        File.Delete(tempFileName);
    145151      }
     
    165171        Assert.AreEqual(4, parser.Columns);
    166172        Assert.AreEqual((double)parser.Values[3][0], 3);
    167       } finally {
     173      }
     174      finally {
    168175        File.Delete(tempFileName);
    169176      }
     
    188195        Assert.AreEqual(4, parser.Columns);
    189196        Assert.AreEqual((double)parser.Values[3][0], 3);
    190       } finally {
     197      }
     198      finally {
    191199        File.Delete(tempFileName);
    192200      }
     
    211219        Assert.AreEqual(4, parser.Columns);
    212220        Assert.AreEqual((double)parser.Values[3][0], 3);
    213       } finally {
     221      }
     222      finally {
    214223        File.Delete(tempFileName);
    215224      }
     
    235244        Assert.AreEqual(4, parser.Columns);
    236245        Assert.AreEqual((double)parser.Values[3][0], 3);
    237       } finally {
     246      }
     247      finally {
    238248        File.Delete(tempFileName);
    239249      }
     
    259269        Assert.AreEqual(4, parser.Columns);
    260270        Assert.AreEqual((double)parser.Values[3][0], 3);
    261       } finally {
     271      }
     272      finally {
    262273        File.Delete(tempFileName);
    263274      }
     
    283294        Assert.AreEqual(4, parser.Columns);
    284295        Assert.AreEqual((double)parser.Values[3][0], 3.14);
    285       } finally {
     296      }
     297      finally {
    286298        File.Delete(tempFileName);
    287299      }
     
    307319        Assert.AreEqual(4, parser.Columns);
    308320        Assert.AreEqual((double)parser.Values[3][0], 3.14);
    309       } finally {
     321      }
     322      finally {
    310323        File.Delete(tempFileName);
    311324      }
     
    330343        Assert.AreEqual(4, parser.Columns);
    331344        Assert.AreEqual((double)parser.Values[3][0], 3.14);
    332       } finally {
     345      }
     346      finally {
    333347        File.Delete(tempFileName);
    334348      }
     
    354368        Assert.AreEqual(4, parser.Columns);
    355369        Assert.AreEqual((double)parser.Values[3][0], 3.14);
    356       } finally {
     370      }
     371      finally {
    357372        File.Delete(tempFileName);
    358373      }
     
    377392        Assert.AreEqual(4, parser.Columns);
    378393        Assert.AreEqual((double)parser.Values[3][0], 3);
    379       } finally {
     394      }
     395      finally {
    380396        File.Delete(tempFileName);
    381397      }
     
    401417        Assert.AreEqual(4, parser.Columns);
    402418        Assert.AreEqual((double)parser.Values[3][0], 3);
    403       } finally {
     419      }
     420      finally {
    404421        File.Delete(tempFileName);
    405422      }
     
    424441        Assert.AreEqual(4, parser.Rows);
    425442        Assert.AreEqual(4, parser.Columns);
    426       } finally {
     443      }
     444      finally {
    427445        File.Delete(tempFileName);
    428446      }
     
    447465        Assert.AreEqual(4, parser.Columns);
    448466        Assert.AreEqual((double)parser.Values[3][0], 3.14);
    449       } finally {
     467      }
     468      finally {
    450469        File.Delete(tempFileName);
    451470      }
     
    471490        Assert.AreEqual(4, parser.Columns);
    472491        Assert.AreEqual((double)parser.Values[3][0], 3.14);
    473       } finally {
     492      }
     493      finally {
    474494        File.Delete(tempFileName);
    475495      }
     
    494514        Assert.AreEqual(4, parser.Columns);
    495515        Assert.AreEqual((double)parser.Values[3][0], 3.14);
    496       } finally {
     516      }
     517      finally {
    497518        File.Delete(tempFileName);
    498519      }
     
    518539        Assert.AreEqual(4, parser.Columns);
    519540        Assert.AreEqual((double)parser.Values[3][0], 3.14);
    520       } finally {
     541      }
     542      finally {
    521543        File.Delete(tempFileName);
    522544      }
     
    539561        Assert.AreEqual(3, parser.Rows);
    540562        Assert.AreEqual(4507, parser.Columns);
    541       } finally {
     563      }
     564      finally {
    542565        File.Delete(tempFileName);
    543566      }
     
    562585        Assert.AreEqual(4, parser.Columns);
    563586        Assert.AreEqual((double)parser.Values[3][0], 3);
    564       } finally {
     587      }
     588      finally {
    565589        File.Delete(tempFileName);
    566590      }
     
    586610        Assert.AreEqual(4, parser.Columns);
    587611        Assert.AreEqual((double)parser.Values[3][0], 3);
    588       } finally {
     612      }
     613      finally {
    589614        File.Delete(tempFileName);
    590615      }
     
    627652        Assert.IsTrue(double.IsPositiveInfinity((double)parser.Values[1][3])); // NOTE: in DE-DE NumberFormat just "unendlich" is not allowed (compare with InvariantCulture)
    628653        Assert.IsTrue(double.IsNegativeInfinity((double)parser.Values[1][4]));
    629       } finally {
     654      }
     655      finally {
    630656        File.Delete(tempFileName);
    631657      }
     
    664690        Assert.IsTrue(double.IsPositiveInfinity((double)parser.Values[1][3])); // NOTE: in InvariantCulture +Infinity is not allowed (compare with DE-DE)
    665691        Assert.IsTrue(double.IsNegativeInfinity((double)parser.Values[1][4]));
    666       } finally {
    667         File.Delete(tempFileName);
     692      }
     693      finally {
     694        File.Delete(tempFileName);
     695      }
     696    }
     697
     698
     699    [TestMethod]
     700    [TestCategory("Problems.Instances")]
     701    [TestProperty("Time", "short")]
     702    public void ParseWithTypeConversion() {
     703      // the parser tries to determine the column type (double, datetime, string) by looking at the values in the first few rows
     704      // if the values are of a different type then the type of the column is converted
     705      {
     706        // case 1
     707        // default for values is double and therefore a column with all missing values should be List<double> and contain NaN
     708        var tmpFileName = Path.GetTempFileName();
     709        WriteToFile(tmpFileName,
     710          @"stringCol,note
     711,missing val
     7123.14,double
     713");
     714
     715        TableFileParser parser = new TableFileParser();
     716        try {
     717          parser.Parse(tmpFileName,
     718            CultureInfo.InvariantCulture.NumberFormat,
     719            CultureInfo.InvariantCulture.DateTimeFormat,
     720            separator: ',', columnNamesInFirstLine: true);
     721          Assert.IsTrue(parser.Values[0] is List<double>);
     722          Assert.AreEqual(double.NaN, parser.Values[0][0]);
     723          Assert.AreEqual(3.14, parser.Values[0][1]);
     724        }
     725        finally {
     726          File.Delete(tmpFileName);
     727        }
     728
     729      }
     730
     731      {
     732        // case 2
     733        // 'The first missing values are replaced with double.NaN while the last ones with string.Empty.'
     734
     735        var tmpFileName = Path.GetTempFileName();
     736        WriteToFile(tmpFileName,
     737          @"stringCol,note
     738,missing val
     7393.14,
     740,missing val
     741str,a string --> column is converted to List<string>
     742,missing val
     743");
     744
     745        TableFileParser parser = new TableFileParser();
     746        try {
     747          parser.Parse(tmpFileName,
     748            CultureInfo.InvariantCulture.NumberFormat,
     749            CultureInfo.InvariantCulture.DateTimeFormat,
     750            separator: ',', columnNamesInFirstLine: true);
     751          Assert.IsTrue(parser.Values[0] is List<string>);
     752          Assert.AreEqual(string.Empty, parser.Values[0][0]);
     753          Assert.AreEqual("3.14", parser.Values[0][1]);
     754          Assert.AreEqual(string.Empty, parser.Values[0][2]);
     755          Assert.AreEqual("str", parser.Values[0][3]);
     756          Assert.AreEqual(string.Empty, parser.Values[0][4]);
     757        }
     758        finally {
     759          File.Delete(tmpFileName);
     760        }
     761      }
     762
     763      {
     764        // case 3
     765        // DateTime conversion to strings
     766        var tmpFileName = Path.GetTempFileName();
     767        WriteToFile(tmpFileName,
     768          @"stringCol,note
     769,missing val
     7703.1.2016,
     771,missing val
     772str,a string --> column is converted to List<string>
     773,missing val
     774");
     775
     776        TableFileParser parser = new TableFileParser();
     777        try {
     778          parser.Parse(tmpFileName,
     779            CultureInfo.InvariantCulture.NumberFormat,
     780            CultureInfo.InvariantCulture.DateTimeFormat,
     781            separator: ',', columnNamesInFirstLine: true);
     782          Assert.IsTrue(parser.Values[0] is List<string>);
     783          Assert.AreEqual(string.Empty, parser.Values[0][0]);
     784          Assert.AreEqual("3.1.2016", parser.Values[0][1]);
     785          Assert.AreEqual(string.Empty, parser.Values[0][2]);
     786          Assert.AreEqual("str", parser.Values[0][3]);
     787          Assert.AreEqual(string.Empty, parser.Values[0][4]);
     788        }
     789        finally {
     790          File.Delete(tmpFileName);
     791        }
     792      }
     793    }
     794
     795    [TestMethod]
     796    [TestCategory("Problems.Instances")]
     797    [TestProperty("Time", "short")]
     798    public void ParseDateTime() {
     799      {
     800        // case 1 dates and datetimes should be parsed as datetime column
     801        var tmpFileName = Path.GetTempFileName();
     802        WriteToFile(tmpFileName,
     803          @"stringCol,note
     80419.6.2016,date
     80519.6.2016 8:15,datetime
     806");
     807
     808        TableFileParser parser = new TableFileParser();
     809        try {
     810          parser.Parse(tmpFileName,
     811            CultureInfo.GetCultureInfo("de-de").NumberFormat,
     812            CultureInfo.GetCultureInfo("de-de").DateTimeFormat,
     813            separator: ',', columnNamesInFirstLine: true);
     814          Assert.IsTrue(parser.Values[0] is List<DateTime>);
     815          Assert.AreEqual(new DateTime(2016, 6, 19), parser.Values[0][0]);
     816          Assert.AreEqual(new DateTime(2016, 6, 19, 8, 15, 0), parser.Values[0][1]);
     817
     818          WriteToFile(tmpFileName,
     819            @"stringCol,note
     8202016/6/19,date
     8212016/6/19 8:15,datetime
     822");
     823
     824          parser.Parse(tmpFileName,
     825            CultureInfo.InvariantCulture.NumberFormat,
     826            CultureInfo.InvariantCulture.DateTimeFormat,
     827            separator: ',', columnNamesInFirstLine: true);
     828          Assert.IsTrue(parser.Values[0] is List<DateTime>);
     829          Assert.AreEqual(new DateTime(2016, 6, 19), parser.Values[0][0]);
     830          Assert.AreEqual(new DateTime(2016, 6, 19, 8, 15, 0), parser.Values[0][1]);
     831        }
     832
     833        finally {
     834          File.Delete(tmpFileName);
     835        }
     836      }
     837
     838      {
     839        // case 2 never parse time values as datetimes
     840        var tmpFileName = Path.GetTempFileName();
     841        WriteToFile(tmpFileName,
     842          @"stringCol,note
     8438:15,time value
     8449:40,time value
     845");
     846
     847        TableFileParser parser = new TableFileParser();
     848        try {
     849          parser.Parse(tmpFileName,
     850            CultureInfo.InvariantCulture.NumberFormat,
     851            CultureInfo.InvariantCulture.DateTimeFormat,
     852            separator: ',', columnNamesInFirstLine: true);
     853          Assert.IsTrue(parser.Values[0] is List<string>); // time values should be parsed as strings
     854          Assert.AreEqual("8:15", parser.Values[0][0]);
     855          Assert.AreEqual("9:40", parser.Values[0][1]);
     856        }
     857        finally {
     858          File.Delete(tmpFileName);
     859        }
     860      }
     861    }
     862
     863
     864
     865    [TestMethod]
     866    [TestCategory("Problems.Instances")]
     867    [TestProperty("Time", "short")]
     868    public void CheckTypeConversionAndLongFiles() {
     869      {
     870        // case 1 incorrect input after 500 rows should lead to exceptions
     871        var tmpFileName = Path.GetTempFileName();
     872        // create input data
     873        var sb = new StringBuilder();
     874        sb.AppendLine("col1,col2");
     875        for (int r = 0; r < 2000; r++) {
     876          sb.AppendLine("3.15, 3.15");
     877        }
     878        // add a row with only one input value
     879        sb.AppendLine("3.15");
     880
     881        WriteToFile(tmpFileName, sb.ToString());
     882
     883        TableFileParser parser = new TableFileParser();
     884        try {
     885          parser.Parse(tmpFileName,
     886            CultureInfo.InvariantCulture.NumberFormat,
     887            CultureInfo.InvariantCulture.DateTimeFormat,
     888            separator: ',', columnNamesInFirstLine: true);
     889          // Parse should fail with an exception
     890          Assert.Fail("expected exception TableFileParser.DataFormatException");
     891        }
     892        catch (IOException) {
     893          // ignore the expected exception
     894        }
     895
     896        finally {
     897          File.Delete(tmpFileName);
     898        }
     899      }
     900      {
     901        // case 2
     902        var tmpFileName = Path.GetTempFileName();
     903        // create input data
     904        var sb = new StringBuilder();
     905        sb.AppendLine("doubleCol,stringCol");
     906        for (int r = 0; r < 2000; r++) {
     907          sb.AppendLine("3.15, 3.15");
     908        }
     909        // add a row with a string value --> the column should be converted to string
     910        sb.AppendLine("3.15,str");
     911
     912        WriteToFile(tmpFileName, sb.ToString());
     913
     914        TableFileParser parser = new TableFileParser();
     915        try {
     916          parser.Parse(tmpFileName,
     917            CultureInfo.InvariantCulture.NumberFormat,
     918            CultureInfo.InvariantCulture.DateTimeFormat,
     919            separator: ',', columnNamesInFirstLine: true);
     920          Assert.IsTrue(parser.Values[0] is List<double>);
     921          Assert.IsTrue(parser.Values[1] is List<string>);
     922          Assert.AreEqual(parser.Values[1][0], "3.15");
     923          Assert.AreEqual(parser.Values[1][2000], "str");
     924        }
     925
     926        finally {
     927          File.Delete(tmpFileName);
     928        }
    668929      }
    669930    }
Note: See TracChangeset for help on using the changeset viewer.