Ignore:
Timestamp:
12/15/18 12:36:08 (11 months ago)
Author:
gkronber
Message:

#2892: Merging r15750:16382 (HEAD) from trunk to branch, resolving conflicts

Location:
branches/2892_LR-prediction-intervals
Files:
22 edited

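Legend:

Unmodified: the line is prefixed with its old and new line numbers run together (e.g. "4041" means old line 40, new line 41).
Added: the line is prefixed with a single number in the right-hand (new revision) column, e.g. "     40".
Removed: the line is prefixed with a single number in the left-hand (old revision) column, e.g. "    60".
Lines containing only whitespace between numbered lines appear to mark unchanged context that was skipped between hunks.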
  • branches/2892_LR-prediction-intervals

  • branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis

  • branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Dataset.cs

    r15583 r16388  
    3838    protected Dataset(Dataset original, Cloner cloner)
    3939      : base(original, cloner) {
     40      // no need to clone the variable values because these can't be modified
    4041      variableValues = new Dictionary<string, IList>(original.variableValues);
    4142      variableNames = new List<string>(original.variableNames);
    4243      rows = original.rows;
    4344    }
     45
    4446    public override IDeepCloneable Clone(Cloner cloner) { return new Dataset(this, cloner); }
    4547
     
    5860    /// <param name="variableValues">The values for the variables (column-oriented storage). Values are not cloned!</param>
    5961    public Dataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues)
    60       : base() {
     62      : this(variableNames, variableValues, cloneValues: true) {
     63    }
     64
     65    protected Dataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues, bool cloneValues = false) {
    6166      Name = "-";
    62       if (!variableNames.Any()) {
     67
     68      if (variableNames.Any()) {
     69        this.variableNames = new List<string>(variableNames);
     70      } else {
    6371        this.variableNames = Enumerable.Range(0, variableValues.Count()).Select(x => "Column " + x).ToList();
    64       } else if (variableNames.Count() != variableValues.Count()) {
    65         throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues");
    66       } else if (!variableValues.All(list => list.Count == variableValues.First().Count)) {
    67         throw new ArgumentException("The number of values must be equal for every variable");
    68       } else if (variableNames.Distinct().Count() != variableNames.Count()) {
    69         var duplicateVariableNames =
    70           variableNames.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList();
    71         string message = "The dataset cannot contain duplicate variables names: " + Environment.NewLine;
    72         foreach (var duplicateVariableName in duplicateVariableNames)
    73           message += duplicateVariableName + Environment.NewLine;
    74         throw new ArgumentException(message);
    75       }
     72      }
     73      // check if the arguments are consistent (no duplicate variables, same number of rows, correct data types, ...)
     74      CheckArguments(this.variableNames, variableValues);
     75
    7676      rows = variableValues.First().Count;
    77       this.variableNames = new List<string>(variableNames);
    78       this.variableValues = new Dictionary<string, IList>(this.variableNames.Count);
    79       for (int i = 0; i < this.variableNames.Count; i++) {
    80         var values = variableValues.ElementAt(i);
    81         this.variableValues.Add(this.variableNames[i], values);
     77
     78      if (cloneValues) {
     79        this.variableValues = CloneValues(this.variableNames, variableValues);
     80      } else {
     81        this.variableValues = new Dictionary<string, IList>(this.variableNames.Count);
     82        for (int i = 0; i < this.variableNames.Count; i++) {
     83          var variableName = this.variableNames[i];
     84          var values = variableValues.ElementAt(i);
     85          this.variableValues.Add(variableName, values);
     86        }
    8287      }
    8388    }
     
    111116
    112117    public ModifiableDataset ToModifiable() {
    113       var values = new List<IList>();
    114       foreach (var v in variableNames) {
    115         if (VariableHasType<double>(v)) {
    116           values.Add(new List<double>((IList<double>)variableValues[v]));
    117         } else if (VariableHasType<string>(v)) {
    118           values.Add(new List<string>((IList<string>)variableValues[v]));
    119         } else if (VariableHasType<DateTime>(v)) {
    120           values.Add(new List<DateTime>((IList<DateTime>)variableValues[v]));
    121         } else {
    122           throw new ArgumentException("Unknown variable type.");
    123         }
    124       }
    125       return new ModifiableDataset(variableNames, values);
    126     }
     118      return new ModifiableDataset(variableNames, variableNames.Select(v => variableValues[v]), true);
     119    }
     120
    127121    /// <summary>
    128122    /// Shuffle a dataset's rows
     
    135129    }
    136130
    137     protected Dataset(Dataset dataset) : this(dataset.variableNames, dataset.variableValues.Values) { }
     131
    138132
    139133    #region Backwards compatible code, remove with 3.5
     
    173167      }
    174168    }
     169
     170    public bool ContainsVariable(string variableName) {
     171      return variableValues.ContainsKey(variableName);
     172    }
    175173    public IEnumerable<string> DoubleVariables {
    176174      get { return variableValues.Where(p => p.Value is IList<double>).Select(p => p.Key); }
     
    231229      return new ReadOnlyCollection<DateTime>(values);
    232230    }
    233 
    234 
    235231    private IEnumerable<T> GetValues<T>(string variableName, IEnumerable<int> rows) {
    236232      var values = GetValues<T>(variableName);
     
    248244      return variableValues[variableName] is IList<T>;
    249245    }
     246    protected Type GetVariableType(string variableName) {
     247      IList list;
     248      variableValues.TryGetValue(variableName, out list);
     249      if (list == null)
     250        throw new ArgumentException("The variable " + variableName + " does not exist in the dataset.");
     251      return GetElementType(list);
     252    }
     253    protected static Type GetElementType(IList list) {
     254      var type = list.GetType();
     255      return type.IsGenericType ? type.GetGenericArguments()[0] : type.GetElementType();
     256    }
     257    protected static bool IsAllowedType(IList list) {
     258      var type = GetElementType(list);
     259      return IsAllowedType(type);
     260    }
     261    protected static bool IsAllowedType(Type type) {
     262      return type == typeof(double) || type == typeof(string) || type == typeof(DateTime);
     263    }
     264
     265    protected static void CheckArguments(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) {
     266      if (variableNames.Count() != variableValues.Count()) {
     267        throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues");
     268      } else if (!variableValues.All(list => list.Count == variableValues.First().Count)) {
     269        throw new ArgumentException("The number of values must be equal for every variable");
     270      } else if (variableNames.Distinct().Count() != variableNames.Count()) {
     271        var duplicateVariableNames =
     272          variableNames.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList();
     273        string message = "The dataset cannot contain duplicate variables names: " + Environment.NewLine;
     274        foreach (var duplicateVariableName in duplicateVariableNames)
     275          message += duplicateVariableName + Environment.NewLine;
     276        throw new ArgumentException(message);
     277      }
     278      // check if all the variables are supported
     279      foreach (var t in variableNames.Zip(variableValues, Tuple.Create)) {
     280        var variableName = t.Item1;
     281        var values = t.Item2;
     282
     283        if (!IsAllowedType(values)) {
     284          throw new ArgumentException(string.Format("Unsupported type {0} for variable {1}.", GetElementType(values), variableName));
     285        }
     286      }
     287    }
     288
     289    protected static Dictionary<string, IList> CloneValues(Dictionary<string, IList> variableValues) {
     290      return variableValues.ToDictionary(x => x.Key, x => CloneValues(x.Value));
     291    }
     292
     293    protected static Dictionary<string, IList> CloneValues(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) {
     294      return variableNames.Zip(variableValues, Tuple.Create).ToDictionary(x => x.Item1, x => CloneValues(x.Item2));
     295    }
     296
     297    protected static IList CloneValues(IList values) {
     298      var doubleValues = values as IList<double>;
     299      if (doubleValues != null) return new List<double>(doubleValues);
     300
     301      var stringValues = values as IList<string>;
     302      if (stringValues != null) return new List<string>(stringValues);
     303
     304      var dateTimeValues = values as IList<DateTime>;
     305      if (dateTimeValues != null) return new List<DateTime>(dateTimeValues);
     306
     307      throw new ArgumentException(string.Format("Unsupported variable type {0}.", GetElementType(values)));
     308    }
    250309
    251310    #region IStringConvertibleMatrix Members
    252311    [Storable]
    253     protected int rows;
     312    private int rows;
    254313    public int Rows {
    255314      get { return rows; }
     315      protected set { rows = value; }
    256316    }
    257317    int IStringConvertibleMatrix.Rows {
  • branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationModel.cs

    r15583 r16388  
    6666    public abstract IClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData);
    6767
     68    public virtual bool IsProblemDataCompatible(IClassificationProblemData problemData, out string errorMessage) {
     69      return IsProblemDataCompatible(this, problemData, out errorMessage);
     70    }
     71
     72    public override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) {
     73      if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");
     74      var classificationProblemData = problemData as IClassificationProblemData;
     75      if (classificationProblemData == null)
     76        throw new ArgumentException("The problem data is not a regression problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");
     77      return IsProblemDataCompatible(classificationProblemData, out errorMessage);
     78    }
     79
     80    public static bool IsProblemDataCompatible(IClassificationModel model, IClassificationProblemData problemData, out string errorMessage) {
     81      if (model == null) throw new ArgumentNullException("model", "The provided model is null.");
     82      if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");
     83      errorMessage = string.Empty;
     84
     85      if (model.TargetVariable != problemData.TargetVariable)
     86        errorMessage = string.Format("The target variable of the model {0} does not match the target variable of the problemData {1}.", model.TargetVariable, problemData.TargetVariable);
     87
     88      var evaluationErrorMessage = string.Empty;
     89      var datasetCompatible = model.IsDatasetCompatible(problemData.Dataset, out evaluationErrorMessage);
     90      if (!datasetCompatible)
     91        errorMessage += evaluationErrorMessage;
     92
     93      return string.IsNullOrEmpty(errorMessage);
     94    }
     95
    6896    #region events
    6997    public event EventHandler TargetVariableChanged;
  • branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationProblemData.cs

    r15583 r16388  
    467467    }
    468468    #endregion
    469 
    470     protected override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) {
    471       if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");
    472       IClassificationProblemData classificationProblemData = problemData as IClassificationProblemData;
    473       if (classificationProblemData == null)
    474         throw new ArgumentException("The problem data is no classification problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");
    475 
    476       var returnValue = base.IsProblemDataCompatible(classificationProblemData, out errorMessage);
    477       //check targetVariable
    478       if (classificationProblemData.InputVariables.All(var => var.Value != TargetVariable)) {
    479         errorMessage = string.Format("The target variable {0} is not present in the new problem data.", TargetVariable)
    480                        + Environment.NewLine + errorMessage;
    481         return false;
    482       }
    483 
    484       var newClassValues = classificationProblemData.Dataset.GetDoubleValues(TargetVariable).Distinct().OrderBy(x => x);
    485       if (!newClassValues.SequenceEqual(ClassValues)) {
    486         errorMessage = errorMessage + string.Format("The class values differ in the provided classification problem data.");
    487         returnValue = false;
    488       }
    489 
    490       var newPositivieClassName = classificationProblemData.PositiveClass;
    491       if (newPositivieClassName != PositiveClass) {
    492         errorMessage = errorMessage + string.Format("The positive class differs in the provided classification problem data.");
    493         returnValue = false;
    494       }
    495 
    496       return returnValue;
    497     }
    498 
    499     public override void AdjustProblemDataProperties(IDataAnalysisProblemData problemData) {
    500       if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");
    501       ClassificationProblemData classificationProblemData = problemData as ClassificationProblemData;
    502       if (classificationProblemData == null)
    503         throw new ArgumentException("The problem data is not a classification problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");
    504 
    505       base.AdjustProblemDataProperties(problemData);
    506       TargetVariable = classificationProblemData.TargetVariable;
    507       for (int i = 0; i < classificationProblemData.ClassNames.Count(); i++)
    508         ClassNamesParameter.Value[i, 0] = classificationProblemData.ClassNames.ElementAt(i);
    509 
    510       PositiveClass = classificationProblemData.PositiveClass;
    511 
    512       for (int i = 0; i < Classes; i++) {
    513         for (int j = 0; j < Classes; j++) {
    514           ClassificationPenaltiesParameter.Value[i, j] = classificationProblemData.GetClassificationPenalty(ClassValuesCache[i], ClassValuesCache[j]);
    515         }
    516       }
    517     }
    518469  }
    519470}
  • branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationSolutionBase.cs

    r15583 r16388  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
    2324using System.Linq;
     
    4445    public new IClassificationProblemData ProblemData {
    4546      get { return (IClassificationProblemData)base.ProblemData; }
    46       set { base.ProblemData = value; }
     47      set {
     48        if (value == null) throw new ArgumentNullException("The problemData must not be null.");
     49        string errorMessage = string.Empty;
     50        if (!Model.IsProblemDataCompatible(value, out errorMessage)) throw new ArgumentException(errorMessage);
     51
     52        base.ProblemData = value;
     53      }
    4754    }
    4855
  • branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationSolutionVariableImpactsCalculator.cs

    r15674 r16388  
    100100      var problemData = solution.ProblemData;
    101101      var dataset = problemData.Dataset;
     102      var model = (IClassificationModel)solution.Model.Clone(); //mkommend: clone of model is necessary, because the thresholds for IDiscriminantClassificationModels are updated
    102103
    103104      IEnumerable<int> rows;
     
    137138      // calculate impacts for double variables
    138139      foreach (var inputVariable in allowedInputVariables.Where(problemData.Dataset.VariableHasType<double>)) {
    139         var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, replacementMethod);
     140        var newEstimates = EvaluateModelWithReplacedVariable(model, inputVariable, modifiableDataset, rows, replacementMethod);
    140141        var newAccuracy = OnlineAccuracyCalculator.Calculate(targetValues, newEstimates, out error);
    141142        if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
     
    150151          var smallestImpact = double.PositiveInfinity;
    151152          foreach (var repl in problemData.Dataset.GetStringValues(inputVariable, rows).Distinct()) {
    152             var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows,
     153            var newEstimates = EvaluateModelWithReplacedVariable(model, inputVariable, modifiableDataset, rows,
    153154              Enumerable.Repeat(repl, dataset.Rows));
    154155            var newAccuracy = OnlineAccuracyCalculator.Calculate(targetValues, newEstimates, out error);
     
    164165          // calculate impacts for factor variables
    165166
    166           var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows,
     167          var newEstimates = EvaluateModelWithReplacedVariable(model, inputVariable, modifiableDataset, rows,
    167168            factorReplacementMethod);
    168169          var newAccuracy = OnlineAccuracyCalculator.Calculate(targetValues, newEstimates, out error);
     
    263264      var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList();
    264265      dataset.ReplaceVariable(variable, replacementValues.ToList());
     266
     267      var discModel = model as IDiscriminantFunctionClassificationModel;
     268      if (discModel != null) {
     269        var problemData = new ClassificationProblemData(dataset, dataset.VariableNames, model.TargetVariable);
     270        discModel.RecalculateModelParameters(problemData, rows);
     271      }
     272
    265273      //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
    266274      var estimates = model.GetEstimatedClassValues(dataset, rows).ToList();
     
    273281      var originalValues = dataset.GetReadOnlyStringValues(variable).ToList();
    274282      dataset.ReplaceVariable(variable, replacementValues.ToList());
     283
     284
     285      var discModel = model as IDiscriminantFunctionClassificationModel;
     286      if (discModel != null) {
     287        var problemData = new ClassificationProblemData(dataset, dataset.VariableNames, model.TargetVariable);
     288        discModel.RecalculateModelParameters(problemData, rows);
     289      }
     290
    275291      //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
    276292      var estimates = model.GetEstimatedClassValues(dataset, rows).ToList();
  • branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/ConstantModel.cs

    r15583 r16388  
    8383    }
    8484
     85    public virtual bool IsProblemDataCompatible(IClassificationProblemData problemData, out string errorMessage) {
     86      return ClassificationModel.IsProblemDataCompatible(this, problemData, out errorMessage);
     87    }
     88
     89    public override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) {
     90      if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");
     91
     92      var regressionProblemData = problemData as IRegressionProblemData;
     93      if (regressionProblemData != null)
     94        return IsProblemDataCompatible(regressionProblemData, out errorMessage);
     95
     96      var classificationProblemData = problemData as IClassificationProblemData;
     97      if (classificationProblemData != null)
     98        return IsProblemDataCompatible(classificationProblemData, out errorMessage);
     99
     100      throw new ArgumentException("The problem data is not a regression nor a classification problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");
     101    }
     102
    85103    #region IStringConvertibleValue
    86104    public bool ReadOnly { get; private set; }
  • branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisModel.cs

    r15583 r16388  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
    2324using HeuristicLab.Common;
     
    3839
    3940    public abstract IEnumerable<string> VariablesUsedForPrediction { get; }
     41
     42    public virtual bool IsDatasetCompatible(IDataset dataset, out string errorMessage) {
     43      if (dataset == null) throw new ArgumentNullException("dataset", "The provided dataset is null.");
     44      return IsDatasetCompatible(this, dataset, out errorMessage);
     45    }
     46
     47    public abstract bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage);
     48
     49    public static bool IsDatasetCompatible(IDataAnalysisModel model, IDataset dataset, out string errorMessage) {
     50      if(model == null) throw new ArgumentNullException("model", "The provided model is null.");
     51      if (dataset == null) throw new ArgumentNullException("dataset", "The provided dataset is null.");
     52      errorMessage = string.Empty;
     53
     54      foreach (var variable in model.VariablesUsedForPrediction) {
     55        if (!dataset.ContainsVariable(variable)) {
     56          if (string.IsNullOrEmpty(errorMessage)) {
     57            errorMessage = "The following variables must be present in the dataset for model evaluation:";
     58          }
     59          errorMessage += System.Environment.NewLine + " " + variable;
     60        }
     61      }
     62
     63      return string.IsNullOrEmpty(errorMessage);
     64    }
    4065  }
    4166}
  • branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisProblemData.cs

    r15583 r16388  
    163163
    164164      var variables = dataset.VariableNames.Where(variable => dataset.VariableHasType<double>(variable) || dataset.VariableHasType<string>(variable));
    165       var inputVariables = new CheckedItemList<StringValue>(variables.Select(x => new StringValue(x)));
     165      var inputVariables = new CheckedItemList<StringValue>(variables.Select(x => new StringValue(x).AsReadOnly()));
    166166      foreach (StringValue x in inputVariables)
    167167        inputVariables.SetItemCheckedState(x, allowedInputVariables.Contains(x.Value));
     
    207207      if (listeners != null) listeners(this, EventArgs.Empty);
    208208    }
    209 
    210     protected virtual bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) {
    211       errorMessage = string.Empty;
    212       if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");
    213 
    214       //check allowed input variables
    215       StringBuilder message = new StringBuilder();
    216       var variables = new HashSet<string>(problemData.InputVariables.Select(x => x.Value));
    217       foreach (var item in AllowedInputVariables) {
    218         if (!variables.Contains(item))
    219           message.AppendLine("Input variable '" + item + "' is not present in the new problem data.");
    220       }
    221 
    222       if (message.Length != 0) {
    223         errorMessage = message.ToString();
    224         return false;
    225       }
    226       return true;
    227 
    228     }
    229 
    230     public virtual void AdjustProblemDataProperties(IDataAnalysisProblemData problemData) {
    231       DataAnalysisProblemData data = problemData as DataAnalysisProblemData;
    232       if (data == null) throw new ArgumentException("The problem data is not a data analysis problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");
    233 
    234       string errorMessage;
    235       if (!data.IsProblemDataCompatible(this, out errorMessage)) {
    236         throw new InvalidOperationException(errorMessage);
    237       }
    238 
    239       foreach (var inputVariable in InputVariables) {
    240         var variable = data.InputVariables.FirstOrDefault(i => i.Value == inputVariable.Value);
    241         InputVariables.SetItemCheckedState(inputVariable, variable != null && data.InputVariables.ItemChecked(variable));
    242       }
    243     }
    244209  }
    245210}
  • branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisSolution.cs

    r15583 r16388  
    5858      get { return (IDataAnalysisProblemData)this[ProblemDataResultName].Value; }
    5959      set {
    60         if (this[ProblemDataResultName].Value != value) {
    61           if (value != null) {
    62             ProblemData.Changed -= new EventHandler(ProblemData_Changed);
    63             this[ProblemDataResultName].Value = value;
    64             ProblemData.Changed += new EventHandler(ProblemData_Changed);
    65             OnProblemDataChanged();
    66           }
    67         }
     60        if (value == null) throw new ArgumentNullException("The problemData must not be null.");
     61        if (this[ProblemDataResultName].Value == value) return;
     62        string errorMessage = string.Empty;
     63        if (!Model.IsProblemDataCompatible(value, out errorMessage)) throw new ArgumentException(errorMessage);
     64
     65        ProblemData.Changed -= new EventHandler(ProblemData_Changed);
     66        this[ProblemDataResultName].Value = value;
     67        ProblemData.Changed += new EventHandler(ProblemData_Changed);
     68        OnProblemDataChanged();
    6869      }
    6970    }
  • branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionModel.cs

    r15583 r16388  
    6767    public abstract IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData);
    6868
     69    public virtual bool IsProblemDataCompatible(IRegressionProblemData problemData, out string errorMessage) {
     70      return IsProblemDataCompatible(this, problemData, out errorMessage);
     71    }
     72
     73    public override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) {
     74      if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");
     75      var regressionProblemData = problemData as IRegressionProblemData;
     76      if (regressionProblemData == null)
     77        throw new ArgumentException("The problem data is not a regression problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");
     78      return IsProblemDataCompatible(regressionProblemData, out errorMessage);
     79    }
     80
     81    public static bool IsProblemDataCompatible(IRegressionModel model, IRegressionProblemData problemData, out string errorMessage) {
     82      if (model == null) throw new ArgumentNullException("model", "The provided model is null.");
     83      if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");
     84      errorMessage = string.Empty;
     85
     86      if (model.TargetVariable != problemData.TargetVariable)
     87        errorMessage = string.Format("The target variable of the model {0} does not match the target variable of the problemData {1}.", model.TargetVariable, problemData.TargetVariable);
     88
     89      var evaluationErrorMessage = string.Empty;
     90      var datasetCompatible = model.IsDatasetCompatible(problemData.Dataset, out evaluationErrorMessage);
     91      if (!datasetCompatible)
     92        errorMessage += evaluationErrorMessage;
     93
     94      return string.IsNullOrEmpty(errorMessage);
     95    }
     96
    6997    #region events
    7098    public event EventHandler TargetVariableChanged;
  • branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionProblemData.cs

    r15583 r16388  
    161161      OnChanged();
    162162    }
    163 
    164     protected override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) {
    165       if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");
    166       IRegressionProblemData regressionProblemData = problemData as IRegressionProblemData;
    167       if (regressionProblemData == null)
    168         throw new ArgumentException("The problem data is not a regression problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");
    169 
    170       var returnValue = base.IsProblemDataCompatible(problemData, out errorMessage);
    171       return returnValue;
    172     }
    173 
    174     public override void AdjustProblemDataProperties(IDataAnalysisProblemData problemData) {
    175       if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");
    176       RegressionProblemData regressionProblemData = problemData as RegressionProblemData;
    177       if (regressionProblemData == null)
    178         throw new ArgumentException("The problem data is not a regression problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");
    179 
    180       base.AdjustProblemDataProperties(problemData);
    181     }
    182163  }
    183164}
  • branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolutionBase.cs

    r15583 r16388  
    7070    public new IRegressionProblemData ProblemData {
    7171      get { return (IRegressionProblemData)base.ProblemData; }
    72       set { base.ProblemData = value; }
     72      set {
     73        if (value == null) throw new ArgumentNullException("The problemData must not be null.");
     74        string errorMessage = string.Empty;
     75        if (!Model.IsProblemDataCompatible(value, out errorMessage)) throw new ArgumentException(errorMessage);
     76
     77        base.ProblemData = value;
     78      }
    7379    }
    7480
  • branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolutionVariableImpactsCalculator.cs

    r15673 r16388  
    5252      All
    5353    }
    54    
     54
    5555    private const string ReplacementParameterName = "Replacement Method";
    5656    private const string DataPartitionParameterName = "DataPartition";
     
    9696      DataPartitionEnum data = DataPartitionEnum.Training,
    9797      ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Median,
    98       FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best) {
     98      FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best,
     99      Func<double, string, bool> progressCallback = null) {
    99100
    100101      var problemData = solution.ProblemData;
     
    134135      var allowedInputVariables = dataset.VariableNames.Where(v => inputvariables.Contains(v)).ToList();
    135136
     137      int curIdx = 0;
     138      int count = allowedInputVariables.Where(problemData.Dataset.VariableHasType<double>).Count();
    136139      // calculate impacts for double variables
    137140      foreach (var inputVariable in allowedInputVariables.Where(problemData.Dataset.VariableHasType<double>)) {
     141        //Report the current progress as a fraction between 0 and 1. If the callback returns true, the calculation is aborted and null is returned
     142        if (progressCallback != null) {
     143          curIdx++;
     144          if (progressCallback((double)curIdx / count, string.Format("Calculating impact for variable {0} ({1} of {2})", inputVariable, curIdx, count))) { return null; }
     145        }
    138146        var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, replacementMethod);
    139147        var newR2 = OnlinePearsonsRCalculator.Calculate(targetValues, newEstimates, out error);
     
    180188    }
    181189
     190
    182191    private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, ModifiableDataset dataset, IEnumerable<int> rows, ReplacementMethodEnum replacement = ReplacementMethodEnum.Median) {
    183192      var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList();
  • branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/TimeSeriesPrognosis/TimeSeriesPrognosisProblemData.cs

    r15583 r16388  
    16201620      OnChanged();
    16211621    }
    1622 
    1623     protected override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) {
    1624       if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");
    1625       ITimeSeriesPrognosisProblemData timeseriesProblemData = problemData as ITimeSeriesPrognosisProblemData;
    1626       if (timeseriesProblemData == null)
    1627         throw new ArgumentException("The problem data is not a time-series problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");
    1628 
    1629       var returnValue = base.IsProblemDataCompatible(problemData, out errorMessage);
    1630       //check targetVariable
    1631       if (problemData.InputVariables.All(var => var.Value != TargetVariable)) {
    1632         errorMessage = string.Format("The target variable {0} is not present in the new problem data.", TargetVariable)
    1633                        + Environment.NewLine + errorMessage;
    1634         return false;
    1635       }
    1636       return returnValue;
    1637     }
    1638 
    1639     public override void AdjustProblemDataProperties(IDataAnalysisProblemData problemData) {
    1640       TimeSeriesPrognosisProblemData timeSeriesProblemData = problemData as TimeSeriesPrognosisProblemData;
    1641       if (timeSeriesProblemData == null)
    1642         throw new ArgumentException("The problem data is not a timeseries problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");
    1643 
    1644       var trainingDataStart = TrainingIndices.First();
    1645 
    1646       base.AdjustProblemDataProperties(problemData);
    1647 
    1648       TestPartition.Start = trainingDataStart;
    1649 
    1650       TrainingHorizon = timeSeriesProblemData.TrainingHorizon;
    1651       TestHorizon = timeSeriesProblemData.TestHorizon;
    1652     }
    1653 
    16541622  }
    16551623}
  • branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/Classification/IClassificationModel.cs

    r15583 r16388  
    3131    IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows);
    3232    IClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData);
     33    bool IsProblemDataCompatible(IClassificationProblemData problemData, out string errorMessage);
    3334    string TargetVariable { get; set; }
    3435    event EventHandler TargetVariableChanged;
  • branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataAnalysisModel.cs

    r15583 r16388  
    3030  public interface IDataAnalysisModel : INamedItem {
    3131    IEnumerable<string> VariablesUsedForPrediction { get; }
     32    bool IsDatasetCompatible(IDataset dataset, out string errorMessage);
     33    bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage);
    3234  }
    3335}
  • branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataAnalysisProblemData.cs

    r15583 r16388  
    4949
    5050    event EventHandler Changed;
    51 
    52     void AdjustProblemDataProperties(IDataAnalysisProblemData problemData);
    5351  }
    5452}
  • branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataset.cs

    r15583 r16388  
    3333    IEnumerable<string> DateTimeVariables { get; }
    3434
     35    bool ContainsVariable(string variablename);
    3536    bool VariableHasType<T>(string variableName);
    3637
  • branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/Regression/IRegressionModel.cs

    r15583 r16388  
    3131    IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows);
    3232    IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData);
     33    bool IsProblemDataCompatible(IRegressionProblemData problemData, out string errorMessage);
    3334    string TargetVariable { get; set; }
    3435    event EventHandler TargetVariableChanged;
  • branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/ModifiableDataset.cs

    r15583 r16388  
    3939
    4040    private ModifiableDataset(ModifiableDataset original, Cloner cloner) : base(original, cloner) {
    41       var variables = variableValues.Keys.ToList();
    42       foreach (var v in variables) {
    43         var type = GetVariableType(v);
    44         if (type == typeof(DateTime)) {
    45           variableValues[v] = GetDateTimeValues(v).ToList();
    46         } else if (type == typeof(double)) {
    47           variableValues[v] = GetDoubleValues(v).ToList();
    48         } else if (type == typeof(string)) {
    49           variableValues[v] = GetStringValues(v).ToList();
    50         } else {
    51           throw new ArgumentException("Unsupported type " + type + " for variable " + v);
     41      variableNames = new List<string>(original.variableNames);
     42      variableValues = CloneValues(original.variableValues);
     43    }
     44
     45    public override IDeepCloneable Clone(Cloner cloner) { return new ModifiableDataset(this, cloner); }
     46
     47    public ModifiableDataset() { }
     48
     49    public ModifiableDataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues, bool cloneValues = false) :
     50      base(variableNames, variableValues, cloneValues) { }
     51
     52    public Dataset ToDataset() {
     53      return new Dataset(variableNames, variableNames.Select(v => variableValues[v]));
     54    }
     55
     56
     57    public IEnumerable<object> GetRow(int row) {
     58      if (row < 0 || row >= Rows)
     59        throw new ArgumentException(string.Format("Invalid row {0} specified. The dataset contains {1} row(s).", row, Rows));
     60
     61      return variableValues.Select(x => x.Value[row]);
     62    }
     63
     64    public void AddRow(IEnumerable<object> values) {
     65      var list = values.ToList();
     66      if (list.Count != variableNames.Count)
     67        throw new ArgumentException("The number of values must be equal to the number of variable names.");
     68      // check if all the values are of the correct type
     69      for (int i = 0; i < list.Count; ++i) {
     70        if (list[i].GetType() != GetVariableType(variableNames[i])) {
     71          throw new ArgumentException("The type of the provided value does not match the variable type.");
    5272        }
    5373      }
    54     }
    55     public override IDeepCloneable Clone(Cloner cloner) { return new ModifiableDataset(this, cloner); }
    56     public ModifiableDataset() : base() { }
    57 
    58     public ModifiableDataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) : base(variableNames, variableValues) { }
     74      // add values
     75      for (int i = 0; i < list.Count; ++i) {
     76        variableValues[variableNames[i]].Add(list[i]);
     77      }
     78      Rows++;
     79      OnRowsChanged();
     80      OnReset();
     81    }
    5982
    6083    public void ReplaceRow(int row, IEnumerable<object> values) {
     
    7295        variableValues[variableNames[i]][row] = list[i];
    7396      }
     97      OnReset();
     98    }
     99
     100    // slow, avoid using this
     101    public void RemoveRow(int row) {
     102      foreach (var list in variableValues.Values)
     103        list.RemoveAt(row);
     104      Rows--;
     105      OnRowsChanged();
     106      OnReset();
     107    }
     108
     109    // adds a new variable to the dataset
     110    public void AddVariable(string variableName, IList values) {
     111      InsertVariable(variableName, Columns, values);
     112    }
     113
     114    public void InsertVariable(string variableName, int position, IList values) {
     115      if (variableValues.ContainsKey(variableName))
     116        throw new ArgumentException(string.Format("Variable {0} is already present in the dataset.", variableName));
     117
     118      if (position < 0 || position > Columns)
     119        throw new ArgumentException(string.Format("Incorrect position {0} specified. The position must be between 0 and {1}.", position, Columns));
     120
     121      if (values == null)
     122        throw new ArgumentNullException("values", "Values must not be null. At least an empty list of values has to be provided.");
     123
     124      if (values.Count != Rows)
     125        throw new ArgumentException(string.Format("{0} values are provided, but {1} rows are present in the dataset.", values.Count, Rows));
     126
     127      if (!IsAllowedType(values))
     128        throw new ArgumentException(string.Format("Unsupported type {0} for variable {1}.", GetElementType(values), variableName));
     129
     130      variableNames.Insert(position, variableName);
     131      variableValues[variableName] = values;
     132
     133      OnColumnsChanged();
     134      OnColumnNamesChanged();
    74135      OnReset();
    75136    }
     
    85146    }
    86147
    87     public void AddRow(IEnumerable<object> values) {
    88       var list = values.ToList();
    89       if (list.Count != variableNames.Count)
    90         throw new ArgumentException("The number of values must be equal to the number of variable names.");
    91       // check if all the values are of the correct type
    92       for (int i = 0; i < list.Count; ++i) {
    93         if (list[i].GetType() != GetVariableType(variableNames[i])) {
    94           throw new ArgumentException("The type of the provided value does not match the variable type.");
    95         }
    96       }
    97       // add values
    98       for (int i = 0; i < list.Count; ++i) {
    99         variableValues[variableNames[i]].Add(list[i]);
    100       }
    101       rows++;
    102       OnRowsChanged();
    103       OnReset();
    104     }
    105 
    106     // adds a new variable to the dataset
    107     public void AddVariable<T>(string variableName, IEnumerable<T> values) {
    108       if (variableValues.ContainsKey(variableName))
    109         throw new ArgumentException("Variable " + variableName + " is already present in the dataset.");
    110       int count = values.Count();
    111       if (count != rows)
    112         throw new ArgumentException("The number of values must exactly match the number of rows in the dataset.");
    113       variableValues[variableName] = new List<T>(values);
    114       variableNames.Add(variableName);
    115       OnColumnsChanged();
    116       OnColumnNamesChanged();
    117       OnReset();
    118     }
    119148
    120149    public void RemoveVariable(string variableName) {
    121150      if (!variableValues.ContainsKey(variableName))
    122         throw new ArgumentException("The variable " + variableName + " does not exist in the dataset.");
     151        throw new ArgumentException(string.Format("The variable {0} does not exist in the dataset.", variableName));
    123152      variableValues.Remove(variableName);
    124153      variableNames.Remove(variableName);
     
    128157    }
    129158
    130     // slow, avoid to use this
    131     public void RemoveRow(int row) {
    132       foreach (var list in variableValues.Values)
    133         list.RemoveAt(row);
    134       rows--;
     159    public void ClearValues() {
     160      foreach (var list in variableValues.Values) {
     161        list.Clear();
     162      }
     163      Rows = 0;
    135164      OnRowsChanged();
    136165      OnReset();
    137166    }
     167
    138168
    139169    public void SetVariableValue(object value, string variableName, int row) {
     
    151181    }
    152182
    153     private Type GetVariableType(string variableName) {
    154       IList list;
    155       variableValues.TryGetValue(variableName, out list);
    156       if (list == null)
    157         throw new ArgumentException("The variable " + variableName + " does not exist in the dataset.");
    158       return list.GetType().GetGenericArguments()[0];
    159     }
    160 
    161183    bool IStringConvertibleMatrix.SetValue(string value, int rowIndex, int columnIndex) {
    162184      var variableName = variableNames[columnIndex];
Note: See TracChangeset for help on using the changeset viewer.