- Timestamp:
- 12/15/18 12:36:08 (6 years ago)
- Location:
- branches/2892_LR-prediction-intervals
- Files:
-
- 22 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2892_LR-prediction-intervals
- Property svn:ignore
-
old new 1 *.docstates 2 *.psess 3 *.resharper 4 *.suo 5 *.user 6 *.vsp 7 Doxygen 8 FxCopResults.txt 9 Google.ProtocolBuffers-0.9.1.dll 10 Google.ProtocolBuffers-2.4.1.473.dll 11 HeuristicLab 3.3.5.1.ReSharper.user 12 HeuristicLab 3.3.6.0.ReSharper.user 13 HeuristicLab.4.5.resharper.user 14 HeuristicLab.ExtLibs.6.0.ReSharper.user 15 HeuristicLab.Scripting.Development 16 HeuristicLab.resharper.user 17 ProtoGen.exe 1 18 TestResults 19 _ReSharper.HeuristicLab 20 _ReSharper.HeuristicLab 3.3 21 _ReSharper.HeuristicLab 3.3 Tests 22 _ReSharper.HeuristicLab.ExtLibs 23 bin 24 protoc.exe 25 obj 26 .vs
-
- Property svn:mergeinfo changed
-
Property
svn:global-ignores
set to
*.nuget
packages
- Property svn:ignore
-
branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis
- Property svn:mergeinfo changed
-
branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Dataset.cs
r15583 r16388 38 38 protected Dataset(Dataset original, Cloner cloner) 39 39 : base(original, cloner) { 40 // no need to clone the variable values because these can't be modified 40 41 variableValues = new Dictionary<string, IList>(original.variableValues); 41 42 variableNames = new List<string>(original.variableNames); 42 43 rows = original.rows; 43 44 } 45 44 46 public override IDeepCloneable Clone(Cloner cloner) { return new Dataset(this, cloner); } 45 47 … … 58 60 /// <param name="variableValues">The values for the variables (column-oriented storage). Values are not cloned!</param> 59 61 public Dataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) 60 : base() { 62 : this(variableNames, variableValues, cloneValues: true) { 63 } 64 65 protected Dataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues, bool cloneValues = false) { 61 66 Name = "-"; 62 if (!variableNames.Any()) { 67 68 if (variableNames.Any()) { 69 this.variableNames = new List<string>(variableNames); 70 } else { 63 71 this.variableNames = Enumerable.Range(0, variableValues.Count()).Select(x => "Column " + x).ToList(); 64 } else if (variableNames.Count() != variableValues.Count()) { 65 throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues"); 66 } else if (!variableValues.All(list => list.Count == variableValues.First().Count)) { 67 throw new ArgumentException("The number of values must be equal for every variable"); 68 } else if (variableNames.Distinct().Count() != variableNames.Count()) { 69 var duplicateVariableNames = 70 variableNames.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList(); 71 string message = "The dataset cannot contain duplicate variables names: " + Environment.NewLine; 72 foreach (var duplicateVariableName in duplicateVariableNames) 73 message += duplicateVariableName + Environment.NewLine; 74 throw new ArgumentException(message); 75 } 72 } 73 // check if the 
arguments are consistent (no duplicate variables, same number of rows, correct data types, ...) 74 CheckArguments(this.variableNames, variableValues); 75 76 76 rows = variableValues.First().Count; 77 this.variableNames = new List<string>(variableNames); 78 this.variableValues = new Dictionary<string, IList>(this.variableNames.Count); 79 for (int i = 0; i < this.variableNames.Count; i++) { 80 var values = variableValues.ElementAt(i); 81 this.variableValues.Add(this.variableNames[i], values); 77 78 if (cloneValues) { 79 this.variableValues = CloneValues(this.variableNames, variableValues); 80 } else { 81 this.variableValues = new Dictionary<string, IList>(this.variableNames.Count); 82 for (int i = 0; i < this.variableNames.Count; i++) { 83 var variableName = this.variableNames[i]; 84 var values = variableValues.ElementAt(i); 85 this.variableValues.Add(variableName, values); 86 } 82 87 } 83 88 } … … 111 116 112 117 public ModifiableDataset ToModifiable() { 113 var values = new List<IList>(); 114 foreach (var v in variableNames) { 115 if (VariableHasType<double>(v)) { 116 values.Add(new List<double>((IList<double>)variableValues[v])); 117 } else if (VariableHasType<string>(v)) { 118 values.Add(new List<string>((IList<string>)variableValues[v])); 119 } else if (VariableHasType<DateTime>(v)) { 120 values.Add(new List<DateTime>((IList<DateTime>)variableValues[v])); 121 } else { 122 throw new ArgumentException("Unknown variable type."); 123 } 124 } 125 return new ModifiableDataset(variableNames, values); 126 } 118 return new ModifiableDataset(variableNames, variableNames.Select(v => variableValues[v]), true); 119 } 120 127 121 /// <summary> 128 122 /// Shuffle a dataset's rows … … 135 129 } 136 130 137 protected Dataset(Dataset dataset) : this(dataset.variableNames, dataset.variableValues.Values) { } 131 138 132 139 133 #region Backwards compatible code, remove with 3.5 … … 173 167 } 174 168 } 169 170 public bool ContainsVariable(string variableName) { 171 return 
variableValues.ContainsKey(variableName); 172 } 175 173 public IEnumerable<string> DoubleVariables { 176 174 get { return variableValues.Where(p => p.Value is IList<double>).Select(p => p.Key); } … … 231 229 return new ReadOnlyCollection<DateTime>(values); 232 230 } 233 234 235 231 private IEnumerable<T> GetValues<T>(string variableName, IEnumerable<int> rows) { 236 232 var values = GetValues<T>(variableName); … … 248 244 return variableValues[variableName] is IList<T>; 249 245 } 246 protected Type GetVariableType(string variableName) { 247 IList list; 248 variableValues.TryGetValue(variableName, out list); 249 if (list == null) 250 throw new ArgumentException("The variable " + variableName + " does not exist in the dataset."); 251 return GetElementType(list); 252 } 253 protected static Type GetElementType(IList list) { 254 var type = list.GetType(); 255 return type.IsGenericType ? type.GetGenericArguments()[0] : type.GetElementType(); 256 } 257 protected static bool IsAllowedType(IList list) { 258 var type = GetElementType(list); 259 return IsAllowedType(type); 260 } 261 protected static bool IsAllowedType(Type type) { 262 return type == typeof(double) || type == typeof(string) || type == typeof(DateTime); 263 } 264 265 protected static void CheckArguments(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) { 266 if (variableNames.Count() != variableValues.Count()) { 267 throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues"); 268 } else if (!variableValues.All(list => list.Count == variableValues.First().Count)) { 269 throw new ArgumentException("The number of values must be equal for every variable"); 270 } else if (variableNames.Distinct().Count() != variableNames.Count()) { 271 var duplicateVariableNames = 272 variableNames.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList(); 273 string message = "The dataset cannot contain duplicate variables names: " + 
Environment.NewLine; 274 foreach (var duplicateVariableName in duplicateVariableNames) 275 message += duplicateVariableName + Environment.NewLine; 276 throw new ArgumentException(message); 277 } 278 // check if all the variables are supported 279 foreach (var t in variableNames.Zip(variableValues, Tuple.Create)) { 280 var variableName = t.Item1; 281 var values = t.Item2; 282 283 if (!IsAllowedType(values)) { 284 throw new ArgumentException(string.Format("Unsupported type {0} for variable {1}.", GetElementType(values), variableName)); 285 } 286 } 287 } 288 289 protected static Dictionary<string, IList> CloneValues(Dictionary<string, IList> variableValues) { 290 return variableValues.ToDictionary(x => x.Key, x => CloneValues(x.Value)); 291 } 292 293 protected static Dictionary<string, IList> CloneValues(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) { 294 return variableNames.Zip(variableValues, Tuple.Create).ToDictionary(x => x.Item1, x => CloneValues(x.Item2)); 295 } 296 297 protected static IList CloneValues(IList values) { 298 var doubleValues = values as IList<double>; 299 if (doubleValues != null) return new List<double>(doubleValues); 300 301 var stringValues = values as IList<string>; 302 if (stringValues != null) return new List<string>(stringValues); 303 304 var dateTimeValues = values as IList<DateTime>; 305 if (dateTimeValues != null) return new List<DateTime>(dateTimeValues); 306 307 throw new ArgumentException(string.Format("Unsupported variable type {0}.", GetElementType(values))); 308 } 250 309 251 310 #region IStringConvertibleMatrix Members 252 311 [Storable] 253 protected int rows; 312 private int rows; 254 313 public int Rows { 255 314 get { return rows; } 315 protected set { rows = value; } 256 316 } 257 317 int IStringConvertibleMatrix.Rows { -
branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationModel.cs
r15583 r16388 66 66 public abstract IClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData); 67 67 68 public virtual bool IsProblemDataCompatible(IClassificationProblemData problemData, out string errorMessage) { 69 return IsProblemDataCompatible(this, problemData, out errorMessage); 70 } 71 72 public override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) { 73 if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null."); 74 var classificationProblemData = problemData as IClassificationProblemData; 75 if (classificationProblemData == null) 76 throw new ArgumentException("The problem data is not a regression problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData"); 77 return IsProblemDataCompatible(classificationProblemData, out errorMessage); 78 } 79 80 public static bool IsProblemDataCompatible(IClassificationModel model, IClassificationProblemData problemData, out string errorMessage) { 81 if (model == null) throw new ArgumentNullException("model", "The provided model is null."); 82 if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null."); 83 errorMessage = string.Empty; 84 85 if (model.TargetVariable != problemData.TargetVariable) 86 errorMessage = string.Format("The target variable of the model {0} does not match the target variable of the problemData {1}.", model.TargetVariable, problemData.TargetVariable); 87 88 var evaluationErrorMessage = string.Empty; 89 var datasetCompatible = model.IsDatasetCompatible(problemData.Dataset, out evaluationErrorMessage); 90 if (!datasetCompatible) 91 errorMessage += evaluationErrorMessage; 92 93 return string.IsNullOrEmpty(errorMessage); 94 } 95 68 96 #region events 69 97 public event EventHandler TargetVariableChanged; -
branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationProblemData.cs
r15583 r16388 467 467 } 468 468 #endregion 469 470 protected override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) {471 if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");472 IClassificationProblemData classificationProblemData = problemData as IClassificationProblemData;473 if (classificationProblemData == null)474 throw new ArgumentException("The problem data is no classification problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");475 476 var returnValue = base.IsProblemDataCompatible(classificationProblemData, out errorMessage);477 //check targetVariable478 if (classificationProblemData.InputVariables.All(var => var.Value != TargetVariable)) {479 errorMessage = string.Format("The target variable {0} is not present in the new problem data.", TargetVariable)480 + Environment.NewLine + errorMessage;481 return false;482 }483 484 var newClassValues = classificationProblemData.Dataset.GetDoubleValues(TargetVariable).Distinct().OrderBy(x => x);485 if (!newClassValues.SequenceEqual(ClassValues)) {486 errorMessage = errorMessage + string.Format("The class values differ in the provided classification problem data.");487 returnValue = false;488 }489 490 var newPositivieClassName = classificationProblemData.PositiveClass;491 if (newPositivieClassName != PositiveClass) {492 errorMessage = errorMessage + string.Format("The positive class differs in the provided classification problem data.");493 returnValue = false;494 }495 496 return returnValue;497 }498 499 public override void AdjustProblemDataProperties(IDataAnalysisProblemData problemData) {500 if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");501 ClassificationProblemData classificationProblemData = problemData as ClassificationProblemData;502 if (classificationProblemData == null)503 throw new 
ArgumentException("The problem data is not a classification problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");504 505 base.AdjustProblemDataProperties(problemData);506 TargetVariable = classificationProblemData.TargetVariable;507 for (int i = 0; i < classificationProblemData.ClassNames.Count(); i++)508 ClassNamesParameter.Value[i, 0] = classificationProblemData.ClassNames.ElementAt(i);509 510 PositiveClass = classificationProblemData.PositiveClass;511 512 for (int i = 0; i < Classes; i++) {513 for (int j = 0; j < Classes; j++) {514 ClassificationPenaltiesParameter.Value[i, j] = classificationProblemData.GetClassificationPenalty(ClassValuesCache[i], ClassValuesCache[j]);515 }516 }517 }518 469 } 519 470 } -
branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationSolutionBase.cs
r15583 r16388 20 20 #endregion 21 21 22 using System; 22 23 using System.Collections.Generic; 23 24 using System.Linq; … … 44 45 public new IClassificationProblemData ProblemData { 45 46 get { return (IClassificationProblemData)base.ProblemData; } 46 set { base.ProblemData = value; } 47 set { 48 if (value == null) throw new ArgumentNullException("The problemData must not be null."); 49 string errorMessage = string.Empty; 50 if (!Model.IsProblemDataCompatible(value, out errorMessage)) throw new ArgumentException(errorMessage); 51 52 base.ProblemData = value; 53 } 47 54 } 48 55 -
branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationSolutionVariableImpactsCalculator.cs
r15674 r16388 100 100 var problemData = solution.ProblemData; 101 101 var dataset = problemData.Dataset; 102 var model = (IClassificationModel)solution.Model.Clone(); //mkommend: clone of model is necessary, because the thresholds for IDiscriminantClassificationModels are updated 102 103 103 104 IEnumerable<int> rows; … … 137 138 // calculate impacts for double variables 138 139 foreach (var inputVariable in allowedInputVariables.Where(problemData.Dataset.VariableHasType<double>)) { 139 var newEstimates = EvaluateModelWithReplacedVariable( solution.Model, inputVariable, modifiableDataset, rows, replacementMethod);140 var newEstimates = EvaluateModelWithReplacedVariable(model, inputVariable, modifiableDataset, rows, replacementMethod); 140 141 var newAccuracy = OnlineAccuracyCalculator.Calculate(targetValues, newEstimates, out error); 141 142 if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during R² calculation with replaced inputs."); … … 150 151 var smallestImpact = double.PositiveInfinity; 151 152 foreach (var repl in problemData.Dataset.GetStringValues(inputVariable, rows).Distinct()) { 152 var newEstimates = EvaluateModelWithReplacedVariable( solution.Model, inputVariable, modifiableDataset, rows,153 var newEstimates = EvaluateModelWithReplacedVariable(model, inputVariable, modifiableDataset, rows, 153 154 Enumerable.Repeat(repl, dataset.Rows)); 154 155 var newAccuracy = OnlineAccuracyCalculator.Calculate(targetValues, newEstimates, out error); … … 164 165 // calculate impacts for factor variables 165 166 166 var newEstimates = EvaluateModelWithReplacedVariable( solution.Model, inputVariable, modifiableDataset, rows,167 var newEstimates = EvaluateModelWithReplacedVariable(model, inputVariable, modifiableDataset, rows, 167 168 factorReplacementMethod); 168 169 var newAccuracy = OnlineAccuracyCalculator.Calculate(targetValues, newEstimates, out error); … … 263 264 var originalValues = 
dataset.GetReadOnlyDoubleValues(variable).ToList(); 264 265 dataset.ReplaceVariable(variable, replacementValues.ToList()); 266 267 var discModel = model as IDiscriminantFunctionClassificationModel; 268 if (discModel != null) { 269 var problemData = new ClassificationProblemData(dataset, dataset.VariableNames, model.TargetVariable); 270 discModel.RecalculateModelParameters(problemData, rows); 271 } 272 265 273 //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements 266 274 var estimates = model.GetEstimatedClassValues(dataset, rows).ToList(); … … 273 281 var originalValues = dataset.GetReadOnlyStringValues(variable).ToList(); 274 282 dataset.ReplaceVariable(variable, replacementValues.ToList()); 283 284 285 var discModel = model as IDiscriminantFunctionClassificationModel; 286 if (discModel != null) { 287 var problemData = new ClassificationProblemData(dataset, dataset.VariableNames, model.TargetVariable); 288 discModel.RecalculateModelParameters(problemData, rows); 289 } 290 275 291 //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements 276 292 var estimates = model.GetEstimatedClassValues(dataset, rows).ToList(); -
branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/ConstantModel.cs
r15583 r16388 83 83 } 84 84 85 public virtual bool IsProblemDataCompatible(IClassificationProblemData problemData, out string errorMessage) { 86 return ClassificationModel.IsProblemDataCompatible(this, problemData, out errorMessage); 87 } 88 89 public override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) { 90 if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null."); 91 92 var regressionProblemData = problemData as IRegressionProblemData; 93 if (regressionProblemData != null) 94 return IsProblemDataCompatible(regressionProblemData, out errorMessage); 95 96 var classificationProblemData = problemData as IClassificationProblemData; 97 if (classificationProblemData != null) 98 return IsProblemDataCompatible(classificationProblemData, out errorMessage); 99 100 throw new ArgumentException("The problem data is not a regression nor a classification problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData"); 101 } 102 85 103 #region IStringConvertibleValue 86 104 public bool ReadOnly { get; private set; } -
branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisModel.cs
r15583 r16388 20 20 #endregion 21 21 22 using System; 22 23 using System.Collections.Generic; 23 24 using HeuristicLab.Common; … … 38 39 39 40 public abstract IEnumerable<string> VariablesUsedForPrediction { get; } 41 42 public virtual bool IsDatasetCompatible(IDataset dataset, out string errorMessage) { 43 if (dataset == null) throw new ArgumentNullException("dataset", "The provided dataset is null."); 44 return IsDatasetCompatible(this, dataset, out errorMessage); 45 } 46 47 public abstract bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage); 48 49 public static bool IsDatasetCompatible(IDataAnalysisModel model, IDataset dataset, out string errorMessage) { 50 if(model == null) throw new ArgumentNullException("model", "The provided model is null."); 51 if (dataset == null) throw new ArgumentNullException("dataset", "The provided dataset is null."); 52 errorMessage = string.Empty; 53 54 foreach (var variable in model.VariablesUsedForPrediction) { 55 if (!dataset.ContainsVariable(variable)) { 56 if (string.IsNullOrEmpty(errorMessage)) { 57 errorMessage = "The following variables must be present in the dataset for model evaluation:"; 58 } 59 errorMessage += System.Environment.NewLine + " " + variable; 60 } 61 } 62 63 return string.IsNullOrEmpty(errorMessage); 64 } 40 65 } 41 66 } -
branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisProblemData.cs
r15583 r16388 163 163 164 164 var variables = dataset.VariableNames.Where(variable => dataset.VariableHasType<double>(variable) || dataset.VariableHasType<string>(variable)); 165 var inputVariables = new CheckedItemList<StringValue>(variables.Select(x => new StringValue(x) ));165 var inputVariables = new CheckedItemList<StringValue>(variables.Select(x => new StringValue(x).AsReadOnly())); 166 166 foreach (StringValue x in inputVariables) 167 167 inputVariables.SetItemCheckedState(x, allowedInputVariables.Contains(x.Value)); … … 207 207 if (listeners != null) listeners(this, EventArgs.Empty); 208 208 } 209 210 protected virtual bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) {211 errorMessage = string.Empty;212 if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");213 214 //check allowed input variables215 StringBuilder message = new StringBuilder();216 var variables = new HashSet<string>(problemData.InputVariables.Select(x => x.Value));217 foreach (var item in AllowedInputVariables) {218 if (!variables.Contains(item))219 message.AppendLine("Input variable '" + item + "' is not present in the new problem data.");220 }221 222 if (message.Length != 0) {223 errorMessage = message.ToString();224 return false;225 }226 return true;227 228 }229 230 public virtual void AdjustProblemDataProperties(IDataAnalysisProblemData problemData) {231 DataAnalysisProblemData data = problemData as DataAnalysisProblemData;232 if (data == null) throw new ArgumentException("The problem data is not a data analysis problem data. 
Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");233 234 string errorMessage;235 if (!data.IsProblemDataCompatible(this, out errorMessage)) {236 throw new InvalidOperationException(errorMessage);237 }238 239 foreach (var inputVariable in InputVariables) {240 var variable = data.InputVariables.FirstOrDefault(i => i.Value == inputVariable.Value);241 InputVariables.SetItemCheckedState(inputVariable, variable != null && data.InputVariables.ItemChecked(variable));242 }243 }244 209 } 245 210 } -
branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisSolution.cs
r15583 r16388 58 58 get { return (IDataAnalysisProblemData)this[ProblemDataResultName].Value; } 59 59 set { 60 if (this[ProblemDataResultName].Value != value) { 61 if (value != null) { 62 ProblemData.Changed -= new EventHandler(ProblemData_Changed); 63 this[ProblemDataResultName].Value = value; 64 ProblemData.Changed += new EventHandler(ProblemData_Changed); 65 OnProblemDataChanged(); 66 } 67 } 60 if (value == null) throw new ArgumentNullException("The problemData must not be null."); 61 if (this[ProblemDataResultName].Value == value) return; 62 string errorMessage = string.Empty; 63 if (!Model.IsProblemDataCompatible(value, out errorMessage)) throw new ArgumentException(errorMessage); 64 65 ProblemData.Changed -= new EventHandler(ProblemData_Changed); 66 this[ProblemDataResultName].Value = value; 67 ProblemData.Changed += new EventHandler(ProblemData_Changed); 68 OnProblemDataChanged(); 68 69 } 69 70 } -
branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionModel.cs
r15583 r16388 67 67 public abstract IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData); 68 68 69 public virtual bool IsProblemDataCompatible(IRegressionProblemData problemData, out string errorMessage) { 70 return IsProblemDataCompatible(this, problemData, out errorMessage); 71 } 72 73 public override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) { 74 if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null."); 75 var regressionProblemData = problemData as IRegressionProblemData; 76 if (regressionProblemData == null) 77 throw new ArgumentException("The problem data is not a regression problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData"); 78 return IsProblemDataCompatible(regressionProblemData, out errorMessage); 79 } 80 81 public static bool IsProblemDataCompatible(IRegressionModel model, IRegressionProblemData problemData, out string errorMessage) { 82 if (model == null) throw new ArgumentNullException("model", "The provided model is null."); 83 if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null."); 84 errorMessage = string.Empty; 85 86 if (model.TargetVariable != problemData.TargetVariable) 87 errorMessage = string.Format("The target variable of the model {0} does not match the target variable of the problemData {1}.", model.TargetVariable, problemData.TargetVariable); 88 89 var evaluationErrorMessage = string.Empty; 90 var datasetCompatible = model.IsDatasetCompatible(problemData.Dataset, out evaluationErrorMessage); 91 if (!datasetCompatible) 92 errorMessage += evaluationErrorMessage; 93 94 return string.IsNullOrEmpty(errorMessage); 95 } 96 69 97 #region events 70 98 public event EventHandler TargetVariableChanged; -
branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionProblemData.cs
r15583 r16388 161 161 OnChanged(); 162 162 } 163 164 protected override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) {165 if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");166 IRegressionProblemData regressionProblemData = problemData as IRegressionProblemData;167 if (regressionProblemData == null)168 throw new ArgumentException("The problem data is not a regression problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");169 170 var returnValue = base.IsProblemDataCompatible(problemData, out errorMessage);171 return returnValue;172 }173 174 public override void AdjustProblemDataProperties(IDataAnalysisProblemData problemData) {175 if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");176 RegressionProblemData regressionProblemData = problemData as RegressionProblemData;177 if (regressionProblemData == null)178 throw new ArgumentException("The problem data is not a regression problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");179 180 base.AdjustProblemDataProperties(problemData);181 }182 163 } 183 164 } -
branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolutionBase.cs
r15583 r16388 70 70 public new IRegressionProblemData ProblemData { 71 71 get { return (IRegressionProblemData)base.ProblemData; } 72 set { base.ProblemData = value; } 72 set { 73 if (value == null) throw new ArgumentNullException("The problemData must not be null."); 74 string errorMessage = string.Empty; 75 if (!Model.IsProblemDataCompatible(value, out errorMessage)) throw new ArgumentException(errorMessage); 76 77 base.ProblemData = value; 78 } 73 79 } 74 80 -
branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolutionVariableImpactsCalculator.cs
r15673 r16388 52 52 All 53 53 } 54 54 55 55 private const string ReplacementParameterName = "Replacement Method"; 56 56 private const string DataPartitionParameterName = "DataPartition"; … … 96 96 DataPartitionEnum data = DataPartitionEnum.Training, 97 97 ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Median, 98 FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best) { 98 FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best, 99 Func<double, string, bool> progressCallback = null) { 99 100 100 101 var problemData = solution.ProblemData; … … 134 135 var allowedInputVariables = dataset.VariableNames.Where(v => inputvariables.Contains(v)).ToList(); 135 136 137 int curIdx = 0; 138 int count = allowedInputVariables.Where(problemData.Dataset.VariableHasType<double>).Count(); 136 139 // calculate impacts for double variables 137 140 foreach (var inputVariable in allowedInputVariables.Where(problemData.Dataset.VariableHasType<double>)) { 141 //Report the current progress in percent. If the callback returns true, it means the execution shall be stopped 142 if (progressCallback != null) { 143 curIdx++; 144 if (progressCallback((double)curIdx / count, string.Format("Calculating impact for variable {0} ({1} of {2})", inputVariable, curIdx, count))) { return null; } 145 } 138 146 var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, replacementMethod); 139 147 var newR2 = OnlinePearsonsRCalculator.Calculate(targetValues, newEstimates, out error); … … 180 188 } 181 189 190 182 191 private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, ModifiableDataset dataset, IEnumerable<int> rows, ReplacementMethodEnum replacement = ReplacementMethodEnum.Median) { 183 192 var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList(); -
branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/TimeSeriesPrognosis/TimeSeriesPrognosisProblemData.cs
r15583 r16388 1620 1620 OnChanged(); 1621 1621 } 1622 1623 protected override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) {1624 if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");1625 ITimeSeriesPrognosisProblemData timeseriesProblemData = problemData as ITimeSeriesPrognosisProblemData;1626 if (timeseriesProblemData == null)1627 throw new ArgumentException("The problem data is not a time-series problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");1628 1629 var returnValue = base.IsProblemDataCompatible(problemData, out errorMessage);1630 //check targetVariable1631 if (problemData.InputVariables.All(var => var.Value != TargetVariable)) {1632 errorMessage = string.Format("The target variable {0} is not present in the new problem data.", TargetVariable)1633 + Environment.NewLine + errorMessage;1634 return false;1635 }1636 return returnValue;1637 }1638 1639 public override void AdjustProblemDataProperties(IDataAnalysisProblemData problemData) {1640 TimeSeriesPrognosisProblemData timeSeriesProblemData = problemData as TimeSeriesPrognosisProblemData;1641 if (timeSeriesProblemData == null)1642 throw new ArgumentException("The problem data is not a timeseries problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");1643 1644 var trainingDataStart = TrainingIndices.First();1645 1646 base.AdjustProblemDataProperties(problemData);1647 1648 TestPartition.Start = trainingDataStart;1649 1650 TrainingHorizon = timeSeriesProblemData.TrainingHorizon;1651 TestHorizon = timeSeriesProblemData.TestHorizon;1652 }1653 1654 1622 } 1655 1623 } -
branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/Classification/IClassificationModel.cs
r15583 r16388 31 31 IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows); 32 32 IClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData); 33 bool IsProblemDataCompatible(IClassificationProblemData problemData, out string errorMessage); 33 34 string TargetVariable { get; set; } 34 35 event EventHandler TargetVariableChanged; -
branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataAnalysisModel.cs
r15583 r16388 30 30 public interface IDataAnalysisModel : INamedItem { 31 31 IEnumerable<string> VariablesUsedForPrediction { get; } 32 bool IsDatasetCompatible(IDataset dataset, out string errorMessage); 33 bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage); 32 34 } 33 35 } -
branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataAnalysisProblemData.cs
r15583 r16388 49 49 50 50 event EventHandler Changed; 51 52 void AdjustProblemDataProperties(IDataAnalysisProblemData problemData);53 51 } 54 52 } -
branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataset.cs
r15583 r16388 33 33 IEnumerable<string> DateTimeVariables { get; } 34 34 35 bool ContainsVariable(string variablename); 35 36 bool VariableHasType<T>(string variableName); 36 37 -
branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/Regression/IRegressionModel.cs
r15583 r16388 31 31 IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows); 32 32 IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData); 33 bool IsProblemDataCompatible(IRegressionProblemData problemData, out string errorMessage); 33 34 string TargetVariable { get; set; } 34 35 event EventHandler TargetVariableChanged; -
branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4/ModifiableDataset.cs
r15583 r16388 39 39 40 40 private ModifiableDataset(ModifiableDataset original, Cloner cloner) : base(original, cloner) { 41 var variables = variableValues.Keys.ToList(); 42 foreach (var v in variables) { 43 var type = GetVariableType(v); 44 if (type == typeof(DateTime)) { 45 variableValues[v] = GetDateTimeValues(v).ToList(); 46 } else if (type == typeof(double)) { 47 variableValues[v] = GetDoubleValues(v).ToList(); 48 } else if (type == typeof(string)) { 49 variableValues[v] = GetStringValues(v).ToList(); 50 } else { 51 throw new ArgumentException("Unsupported type " + type + " for variable " + v); 41 variableNames = new List<string>(original.variableNames); 42 variableValues = CloneValues(original.variableValues); 43 } 44 45 public override IDeepCloneable Clone(Cloner cloner) { return new ModifiableDataset(this, cloner); } 46 47 public ModifiableDataset() { } 48 49 public ModifiableDataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues, bool cloneValues = false) : 50 base(variableNames, variableValues, cloneValues) { } 51 52 public Dataset ToDataset() { 53 return new Dataset(variableNames, variableNames.Select(v => variableValues[v])); 54 } 55 56 57 public IEnumerable<object> GetRow(int row) { 58 if (row < 0 || row >= Rows) 59 throw new ArgumentException(string.Format("Invalid row {0} specified. 
The dataset contains {1} row(s).", row, Rows)); 60 61 return variableValues.Select(x => x.Value[row]); 62 } 63 64 public void AddRow(IEnumerable<object> values) { 65 var list = values.ToList(); 66 if (list.Count != variableNames.Count) 67 throw new ArgumentException("The number of values must be equal to the number of variable names."); 68 // check if all the values are of the correct type 69 for (int i = 0; i < list.Count; ++i) { 70 if (list[i].GetType() != GetVariableType(variableNames[i])) { 71 throw new ArgumentException("The type of the provided value does not match the variable type."); 52 72 } 53 73 } 54 } 55 public override IDeepCloneable Clone(Cloner cloner) { return new ModifiableDataset(this, cloner); } 56 public ModifiableDataset() : base() { } 57 58 public ModifiableDataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) : base(variableNames, variableValues) { } 74 // add values 75 for (int i = 0; i < list.Count; ++i) { 76 variableValues[variableNames[i]].Add(list[i]); 77 } 78 Rows++; 79 OnRowsChanged(); 80 OnReset(); 81 } 59 82 60 83 public void ReplaceRow(int row, IEnumerable<object> values) { … … 72 95 variableValues[variableNames[i]][row] = list[i]; 73 96 } 97 OnReset(); 98 } 99 100 // slow, avoid using this 101 public void RemoveRow(int row) { 102 foreach (var list in variableValues.Values) 103 list.RemoveAt(row); 104 Rows--; 105 OnRowsChanged(); 106 OnReset(); 107 } 108 109 // adds a new variable to the dataset 110 public void AddVariable(string variableName, IList values) { 111 InsertVariable(variableName, Columns, values); 112 } 113 114 public void InsertVariable(string variableName, int position, IList values) { 115 if (variableValues.ContainsKey(variableName)) 116 throw new ArgumentException(string.Format("Variable {0} is already present in the dataset.", variableName)); 117 118 if (position < 0 || position > Columns) 119 throw new ArgumentException(string.Format("Incorrect position {0} specified. 
The position must be between 0 and {1}.", position, Columns)); 120 121 if (values == null) 122 throw new ArgumentNullException("values", "Values must not be null. At least an empty list of values has to be provided."); 123 124 if (values.Count != Rows) 125 throw new ArgumentException(string.Format("{0} values are provided, but {1} rows are present in the dataset.", values.Count, Rows)); 126 127 if (!IsAllowedType(values)) 128 throw new ArgumentException(string.Format("Unsupported type {0} for variable {1}.", GetElementType(values), variableName)); 129 130 variableNames.Insert(position, variableName); 131 variableValues[variableName] = values; 132 133 OnColumnsChanged(); 134 OnColumnNamesChanged(); 74 135 OnReset(); 75 136 } … … 85 146 } 86 147 87 public void AddRow(IEnumerable<object> values) {88 var list = values.ToList();89 if (list.Count != variableNames.Count)90 throw new ArgumentException("The number of values must be equal to the number of variable names.");91 // check if all the values are of the correct type92 for (int i = 0; i < list.Count; ++i) {93 if (list[i].GetType() != GetVariableType(variableNames[i])) {94 throw new ArgumentException("The type of the provided value does not match the variable type.");95 }96 }97 // add values98 for (int i = 0; i < list.Count; ++i) {99 variableValues[variableNames[i]].Add(list[i]);100 }101 rows++;102 OnRowsChanged();103 OnReset();104 }105 106 // adds a new variable to the dataset107 public void AddVariable<T>(string variableName, IEnumerable<T> values) {108 if (variableValues.ContainsKey(variableName))109 throw new ArgumentException("Variable " + variableName + " is already present in the dataset.");110 int count = values.Count();111 if (count != rows)112 throw new ArgumentException("The number of values must exactly match the number of rows in the dataset.");113 variableValues[variableName] = new List<T>(values);114 variableNames.Add(variableName);115 OnColumnsChanged();116 OnColumnNamesChanged();117 OnReset();118 
}119 148 120 149 public void RemoveVariable(string variableName) { 121 150 if (!variableValues.ContainsKey(variableName)) 122 throw new ArgumentException( "The variable " + variableName + " does not exist in the dataset.");151 throw new ArgumentException(string.Format("The variable {0} does not exist in the dataset.", variableName)); 123 152 variableValues.Remove(variableName); 124 153 variableNames.Remove(variableName); … … 128 157 } 129 158 130 // slow, avoid to use this131 public void RemoveRow(int row) {132 foreach (var list in variableValues.Values)133 list.RemoveAt(row);134 rows--;159 public void ClearValues() { 160 foreach (var list in variableValues.Values) { 161 list.Clear(); 162 } 163 Rows = 0; 135 164 OnRowsChanged(); 136 165 OnReset(); 137 166 } 167 138 168 139 169 public void SetVariableValue(object value, string variableName, int row) { … … 151 181 } 152 182 153 private Type GetVariableType(string variableName) {154 IList list;155 variableValues.TryGetValue(variableName, out list);156 if (list == null)157 throw new ArgumentException("The variable " + variableName + " does not exist in the dataset.");158 return list.GetType().GetGenericArguments()[0];159 }160 161 183 bool IStringConvertibleMatrix.SetValue(string value, int rowIndex, int columnIndex) { 162 184 var variableName = variableNames[columnIndex];
Note: See TracChangeset
for help on using the changeset viewer.