Changeset 5586 for branches/DataAnalysis Refactoring
- Timestamp: 03/02/11 00:52:18 (14 years ago)
- Location: branches/DataAnalysis Refactoring
- Files: 9 edited
Legend:
- Unmodified
- Added
- Removed
branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/MultiObjective/SymbolicRegressionMultiObjectiveMeanSquaredErrorTreeSizeEvaluator.cs
r5549 r5586 53 53 public static double[] Calculate(ISymbolicDataAnalysisTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows) { 54 54 IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows); 55 IEnumerable<double> originalValues = problemData.Dataset.GetEnumeratedVariableValues(problemData.TargetVariable , rows);55 IEnumerable<double> originalValues = problemData.Dataset.GetEnumeratedVariableValues(problemData.TargetVariable.Value, rows); 56 56 IEnumerable<double> boundedEstimationValues = estimatedValues.LimitToRange(lowerEstimationLimit, upperEstimationLimit); 57 57 double mse = OnlineMeanSquaredErrorEvaluator.Calculate(originalValues, boundedEstimationValues); -
branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/MultiObjective/SymbolicRegressionMultiObjectivePearsonRSquaredTreeSizeEvaluator.cs
r5551 r5586 53 53 public static double[] Calculate(ISymbolicDataAnalysisTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows) { 54 54 IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows); 55 IEnumerable<double> originalValues = problemData.Dataset.GetEnumeratedVariableValues(problemData.TargetVariable , rows);55 IEnumerable<double> originalValues = problemData.Dataset.GetEnumeratedVariableValues(problemData.TargetVariable.Value, rows); 56 56 double r2 = OnlinePearsonsRSquaredEvaluator.Calculate(originalValues, estimatedValues); 57 57 return new double[2] { r2, solution.Length }; -
branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/SymbolicRegressionSingleObjectiveMeanSquaredErrorEvaluator.cs
r5548 r5586 53 53 public static double Calculate(ISymbolicDataAnalysisTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows) { 54 54 IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows); 55 IEnumerable<double> originalValues = problemData.Dataset.GetEnumeratedVariableValues(problemData.TargetVariable , rows);55 IEnumerable<double> originalValues = problemData.Dataset.GetEnumeratedVariableValues(problemData.TargetVariable.Value, rows); 56 56 IEnumerable<double> boundedEstimationValues = estimatedValues.LimitToRange(lowerEstimationLimit, upperEstimationLimit); 57 57 return OnlineMeanSquaredErrorEvaluator.Calculate(originalValues, boundedEstimationValues); -
branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.cs
r5551 r5586 53 53 public static double Calculate(ISymbolicDataAnalysisTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows) { 54 54 IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows); 55 IEnumerable<double> originalValues = problemData.Dataset.GetEnumeratedVariableValues(problemData.TargetVariable , rows);55 IEnumerable<double> originalValues = problemData.Dataset.GetEnumeratedVariableValues(problemData.TargetVariable.Value, rows); 56 56 return OnlinePearsonsRSquaredEvaluator.Calculate(originalValues, estimatedValues); 57 57 } -
branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Evaluators/SymbolicDataAnalysisEvaluator.cs
r5559 r5586 125 125 if (count == 0) count = 1; 126 126 return RandomEnumerable.SampleRandomNumbers(seed, SamplesEnd.Value, SamplesStart.Value, count) 127 .Where(i => i < ProblemDataParameter.ActualValue.TestPartitionStart || ProblemDataParameter.ActualValue.TestPartitionEnd<= i);127 .Where(i => i < ProblemDataParameter.ActualValue.TestPartitionStart.Value || ProblemDataParameter.ActualValue.TestPartitionEnd.Value <= i); 128 128 } 129 129 } -
branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis/3.4/DataAnalysisProblemData.cs
r5565 r5586 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 using HeuristicLab.Collections; 25 26 using HeuristicLab.Common; 26 27 using HeuristicLab.Core; 28 using HeuristicLab.Data; 29 using HeuristicLab.Parameters; 27 30 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 28 31 29 32 namespace HeuristicLab.Problems.DataAnalysis { 30 33 [StorableClass] 31 public abstract class DataAnalysisProblemData : NamedItem, IDataAnalysisProblemData { 34 public abstract class DataAnalysisProblemData : ParameterizedNamedItem, IDataAnalysisProblemData { 35 private const string DatasetParameterName = "Dataset"; 36 private const string InputVariablesParameterName = "InputVariables"; 37 private const string TrainingPartitionStartParameterName = "TrainingPartitionStart"; 38 private const string TrainingPartitionEndParameterName = "TrainingPartitionEnd"; 39 private const string TestPartitionStartParameterName = "TestPartitionStart"; 40 private const string TestPartitionEndParameterName = "TestPartitionEnd"; 41 42 #region parameter properites 43 public IValueParameter<Dataset> DatasetParameter { 44 get { return (IValueParameter<Dataset>)Parameters[DatasetParameterName]; } 45 } 46 public IFixedValueParameter<ICheckedItemCollection<StringValue>> InputVariablesParameter { 47 get { return (IFixedValueParameter<ICheckedItemCollection<StringValue>>)Parameters[InputVariablesParameterName]; } 48 } 49 public IFixedValueParameter<IntValue> TrainingPartitionStartParameter { 50 get { return (IFixedValueParameter<IntValue>)Parameters[TrainingPartitionStartParameterName]; } 51 } 52 public IFixedValueParameter<IntValue> TrainingPartitionEndParameter { 53 get { return (IFixedValueParameter<IntValue>)Parameters[TrainingPartitionEndParameterName]; } 54 } 55 public IFixedValueParameter<IntValue> TestPartitionStartParameter { 56 get { return (IFixedValueParameter<IntValue>)Parameters[TestPartitionStartParameterName]; } 57 } 58 public IFixedValueParameter<IntValue> 
TestPartitionEndParameter { 59 get { return (IFixedValueParameter<IntValue>)Parameters[TestPartitionEndParameterName]; } 60 } 61 #endregion 62 32 63 #region propeties 33 [Storable]34 private Dataset dataset;35 64 public Dataset Dataset { 36 get { return dataset; } 65 get { return DatasetParameter.Value; } 66 set { DatasetParameter.Value = value; } 67 } 68 public ICheckedItemCollection<StringValue> InputVariables { 69 get { return InputVariablesParameter.Value; } 70 } 71 public IEnumerable<string> AllowedInputVariables { 72 get { return InputVariables.CheckedItems.Select(x => x.Value); } 37 73 } 38 74 39 [Storable] 40 private HashSet<string> allowedInputVariables; 41 public IEnumerable<string> AllowedInputVariables { 42 get { return allowedInputVariables; } 75 public IntValue TrainingPartitionStart { 76 get { return TrainingPartitionStartParameter.Value; } 43 77 } 44 45 [Storable] 46 private int trainingPartitionStart; 47 public int TrainingPartitionStart { 48 get { return trainingPartitionStart; } 49 set { 50 if (0 < value || value > dataset.Rows) 51 throw new ArgumentException(string.Format("The training partition start must be between 0 and the number of rows of the dataset ({0})", dataset.Rows)); 52 if (trainingPartitionStart != value) { 53 trainingPartitionStart = value; 54 OnChanged(); 55 } 56 } 78 public IntValue TrainingPartitionEnd { 79 get { return TrainingPartitionEndParameter.Value; } 57 80 } 58 [Storable] 59 private int trainingPartitionEnd; 60 public int TrainingPartitionEnd { 61 get { return trainingPartitionEnd; } 62 set { 63 if (0 < value || value > dataset.Rows) 64 throw new ArgumentException(string.Format("The training partition end must be between 0 and the number of rows of the dataset ({0})", dataset.Rows)); 65 if (trainingPartitionEnd != value) { 66 trainingPartitionEnd = value; 67 OnChanged(); 68 } 69 } 81 public IntValue TestPartitionStart { 82 get { return TestPartitionStartParameter.Value; } 70 83 } 71 72 [Storable] 73 private int 
testPartitionStart; 74 public int TestPartitionStart { 75 get { return testPartitionStart; } 76 set { 77 if (0 < value || value > dataset.Rows) 78 throw new ArgumentException(string.Format("The test partition start must be between 0 and the number of rows of the dataset ({0})", dataset.Rows)); 79 if (testPartitionStart != value) { 80 testPartitionStart = value; 81 OnChanged(); 82 } 83 } 84 } 85 [Storable] 86 private int testPartitionEnd; 87 public int TestPartitionEnd { 88 get { return testPartitionEnd; } 89 set { 90 if (0 < value || value > dataset.Rows) 91 throw new ArgumentException(string.Format("The test partition end must be between 0 and the number of rows of the dataset ({0})", dataset.Rows)); 92 if (testPartitionEnd != value) { 93 testPartitionEnd = value; 94 OnChanged(); 95 } 96 } 84 public IntValue TestPartitionEnd { 85 get { return TestPartitionEndParameter.Value; } 97 86 } 98 87 99 88 public IEnumerable<int> TrainingIndizes { 100 89 get { 101 return Enumerable.Range(TrainingPartitionStart , TrainingPartitionEnd - TrainingPartitionStart)102 .Where(i => i >= 0 && i < Dataset.Rows && (i < TestPartitionStart || TestPartitionEnd<= i));90 return Enumerable.Range(TrainingPartitionStart.Value, TrainingPartitionEnd.Value - TrainingPartitionStart.Value) 91 .Where(i => i >= 0 && i < Dataset.Rows && (i < TestPartitionStart.Value || TestPartitionEnd.Value <= i)); 103 92 } 104 93 } 105 94 public IEnumerable<int> TestIndizes { 106 95 get { 107 return Enumerable.Range(TestPartitionStart , TestPartitionEnd - TestPartitionStart)96 return Enumerable.Range(TestPartitionStart.Value, TestPartitionEnd.Value - TestPartitionStart.Value) 108 97 .Where(i => i >= 0 && i < Dataset.Rows); 109 98 } … … 122 111 throw new ArgumentException("All allowed input variables must be present in the dataset."); 123 112 124 this.dataset = dataset; 125 this.allowedInputVariables = new HashSet<string>(allowedInputVariables); 126 trainingPartitionStart = 0; 127 trainingPartitionEnd = dataset.Rows 
/ 2; 128 testPartitionStart = dataset.Rows / 2; 129 testPartitionEnd = dataset.Rows; 113 var inputVariables = new CheckedItemCollection<StringValue>(dataset.VariableNames.Select(x => new StringValue(x))); 114 foreach (StringValue x in inputVariables) 115 inputVariables.SetItemCheckedState(x, allowedInputVariables.Contains(x.Value)); 116 117 int trainingPartitionStart = 0; 118 int trainingPartitionEnd = dataset.Rows / 2; 119 int testPartitionStart = dataset.Rows / 2; 120 int testPartitionEnd = dataset.Rows; 121 122 Parameters.Add(new ValueParameter<Dataset>(DatasetParameterName, "", dataset)); 123 Parameters.Add(new FixedValueParameter<ICheckedItemCollection<StringValue>>(InputVariablesParameterName, "", inputVariables.AsReadOnly())); 124 Parameters.Add(new FixedValueParameter<IntValue>(TrainingPartitionStartParameterName, "", new IntValue(trainingPartitionStart))); 125 Parameters.Add(new FixedValueParameter<IntValue>(TrainingPartitionEndParameterName, "", new IntValue(trainingPartitionEnd))); 126 Parameters.Add(new FixedValueParameter<IntValue>(TestPartitionStartParameterName, "", new IntValue(testPartitionStart))); 127 Parameters.Add(new FixedValueParameter<IntValue>(TestPartitionEndParameterName, "", new IntValue(testPartitionEnd))); 128 129 RegisterEventHandlers(); 130 130 } 131 131 132 public bool AddAllowedInputVariable(string inputVariable) { 133 if (!Dataset.VariableNames.Contains(inputVariable)) 134 throw new ArgumentException("The allowed input variable must be present in the dataset."); 135 if (allowedInputVariables.Contains(inputVariable)) return false; 132 private void RegisterEventHandlers() { 133 DatasetParameter.ValueChanged += new EventHandler(Parameter_ValueChanged); 134 InputVariables.CheckedItemsChanged += new CollectionItemsChangedEventHandler<StringValue>(InputVariables_CheckedItemsChanged); 135 TrainingPartitionStart.ValueChanged += new EventHandler(Parameter_ValueChanged); 136 TrainingPartitionEnd.ValueChanged += new 
EventHandler(Parameter_ValueChanged); 137 TestPartitionStart.ValueChanged += new EventHandler(Parameter_ValueChanged); 138 TestPartitionEnd.ValueChanged += new EventHandler(Parameter_ValueChanged); 139 } 136 140 137 allowedInputVariables.Add(inputVariable);138 return true;141 private void InputVariables_CheckedItemsChanged(object sender, CollectionItemsChangedEventArgs<StringValue> e) { 142 OnChanged(); 139 143 } 140 public bool RemoveAllowedInputVariable(string inputVariable) { 141 if (!Dataset.VariableNames.Contains(inputVariable)) 142 throw new ArgumentException("The allowed input variable must be present in the dataset."); 143 if (!allowedInputVariables.Contains(inputVariable)) return false; 144 145 allowedInputVariables.Remove(inputVariable); 146 return true; 144 private void Parameter_ValueChanged(object sender, EventArgs e) { 145 OnChanged(); 147 146 } 148 147 -
branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataAnalysisProblemData.cs
r5559 r5586 23 23 using System.Collections.Generic; 24 24 using HeuristicLab.Core; 25 using HeuristicLab.Data; 25 26 26 27 namespace HeuristicLab.Problems.DataAnalysis { 27 public interface IDataAnalysisProblemData : INamedItem { 28 Dataset Dataset { get; } 28 public interface IDataAnalysisProblemData : IParameterizedNamedItem { 29 Dataset Dataset { get; set; } 30 ICheckedItemCollection<StringValue> InputVariables { get; } 29 31 IEnumerable<string> AllowedInputVariables { get; } 30 32 31 bool AddAllowedInputVariable(string inputVariable); 32 bool RemoveAllowedInputVariable(string inputVariable); 33 34 int TrainingPartitionStart { get; set; } 35 int TrainingPartitionEnd { get; set; } 36 int TestPartitionStart { get; set; } 37 int TestPartitionEnd { get; set; } 33 IntValue TrainingPartitionStart { get; } 34 IntValue TrainingPartitionEnd { get; } 35 IntValue TestPartitionStart { get; } 36 IntValue TestPartitionEnd { get; } 38 37 39 38 IEnumerable<int> TrainingIndizes { get; } -
branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/Regression/IRegressionProblemData.cs
r5559 r5586 20 20 #endregion 21 21 22 using HeuristicLab.Data; 22 23 namespace HeuristicLab.Problems.DataAnalysis { 23 24 public interface IRegressionProblemData : IDataAnalysisProblemData { 24 string TargetVariable { get; set; }25 StringValue TargetVariable { get; } 25 26 } 26 27 } -
branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis/3.4/RegressionProblemData.cs
r5559 r5586 20 20 #endregion 21 21 22 using System;23 22 using System.Collections.Generic; 24 23 using System.IO; 25 24 using System.Linq; 26 25 using HeuristicLab.Common; 26 using HeuristicLab.Core; 27 using HeuristicLab.Data; 28 using HeuristicLab.Parameters; 27 29 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 28 30 … … 30 32 [StorableClass] 31 33 public sealed class RegressionProblemData : DataAnalysisProblemData, IRegressionProblemData { 34 private const string TargetVariableParameterName = "TargetVariable"; 32 35 33 36 #region default data … … 72 75 #endregion 73 76 74 #region propeties 75 [Storable] 76 private string targetVariable; 77 public string TargetVariable { 78 get { return targetVariable; } 79 set { 80 if (!Dataset.VariableNames.Contains(value)) 81 throw new ArgumentException(string.Format("The target variable {0} is not present in the dataset", value)); 82 if (targetVariable != value) { 83 targetVariable = value; 84 OnChanged(); 85 } 86 } 77 public IValueParameter<StringValue> TargetVariableParameter { 78 get { return (IValueParameter<StringValue>)Parameters[TargetVariableParameterName]; } 87 79 } 88 #endregion 80 public StringValue TargetVariable { 81 get { return TargetVariableParameter.Value; } 82 } 83 89 84 90 85 [StorableConstructor] … … 99 94 public RegressionProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables, string targetVariable) 100 95 : base(dataset, allowedInputVariables) { 101 TargetVariable = targetVariable;96 Parameters.Add(new ConstrainedValueParameter<StringValue>("TargetVariable", new ItemSet<StringValue>(InputVariables), InputVariables.Where(x => x.Value == targetVariable).First())); 102 97 } 103 98
Note: See TracChangeset for help on using the changeset viewer.