- Timestamp:
- 04/02/14 11:38:44 (11 years ago)
- Location:
- branches/DataPreprocessing
- Files:
-
- 1 deleted
- 14 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingContext.cs
r10676 r10695 93 93 problem.ProblemDataParameter.ActualValue = data; 94 94 problem.Name = "Preprocessed " + problem.Name; 95 96 var symbolicProblem = problem as ISymbolicDataAnalysisProblem;97 if (symbolicProblem != null) {98 var tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());99 var variableNode = (VariableTreeNode)new Variable("dummy", "dummy description").CreateTreeNode();100 variableNode.VariableName = "dummy";101 tree.Root.AddSubtree(variableNode);102 103 symbolicProblem.TransformationsParameter.Value.Add(tree);104 }105 106 95 return clone; 107 96 } -
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingData.cs
r10586 r10695 39 39 40 40 protected double trainingToTestRatio; 41 42 protected IList<ITransformation> transformations; 41 43 42 44 protected PreprocessingData(PreprocessingData original, Cloner cloner) … … 161 163 } 162 164 165 public IList<ITransformation> Transformations { 166 get { return transformations; } 167 } 168 163 169 public string GetVariableName(int columnIndex) { 164 170 return variableNames[columnIndex]; -
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/IPreprocessingData.cs
r10586 r10695 51 51 IntRange TestPartition { get; } 52 52 53 IList<ITransformation> Transformations { get; } 54 53 55 IEnumerable<string> VariableNames { get; } 54 56 string GetVariableName(int columnIndex); -
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/ProblemDataCreator.cs
r10536 r10695 29 29 private readonly IPreprocessingContext context; 30 30 31 private Dataset ExportedDataset { 32 get { return exporteDataset ?? (exporteDataset = context.Data.ExportToDataset()); } 33 } 34 private Dataset exporteDataset; 35 36 private IEnumerable<string> InputVariables { get { return context.Data.VariableNames; } } 37 private IEnumerable<ITransformation> Transformations { get { return context.Data.Transformations; } } 38 39 31 40 public ProblemDataCreator(IPreprocessingContext context) { 32 41 this.context = context; … … 38 47 IDataAnalysisProblemData problemData = null; 39 48 40 var dataSet = context.Data.ExportToDataset();41 var inputVariables = context.Data.VariableNames;42 43 49 if (oldProblemData is RegressionProblemData) { 44 problemData = CreateRegressionData((RegressionProblemData)oldProblemData , dataSet, inputVariables);50 problemData = CreateRegressionData((RegressionProblemData)oldProblemData); 45 51 } else if (oldProblemData is ClassificationProblemData) { 46 problemData = CreateClassificationData((ClassificationProblemData)oldProblemData , dataSet, inputVariables);52 problemData = CreateClassificationData((ClassificationProblemData)oldProblemData); 47 53 } else if (oldProblemData is ClusteringProblemData) { 48 problemData = CreateClusteringData((ClusteringProblemData)oldProblemData , dataSet, inputVariables);54 problemData = CreateClusteringData((ClusteringProblemData)oldProblemData); 49 55 } else { 50 56 throw new NotImplementedException("The type of the DataAnalysisProblemData is not supported."); … … 56 62 } 57 63 58 private IDataAnalysisProblemData CreateRegressionData(RegressionProblemData oldProblemData , Dataset dataSet, IEnumerable<string> inputVariables) {64 private IDataAnalysisProblemData CreateRegressionData(RegressionProblemData oldProblemData) { 59 65 var targetVariable = oldProblemData.TargetVariable; 60 66 // target variable must be double and must exist in the new dataset 61 return new RegressionProblemData( dataSet, inputVariables, targetVariable);67 return new RegressionProblemData(ExportedDataset, InputVariables, targetVariable, Transformations); 62 68 } 63 69 64 private IDataAnalysisProblemData CreateClassificationData(ClassificationProblemData oldProblemData , Dataset dataSet, IEnumerable<string> inputVariables) {70 private IDataAnalysisProblemData CreateClassificationData(ClassificationProblemData oldProblemData) { 65 71 var targetVariable = oldProblemData.TargetVariable; 66 72 // target variable must be double and must exist in the new dataset 67 return new ClassificationProblemData( dataSet, inputVariables, targetVariable);73 return new ClassificationProblemData(ExportedDataset, InputVariables, targetVariable, Transformations); 68 74 } 69 75 70 private IDataAnalysisProblemData CreateClusteringData(ClusteringProblemData oldProblemData , Dataset dataSet, IEnumerable<string> inputVariables) {71 return new ClusteringProblemData( dataSet, inputVariables);76 private IDataAnalysisProblemData CreateClusteringData(ClusteringProblemData oldProblemData) { 77 return new ClusteringProblemData(ExportedDataset, InputVariables, Transformations); 72 78 } 73 79 -
branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.csproj
r10673 r10695 221 221 <Compile Include="Symbols\VariableConditionTreeNode.cs" /> 222 222 <Compile Include="Symbols\VariableTreeNode.cs" /> 223 <Compile Include="TransformationCollection.cs" />224 223 <None Include="HeuristicLab.snk" /> 225 224 <None Include="Plugin.cs.frame" /> -
branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interfaces/ISymbolicDataAnalysisProblem.cs
r10673 r10695 35 35 IFixedValueParameter<IntRange> FitnessCalculationPartitionParameter { get; } 36 36 IFixedValueParameter<IntRange> ValidationPartitionParameter { get; } 37 IFixedValueParameter<TransformationCollection> TransformationsParameter { get; }38 37 39 38 ISymbolicDataAnalysisGrammar SymbolicExpressionTreeGrammar { get; set; } -
branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisProblem.cs
r10673 r10695 54 54 private const string ValidationPartitionParameterName = "ValidationPartition"; 55 55 private const string ApplyLinearScalingParameterName = "ApplyLinearScaling"; 56 private const string TransformationsParameterName = "Transformations";57 56 58 57 private const string ProblemDataParameterDescription = ""; … … 67 66 private const string ValidationPartitionParameterDescription = "The partition of the problem data training partition, that should be used to select the best model from (optional)."; 68 67 private const string ApplyLinearScalingParameterDescription = "Flag that indicates if the individual should be linearly scaled before evaluating."; 69 private const string TransformationsParameterDescrioption = "The transformations which were applied on the input variables.";70 68 #endregion 71 69 … … 106 104 public IFixedValueParameter<BoolValue> ApplyLinearScalingParameter { 107 105 get { return (IFixedValueParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; } 108 }109 public IFixedValueParameter<TransformationCollection> TransformationsParameter {110 get { return (IFixedValueParameter<TransformationCollection>)Parameters[TransformationsParameterName]; }111 106 } 112 107 #endregion … … 172 167 if (GetType().Name.Contains("SymbolicRegression")) 173 168 ApplyLinearScaling.Value = true; 174 }175 176 if (!Parameters.ContainsKey(TransformationsParameterName)) {177 Parameters.Add(new FixedValueParameter<TransformationCollection>(TransformationsParameterName, TransformationsParameterDescrioption, new TransformationCollection()));178 TransformationsParameter.Hidden = true;179 169 } 180 170 … … 199 189 Parameters.Add(new FixedValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, RelativeNumberOfEvaluatedSamplesParameterDescription, new PercentValue(1))); 200 190 Parameters.Add(new FixedValueParameter<BoolValue>(ApplyLinearScalingParameterName, ApplyLinearScalingParameterDescription, new BoolValue(false))); 201 Parameters.Add(new FixedValueParameter<TransformationCollection>(TransformationsParameterName, TransformationsParameterDescrioption, new TransformationCollection()));202 191 203 192 SymbolicExpressionTreeInterpreterParameter.Hidden = true; … … 205 194 MaximumFunctionDefinitionsParameter.Hidden = true; 206 195 ApplyLinearScalingParameter.Hidden = true; 207 TransformationsParameter.Hidden = true;208 196 209 197 SymbolicExpressionTreeGrammar = new TypeCoherentExpressionGrammar(); -
branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis.Trading/3.4/ProblemData.cs
r9989 r10695 1627 1627 } 1628 1628 1629 public ProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables, string targetVariable )1630 : base(dataset, allowedInputVariables ) {1629 public ProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables, string targetVariable, IEnumerable<ITransformation> transformations = null) 1630 : base(dataset, allowedInputVariables, transformations ?? Enumerable.Empty<ITransformation>()) { 1631 1631 var variables = InputVariables.Select(x => x.AsReadOnly()).ToList(); 1632 1632 Parameters.Add(new ConstrainedValueParameter<StringValue>(PriceChangeVariableParameterName, new ItemSet<StringValue>(variables), variables.First(x => x.Value == targetVariable))); -
branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationProblemData.cs
r9456 r10695 291 291 } 292 292 293 public ClassificationProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables, string targetVariable )294 : base(dataset, allowedInputVariables ) {293 public ClassificationProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables, string targetVariable, IEnumerable<ITransformation> transformations = null) 294 : base(dataset, allowedInputVariables, transformations ?? Enumerable.Empty<ITransformation>()) { 295 295 var validTargetVariableValues = CheckVariablesForPossibleTargetVariables(dataset).Select(x => new StringValue(x).AsReadOnly()).ToList(); 296 296 var target = validTargetVariableValues.Where(x => x.Value == targetVariable).DefaultIfEmpty(validTargetVariableValues.First()).First(); -
branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Clustering/ClusteringProblemData.cs
r9456 r10695 21 21 22 22 using System.Collections.Generic; 23 using System.Linq; 23 24 using HeuristicLab.Common; 24 25 using HeuristicLab.Core; … … 86 87 } 87 88 88 public ClusteringProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables )89 : base(dataset, allowedInputVariables ) {89 public ClusteringProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables, IEnumerable<ITransformation> transformations = null) 90 : base(dataset, allowedInputVariables, transformations ?? Enumerable.Empty<ITransformation>()) { 90 91 } 91 92 } -
branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisProblemData.cs
r9456 r10695 37 37 protected const string TrainingPartitionParameterName = "TrainingPartition"; 38 38 protected const string TestPartitionParameterName = "TestPartition"; 39 protected const string TransformationsParameterName = "Transformations"; 39 40 40 41 #region parameter properites … … 50 51 public IFixedValueParameter<IntRange> TestPartitionParameter { 51 52 get { return (IFixedValueParameter<IntRange>)Parameters[TestPartitionParameterName]; } 53 } 54 public IFixedValueParameter<ReadOnlyItemCollection<ITransformation>> TransformationsParameter { 55 get { return (IFixedValueParameter<ReadOnlyItemCollection<ITransformation>>)Parameters[TransformationsParameterName]; } 52 56 } 53 57 #endregion … … 88 92 } 89 93 94 public IEnumerable<ITransformation> Transformations { 95 get { return TransformationsParameter.Value; } 96 } 97 90 98 public virtual bool IsTrainingSample(int index) { 91 99 return index >= 0 && index < Dataset.Rows && … … 111 119 private void AfterDeserialization() { 112 120 RegisterEventHandlers(); 121 122 if (!Parameters.ContainsKey(TransformationsParameterName)) { 123 Parameters.Add(new FixedValueParameter<ReadOnlyItemCollection<ITransformation>>(TransformationsParameterName, "", new ItemCollection<ITransformation>().AsReadOnly())); 124 } 113 125 } 114 126 115 protected DataAnalysisProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables ) {127 protected DataAnalysisProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables, IEnumerable<ITransformation> transformations) { 116 128 if (dataset == null) throw new ArgumentNullException("The dataset must not be null."); 117 129 if (allowedInputVariables == null) throw new ArgumentNullException("The allowedInputVariables must not be null."); … … 119 131 if (allowedInputVariables.Except(dataset.DoubleVariables).Any()) 120 132 throw new ArgumentException("All allowed input variables must be present in the dataset and of type double."); 133 134 if (transformations == null) throw new ArgumentNullException("The transformations must not be null."); 121 135 122 136 var inputVariables = new CheckedItemList<StringValue>(dataset.DoubleVariables.Select(x => new StringValue(x))); … … 129 143 int testPartitionEnd = dataset.Rows; 130 144 145 var transformationsCollection = new ItemCollection<ITransformation>(transformations); 146 131 147 Parameters.Add(new FixedValueParameter<Dataset>(DatasetParameterName, "", dataset)); 132 148 Parameters.Add(new FixedValueParameter<ReadOnlyCheckedItemList<StringValue>>(InputVariablesParameterName, "", inputVariables.AsReadOnly())); 133 149 Parameters.Add(new FixedValueParameter<IntRange>(TrainingPartitionParameterName, "", new IntRange(trainingPartitionStart, trainingPartitionEnd))); 134 150 Parameters.Add(new FixedValueParameter<IntRange>(TestPartitionParameterName, "", new IntRange(testPartitionStart, testPartitionEnd))); 151 Parameters.Add(new FixedValueParameter<ReadOnlyItemCollection<ITransformation>>(TransformationsParameterName, "", transformationsCollection.AsReadOnly())); 135 152 136 153 ((ValueParameter<Dataset>)DatasetParameter).ReactOnValueToStringChangedAndValueItemImageChanged = false; -
branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionProblemData.cs
r9456 r10695 129 129 } 130 130 131 public RegressionProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables, string targetVariable )132 : base(dataset, allowedInputVariables ) {131 public RegressionProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables, string targetVariable, IEnumerable<ITransformation> transformations = null) 132 : base(dataset, allowedInputVariables, transformations ?? Enumerable.Empty<ITransformation>()) { 133 133 var variables = InputVariables.Select(x => x.AsReadOnly()).ToList(); 134 134 Parameters.Add(new ConstrainedValueParameter<StringValue>(TargetVariableParameterName, new ItemSet<StringValue>(variables), variables.Where(x => x.Value == targetVariable).First())); -
branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataAnalysisProblemData.cs
r9456 r10695 39 39 IEnumerable<int> TestIndices { get; } 40 40 41 IEnumerable<ITransformation> Transformations { get; } 42 41 43 bool IsTrainingSample(int index); 42 44 bool IsTestSample(int index); -
branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/ITransformation.cs
r10694 r10695 20 20 #endregion 21 21 22 using System.Collections; 22 23 using System.Collections.Generic; 23 24 using HeuristicLab.Core; 24 25 25 26 namespace HeuristicLab.Problems.DataAnalysis { 26 public interface ITransformation <T>: IParameterizedItem {27 public interface ITransformation : IParameterizedItem { 27 28 string Column { get; } 29 } 30 31 public interface ITransformation<T> : ITransformation { 28 32 IEnumerable<T> Apply(IEnumerable<T> data); 29 33 IEnumerable<T> InverseApply(IEnumerable<T> data);
Note: See TracChangeset
for help on using the changeset viewer.