Free cookie consent management tool by TermsFeed Policy Generator

Changeset 10695


Ignore:
Timestamp:
04/02/14 11:38:44 (10 years ago)
Author:
pfleck
Message:
  • Added Transformations to PreprocessingData
  • Added Transformations to DataAnalysisProblemData Parameters
  • Removed SymbolicExpressionTree as inverse transformation.
Location:
branches/DataPreprocessing
Files:
1 deleted
14 edited

Legend:

Unmodified
Added
Removed
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingContext.cs

    r10676 r10695  
    93 93      problem.ProblemDataParameter.ActualValue = data;
    94 94      problem.Name = "Preprocessed " + problem.Name;
    95 
    96       var symbolicProblem = problem as ISymbolicDataAnalysisProblem;
    97       if (symbolicProblem != null) {
    98         var tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
    99         var variableNode = (VariableTreeNode)new Variable("dummy", "dummy description").CreateTreeNode();
    100         variableNode.VariableName = "dummy";
    101         tree.Root.AddSubtree(variableNode);
    102 
    103         symbolicProblem.TransformationsParameter.Value.Add(tree);
    104       }
    105 
    106 95      return clone;
    107 96    }
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingData.cs

    r10586 r10695  
    39 39
    40 40    protected double trainingToTestRatio;
     41
     42    protected IList<ITransformation> transformations;
    41 43
    42 44    protected PreprocessingData(PreprocessingData original, Cloner cloner)
     
    161 163    }
    162 164
     165    public IList<ITransformation> Transformations {
     166      get { return transformations; }
     167    }
     168
    163 169    public string GetVariableName(int columnIndex) {
    164 170      return variableNames[columnIndex];
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Interfaces/IPreprocessingData.cs

    r10586 r10695  
    51 51    IntRange TestPartition { get; }
    52 52
     53    IList<ITransformation> Transformations { get; }
     54
    53 55    IEnumerable<string> VariableNames { get; }
    54 56    string GetVariableName(int columnIndex);
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/ProblemDataCreator.cs

    r10536 r10695  
    29 29    private readonly IPreprocessingContext context;
    30 30
     31    private Dataset ExportedDataset {
     32      get { return exporteDataset ?? (exporteDataset = context.Data.ExportToDataset()); }
     33    }
     34    private Dataset exporteDataset;
     35
     36    private IEnumerable<string> InputVariables { get { return context.Data.VariableNames; } }
     37    private IEnumerable<ITransformation> Transformations { get { return context.Data.Transformations; } }
     38
     39
    31 40    public ProblemDataCreator(IPreprocessingContext context) {
    32 41      this.context = context;
     
    38 47      IDataAnalysisProblemData problemData = null;
    39 48
    40       var dataSet = context.Data.ExportToDataset();
    41       var inputVariables = context.Data.VariableNames;
    42 
    43 49      if (oldProblemData is RegressionProblemData) {
    44         problemData = CreateRegressionData((RegressionProblemData)oldProblemData, dataSet, inputVariables);
     50        problemData = CreateRegressionData((RegressionProblemData)oldProblemData);
    45 51      } else if (oldProblemData is ClassificationProblemData) {
    46         problemData = CreateClassificationData((ClassificationProblemData)oldProblemData, dataSet, inputVariables);
     52        problemData = CreateClassificationData((ClassificationProblemData)oldProblemData);
    47 53      } else if (oldProblemData is ClusteringProblemData) {
    48         problemData = CreateClusteringData((ClusteringProblemData)oldProblemData, dataSet, inputVariables);
     54        problemData = CreateClusteringData((ClusteringProblemData)oldProblemData);
    49 55      } else {
    50 56        throw new NotImplementedException("The type of the DataAnalysisProblemData is not supported.");
     
    56 62    }
    57 63
    58     private IDataAnalysisProblemData CreateRegressionData(RegressionProblemData oldProblemData, Dataset dataSet, IEnumerable<string> inputVariables) {
     64    private IDataAnalysisProblemData CreateRegressionData(RegressionProblemData oldProblemData) {
    59 65      var targetVariable = oldProblemData.TargetVariable;
    60 66      // target variable must be double and must exist in the new dataset
    61       return new RegressionProblemData(dataSet, inputVariables, targetVariable);
     67      return new RegressionProblemData(ExportedDataset, InputVariables, targetVariable, Transformations);
    62 68    }
    63 69
    64     private IDataAnalysisProblemData CreateClassificationData(ClassificationProblemData oldProblemData, Dataset dataSet, IEnumerable<string> inputVariables) {
     70    private IDataAnalysisProblemData CreateClassificationData(ClassificationProblemData oldProblemData) {
    65 71      var targetVariable = oldProblemData.TargetVariable;
    66 72      // target variable must be double and must exist in the new dataset
    67       return new ClassificationProblemData(dataSet, inputVariables, targetVariable);
     73      return new ClassificationProblemData(ExportedDataset, InputVariables, targetVariable, Transformations);
    68 74    }
    69 75
    70     private IDataAnalysisProblemData CreateClusteringData(ClusteringProblemData oldProblemData, Dataset dataSet, IEnumerable<string> inputVariables) {
    71       return new ClusteringProblemData(dataSet, inputVariables);
     76    private IDataAnalysisProblemData CreateClusteringData(ClusteringProblemData oldProblemData) {
     77      return new ClusteringProblemData(ExportedDataset, InputVariables, Transformations);
    72 78    }
    73 79
  • branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.csproj

    r10673 r10695  
    221 221    <Compile Include="Symbols\VariableConditionTreeNode.cs" />
    222 222    <Compile Include="Symbols\VariableTreeNode.cs" />
    223     <Compile Include="TransformationCollection.cs" />
    224 223    <None Include="HeuristicLab.snk" />
    225 224    <None Include="Plugin.cs.frame" />
  • branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interfaces/ISymbolicDataAnalysisProblem.cs

    r10673 r10695  
    35 35    IFixedValueParameter<IntRange> FitnessCalculationPartitionParameter { get; }
    36 36    IFixedValueParameter<IntRange> ValidationPartitionParameter { get; }
    37     IFixedValueParameter<TransformationCollection> TransformationsParameter { get; }
    38 37
    39 38    ISymbolicDataAnalysisGrammar SymbolicExpressionTreeGrammar { get; set; }
  • branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisProblem.cs

    r10673 r10695  
    54 54    private const string ValidationPartitionParameterName = "ValidationPartition";
    55 55    private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
    56     private const string TransformationsParameterName = "Transformations";
    57 56
    58 57    private const string ProblemDataParameterDescription = "";
     
    67 66    private const string ValidationPartitionParameterDescription = "The partition of the problem data training partition, that should be used to select the best model from (optional).";
    68 67    private const string ApplyLinearScalingParameterDescription = "Flag that indicates if the individual should be linearly scaled before evaluating.";
    69     private const string TransformationsParameterDescrioption = "The transformations which were applied on the input variables.";
    70 68    #endregion
    71 69
     
    106 104    public IFixedValueParameter<BoolValue> ApplyLinearScalingParameter {
    107 105      get { return (IFixedValueParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
    108     }
    109     public IFixedValueParameter<TransformationCollection> TransformationsParameter {
    110       get { return (IFixedValueParameter<TransformationCollection>)Parameters[TransformationsParameterName]; }
    111 106    }
    112 107    #endregion
     
    172 167        if (GetType().Name.Contains("SymbolicRegression"))
    173 168          ApplyLinearScaling.Value = true;
    174       }
    175 
    176       if (!Parameters.ContainsKey(TransformationsParameterName)) {
    177         Parameters.Add(new FixedValueParameter<TransformationCollection>(TransformationsParameterName, TransformationsParameterDescrioption, new TransformationCollection()));
    178         TransformationsParameter.Hidden = true;
    179 169      }
    180 170
     
    199 189      Parameters.Add(new FixedValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, RelativeNumberOfEvaluatedSamplesParameterDescription, new PercentValue(1)));
    200 190      Parameters.Add(new FixedValueParameter<BoolValue>(ApplyLinearScalingParameterName, ApplyLinearScalingParameterDescription, new BoolValue(false)));
    201       Parameters.Add(new FixedValueParameter<TransformationCollection>(TransformationsParameterName, TransformationsParameterDescrioption, new TransformationCollection()));
    202 191
    203 192      SymbolicExpressionTreeInterpreterParameter.Hidden = true;
     
    205 194      MaximumFunctionDefinitionsParameter.Hidden = true;
    206 195      ApplyLinearScalingParameter.Hidden = true;
    207       TransformationsParameter.Hidden = true;
    208 196
    209 197      SymbolicExpressionTreeGrammar = new TypeCoherentExpressionGrammar();
  • branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis.Trading/3.4/ProblemData.cs

    r9989 r10695  
    1627 1627    }
    1628 1628
    1629     public ProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables, string targetVariable)
    1630       : base(dataset, allowedInputVariables) {
     1629    public ProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables, string targetVariable, IEnumerable<ITransformation> transformations = null)
     1630      : base(dataset, allowedInputVariables, transformations ?? Enumerable.Empty<ITransformation>()) {
    1631 1631      var variables = InputVariables.Select(x => x.AsReadOnly()).ToList();
    1632 1632      Parameters.Add(new ConstrainedValueParameter<StringValue>(PriceChangeVariableParameterName, new ItemSet<StringValue>(variables), variables.First(x => x.Value == targetVariable)));
  • branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationProblemData.cs

    r9456 r10695  
    291 291    }
    292 292
    293     public ClassificationProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables, string targetVariable)
    294       : base(dataset, allowedInputVariables) {
     293    public ClassificationProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables, string targetVariable, IEnumerable<ITransformation> transformations = null)
     294      : base(dataset, allowedInputVariables, transformations ?? Enumerable.Empty<ITransformation>()) {
    295 295      var validTargetVariableValues = CheckVariablesForPossibleTargetVariables(dataset).Select(x => new StringValue(x).AsReadOnly()).ToList();
    296 296      var target = validTargetVariableValues.Where(x => x.Value == targetVariable).DefaultIfEmpty(validTargetVariableValues.First()).First();
  • branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Clustering/ClusteringProblemData.cs

    r9456 r10695  
    21 21
    22 22using System.Collections.Generic;
     23using System.Linq;
    23 24using HeuristicLab.Common;
    24 25using HeuristicLab.Core;
     
    86 87    }
    87 88
    88     public ClusteringProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables)
    89       : base(dataset, allowedInputVariables) {
     89    public ClusteringProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables, IEnumerable<ITransformation> transformations = null)
     90      : base(dataset, allowedInputVariables, transformations ?? Enumerable.Empty<ITransformation>()) {
    90 91    }
    91 92  }
  • branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisProblemData.cs

    r9456 r10695  
    37 37    protected const string TrainingPartitionParameterName = "TrainingPartition";
    38 38    protected const string TestPartitionParameterName = "TestPartition";
     39    protected const string TransformationsParameterName = "Transformations";
    39 40
    40 41    #region parameter properites
     
    50 51    public IFixedValueParameter<IntRange> TestPartitionParameter {
    51 52      get { return (IFixedValueParameter<IntRange>)Parameters[TestPartitionParameterName]; }
     53    }
     54    public IFixedValueParameter<ReadOnlyItemCollection<ITransformation>> TransformationsParameter {
     55      get { return (IFixedValueParameter<ReadOnlyItemCollection<ITransformation>>)Parameters[TransformationsParameterName]; }
    52 56    }
    53 57    #endregion
     
    88 92    }
    89 93
     94    public IEnumerable<ITransformation> Transformations {
     95      get { return TransformationsParameter.Value; }
     96    }
     97
    90 98    public virtual bool IsTrainingSample(int index) {
    91 99      return index >= 0 && index < Dataset.Rows &&
     
    111 119    private void AfterDeserialization() {
    112 120      RegisterEventHandlers();
     121
     122      if (!Parameters.ContainsKey(TransformationsParameterName)) {
     123        Parameters.Add(new FixedValueParameter<ReadOnlyItemCollection<ITransformation>>(TransformationsParameterName, "", new ItemCollection<ITransformation>().AsReadOnly()));
     124      }
    113 125    }
    114 126
    115     protected DataAnalysisProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables) {
     127    protected DataAnalysisProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables, IEnumerable<ITransformation> transformations) {
    116 128      if (dataset == null) throw new ArgumentNullException("The dataset must not be null.");
    117 129      if (allowedInputVariables == null) throw new ArgumentNullException("The allowedInputVariables must not be null.");
     
    119 131      if (allowedInputVariables.Except(dataset.DoubleVariables).Any())
    120 132        throw new ArgumentException("All allowed input variables must be present in the dataset and of type double.");
     133
     134      if (transformations == null) throw new ArgumentNullException("The transformations must not be null.");
    121 135
    122 136      var inputVariables = new CheckedItemList<StringValue>(dataset.DoubleVariables.Select(x => new StringValue(x)));
     
    129 143      int testPartitionEnd = dataset.Rows;
    130 144
     145      var transformationsCollection = new ItemCollection<ITransformation>(transformations);
     146
    131 147      Parameters.Add(new FixedValueParameter<Dataset>(DatasetParameterName, "", dataset));
    132 148      Parameters.Add(new FixedValueParameter<ReadOnlyCheckedItemList<StringValue>>(InputVariablesParameterName, "", inputVariables.AsReadOnly()));
    133 149      Parameters.Add(new FixedValueParameter<IntRange>(TrainingPartitionParameterName, "", new IntRange(trainingPartitionStart, trainingPartitionEnd)));
    134 150      Parameters.Add(new FixedValueParameter<IntRange>(TestPartitionParameterName, "", new IntRange(testPartitionStart, testPartitionEnd)));
     151      Parameters.Add(new FixedValueParameter<ReadOnlyItemCollection<ITransformation>>(TransformationsParameterName, "", transformationsCollection.AsReadOnly()));
    135 152
    136 153      ((ValueParameter<Dataset>)DatasetParameter).ReactOnValueToStringChangedAndValueItemImageChanged = false;
  • branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionProblemData.cs

    r9456 r10695  
    129 129    }
    130 130
    131     public RegressionProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables, string targetVariable)
    132       : base(dataset, allowedInputVariables) {
     131    public RegressionProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables, string targetVariable, IEnumerable<ITransformation> transformations = null)
     132      : base(dataset, allowedInputVariables, transformations ?? Enumerable.Empty<ITransformation>()) {
    133 133      var variables = InputVariables.Select(x => x.AsReadOnly()).ToList();
    134 134      Parameters.Add(new ConstrainedValueParameter<StringValue>(TargetVariableParameterName, new ItemSet<StringValue>(variables), variables.Where(x => x.Value == targetVariable).First()));
  • branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataAnalysisProblemData.cs

    r9456 r10695  
    39 39    IEnumerable<int> TestIndices { get; }
    40 40
     41    IEnumerable<ITransformation> Transformations { get; }
     42
    41 43    bool IsTrainingSample(int index);
    42 44    bool IsTestSample(int index);
  • branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/ITransformation.cs

    r10694 r10695  
    20 20#endregion
    21 21
     22using System.Collections;
    22 23using System.Collections.Generic;
    23 24using HeuristicLab.Core;
    24 25
    25 26namespace HeuristicLab.Problems.DataAnalysis {
    26   public interface ITransformation<T> : IParameterizedItem {
     27  public interface ITransformation : IParameterizedItem {
    27 28    string Column { get; }
     29  }
     30
     31  public interface ITransformation<T> : ITransformation {
    28 32    IEnumerable<T> Apply(IEnumerable<T> data);
    29 33    IEnumerable<T> InverseApply(IEnumerable<T> data);
Note: See TracChangeset for help on using the changeset viewer.