Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
12/11/13 15:55:30 (11 years ago)
Author:
mleitner
Message:

Implement a first draft for shuffling the dataset while maintaining the test and training partitions

Location:
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingData.cs

    r10194 r10218  
    2727using HeuristicLab.Core;
    2828using HeuristicLab.Problems.DataAnalysis;
     29using HeuristicLab.Data;
    2930
    3031namespace HeuristicLab.DataPreprocessing {
     
    3536
    3637    private IList<string> variableNames;
     38    private IntRange trainingPartition;
     39    private IntRange testPartition;
    3740
    3841    private IDictionary<string, int> variableNameIndices;
     
    7174        }
    7275      }
     76
     77      trainingPartition = problemData.TrainingPartition;
     78      testPartition = problemData.TestPartition;
    7379
    7480      trainingToTestRatio = (double)problemData.TrainingPartition.Size / problemData.TestPartition.Size;
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingDataManipulation.cs

    r10193 r10218  
    1 using System;
     1using HeuristicLab.Data;
     2using System;
    23using System.Collections.Generic;
    34using System.Linq;
     
    8788            }
    8889        }
     90
     91        public void ShuffleWithRanges(IEnumerable<IntRange> ranges)
     92        {
     93            // init random outside loop
     94            Random random = new Random();
     95
     96            // process all given ranges - e.g. TrainingPartition, Trainingpartition
     97            foreach (IntRange range in ranges) {
     98                List<int> shuffledIndices = new List<int>();
     99               
     100                // generate random indices used for shuffeling each column
     101                for (int i = range.End; i > range.Start; --i)
     102                {
     103                    int rand = random.Next(range.Start, i);
     104                    shuffledIndices[i] = rand;
     105                }
     106
     107                foreach (string variableName in preprocessingData.VariableNames)
     108                {
     109                    if (preprocessingData.IsType<double>(variableName))
     110                    {
     111                        reOrderToIndices<double>(variableName, shuffledIndices);
     112                    }
     113                    else if (preprocessingData.IsType<string>(variableName))
     114                    {
     115                        reOrderToIndices<string>(variableName, shuffledIndices);
     116                    }
     117                    else if (preprocessingData.IsType<DateTime>(variableName))
     118                    {
     119                        reOrderToIndices<DateTime>(variableName, shuffledIndices);
     120                    }
     121                }
     122            }     
     123        }
     124
     125        public void reOrderToIndices<T>(string variableName, List<int> indices) {
     126            // process all columns equally
     127            for (int i = 0; i < preprocessingData.Rows; i++)
     128            {
     129                int replaceIndex = indices[i];
     130
     131                T tmp = preprocessingData.GetCell<T>(variableName, i);
     132                T replaceValue = preprocessingData.GetCell<T>(variableName, replaceIndex);
     133
     134                preprocessingData.SetCell<T>(variableName, i, replaceValue);
     135                preprocessingData.SetCell<T>(variableName, replaceIndex, tmp);
     136            }
     137        }
    89138    }
    90139}
Note: See TracChangeset for help on using the changeset viewer.