Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/PreprocessingDataManipulation.cs @ 10220

Last change on this file since 10220 was 10218, checked in by mleitner, 11 years ago

Implement first draft for shuffling the dataset while maintaining the test and training partitions

File size: 5.6 KB
Line 
1using HeuristicLab.Data;
2using System;
3using System.Collections.Generic;
4using System.Linq;
5using System.Text;
6
namespace HeuristicLab.DataPreprocessing.Implementations
{
    /// <summary>
    /// Bulk edit operations on an <see cref="IPreprocessingData"/> instance: overwriting selected
    /// cells of a column with a derived value, and shuffling rows inside given index ranges.
    /// </summary>
    class PreprocessingDataManipulation
    {
        private readonly IPreprocessingData preprocessingData;
        private readonly StatisticInfo statisticInfo;

        // A single generator per instance. Creating a new time-seeded Random on every call can
        // yield identical sequences when two calls happen in quick succession.
        private readonly Random random = new Random();

        /// <summary>
        /// Creates a manipulator that reads from and writes to the given data.
        /// </summary>
        /// <param name="_prepocessingData">The data that all operations of this instance modify.</param>
        /// <exception cref="ArgumentNullException">If <paramref name="_prepocessingData"/> is null.</exception>
        public PreprocessingDataManipulation(IPreprocessingData _prepocessingData)
        {
            if (_prepocessingData == null)
            {
                throw new ArgumentNullException("_prepocessingData");
            }

            preprocessingData = _prepocessingData;
            statisticInfo = new StatisticInfo(preprocessingData);
        }

        /// <summary>
        /// Writes <paramref name="value"/> into every listed row of the given column.
        /// </summary>
        /// <typeparam name="T">Cell type of the column.</typeparam>
        /// <param name="variableName">Name of the column to modify.</param>
        /// <param name="indices">Row indices whose cells are overwritten.</param>
        /// <param name="value">The replacement value.</param>
        public void ReplaceIndicesByValue<T>(string variableName, IEnumerable<int> indices, T value)
        {
            foreach (int index in indices)
            {
                preprocessingData.SetCell<T>(variableName, index, value);
            }
        }

        /// <summary>
        /// Overwrites the listed rows of the column with the average reported by the statistics
        /// helper. The average is fetched once, before any cell is changed.
        /// </summary>
        /// <param name="variableName">Name of the column to modify.</param>
        /// <param name="indices">Row indices whose cells are overwritten.</param>
        public void ReplaceIndicesByAverageValue(string variableName, IEnumerable<int> indices)
        {
            double average = statisticInfo.GetAverage(variableName);
            ReplaceIndicesByValue<double>(variableName, indices, average);
        }

        /// <summary>
        /// Overwrites the listed rows of the column with the median reported by the statistics
        /// helper. The median is fetched once, before any cell is changed.
        /// </summary>
        /// <param name="variableName">Name of the column to modify.</param>
        /// <param name="indices">Row indices whose cells are overwritten.</param>
        public void ReplaceIndicesByMedianValue(string variableName, IEnumerable<int> indices)
        {
            double median = statisticInfo.GetMedian(variableName);
            ReplaceIndicesByValue<double>(variableName, indices, median);
        }

        /// <summary>
        /// Overwrites each listed row of the column with an independently drawn random value
        /// between the column's minimum and maximum. Both bounds are read once, before any
        /// cell is changed.
        /// </summary>
        /// <param name="variableName">Name of the column to modify.</param>
        /// <param name="indices">Row indices whose cells are overwritten.</param>
        public void ReplaceIndicesByRandomValue(string variableName, IEnumerable<int> indices)
        {
            double max = statisticInfo.GetMax<double>(variableName);
            double min = statisticInfo.GetMin<double>(variableName);
            double span = max - min;

            foreach (int index in indices)
            {
                double value = random.NextDouble() * span + min;
                preprocessingData.SetCell<double>(variableName, index, value);
            }
        }

        /// <summary>
        /// Overwrites each listed row with the mean of its direct neighbours (row - 1 and row + 1).
        /// Rows without two neighbours (the first row, the last row and anything outside the
        /// column) are left untouched.
        /// </summary>
        /// <remarks>
        /// Neighbours are read at the moment a row is processed, so when adjacent rows are both
        /// listed, the later one sees the value already written for the earlier one.
        /// </remarks>
        /// <param name="variableName">Name of the column to modify.</param>
        /// <param name="indices">Row indices whose cells are overwritten.</param>
        public void ReplaceIndicesByLinearInterpolationOfNeighbours(string variableName, IEnumerable<int> indices)
        {
            int lastIndex = preprocessingData.GetValues<double>(variableName).Count() - 1;

            foreach (int index in indices)
            {
                // The last row has no successor; reading index + 1 there would run past the column.
                if (index <= 0 || index >= lastIndex)
                {
                    continue;
                }

                double prev = preprocessingData.GetCell<double>(variableName, index - 1);
                double next = preprocessingData.GetCell<double>(variableName, index + 1);

                preprocessingData.SetCell<double>(variableName, index, (prev + next) / 2);
            }
        }

        /// <summary>
        /// Overwrites the listed rows of the column with the column's most common value.
        /// Supported column types are double, string and DateTime.
        /// </summary>
        /// <param name="variableName">Name of the column to modify.</param>
        /// <param name="indices">Row indices whose cells are overwritten.</param>
        /// <exception cref="ArgumentException">If the column has none of the supported types.</exception>
        public void ReplaceIndicesByMostCommonValue(string variableName, IEnumerable<int> indices)
        {
            if (preprocessingData.IsType<double>(variableName))
            {
                ReplaceIndicesByValue<double>(variableName, indices, statisticInfo.GetMostCommonValue<double>(variableName));
            }
            else if (preprocessingData.IsType<string>(variableName))
            {
                ReplaceIndicesByValue<string>(variableName, indices, statisticInfo.GetMostCommonValue<string>(variableName));
            }
            else if (preprocessingData.IsType<DateTime>(variableName))
            {
                ReplaceIndicesByValue<DateTime>(variableName, indices, statisticInfo.GetMostCommonValue<DateTime>(variableName));
            }
            else
            {
                throw new ArgumentException("column with index: " + variableName + " contains a non supported type.");
            }
        }

        /// <summary>
        /// Shuffles the rows inside each given range independently, so rows never move from one
        /// range into another (e.g. between training and test partition). Every column is
        /// reordered with the same swap sequence, which keeps the cells of a row together.
        /// </summary>
        /// <remarks>
        /// NOTE(review): each range is treated as half-open, [Start, End) - confirm against the
        /// IntRange convention used by the callers.
        /// NOTE(review): columns that are neither double, string nor DateTime are skipped and
        /// therefore keep their original row order - confirm that no such columns can occur.
        /// </remarks>
        /// <param name="ranges">Row index ranges that are shuffled one after another.</param>
        /// <exception cref="ArgumentNullException">If <paramref name="ranges"/> is null.</exception>
        public void ShuffleWithRanges(IEnumerable<IntRange> ranges)
        {
            if (ranges == null)
            {
                throw new ArgumentNullException("ranges");
            }

            int rowCount = preprocessingData.Rows;

            foreach (IntRange range in ranges)
            {
                // Fewer than two rows: nothing to shuffle.
                if (range.End - range.Start < 2)
                {
                    continue;
                }

                // reOrderToIndices walks over all rows, so the list needs one entry per row.
                // Rows outside the range are paired with themselves and stay where they are.
                List<int> swapPartners = Enumerable.Range(0, rowCount).ToList();

                // Forward Fisher-Yates: row i swaps with a partner drawn from [Start, i].
                // The upper bound of Random.Next is exclusive, hence i + 1; drawing from
                // [Start, i) instead would never let a row keep its position.
                for (int i = range.Start + 1; i < range.End; i++)
                {
                    swapPartners[i] = random.Next(range.Start, i + 1);
                }

                foreach (string variableName in preprocessingData.VariableNames)
                {
                    if (preprocessingData.IsType<double>(variableName))
                    {
                        reOrderToIndices<double>(variableName, swapPartners);
                    }
                    else if (preprocessingData.IsType<string>(variableName))
                    {
                        reOrderToIndices<string>(variableName, swapPartners);
                    }
                    else if (preprocessingData.IsType<DateTime>(variableName))
                    {
                        reOrderToIndices<DateTime>(variableName, swapPartners);
                    }
                }
            }
        }

        /// <summary>
        /// Walks over all rows of the column in ascending order and swaps the cell of row i with
        /// the cell of row <c>indices[i]</c>.
        /// </summary>
        /// <typeparam name="T">Cell type of the column.</typeparam>
        /// <param name="variableName">Name of the column to reorder.</param>
        /// <param name="indices">
        /// Swap partner for each row; must hold at least one entry per row. An entry equal to
        /// its own position leaves that row untouched.
        /// </param>
        /// <exception cref="ArgumentNullException">If <paramref name="indices"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// If <paramref name="indices"/> holds fewer entries than the data has rows.
        /// </exception>
        public void reOrderToIndices<T>(string variableName, List<int> indices)
        {
            if (indices == null)
            {
                throw new ArgumentNullException("indices");
            }

            int rowCount = preprocessingData.Rows;

            // Checked before the first swap so that a short list cannot leave the column half reordered.
            if (indices.Count < rowCount)
            {
                throw new ArgumentOutOfRangeException("indices", "One swap index per row is required.");
            }

            for (int i = 0; i < rowCount; i++)
            {
                int replaceIndex = indices[i];
                if (replaceIndex == i)
                {
                    continue;
                }

                T current = preprocessingData.GetCell<T>(variableName, i);
                T replacement = preprocessingData.GetCell<T>(variableName, replaceIndex);

                preprocessingData.SetCell<T>(variableName, i, replacement);
                preprocessingData.SetCell<T>(variableName, replaceIndex, current);
            }
        }
    }
}
Note: See TracBrowser for help on using the repository browser.