Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/StatisticsLogic.cs @ 10383

Last change on this file since 10383 was 10383, checked in by pfleck, 10 years ago
  • Added ProblemDataCreator for instancing a new DataAnalysisProblemData with changed Dataset etc.
  • Added export functionality to PreprocessingContext. (cloned Algorithm or Problem)
  • Commented out code in StatisticsLogic which breaks the build. :(
File size: 5.3 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using HeuristicLab.Common;
5
namespace HeuristicLab.DataPreprocessing {

  /// <summary>
  /// Provides column- and row-level summary statistics (counts, min/max, median,
  /// average, most common value, spread) over an <see cref="IPreprocessingData"/>
  /// instance. Missing-value detection is delegated to an <see cref="ISearchLogic"/>.
  /// </summary>
  public class StatisticsLogic : IStatisticsLogic {

    private readonly IPreprocessingData preprocessingData;
    private readonly ISearchLogic searchLogic;

    /// <summary>
    /// Creates a statistics provider on top of the given data and search logic.
    /// </summary>
    /// <param name="thePreprocessingData">Data whose columns and rows are analysed.</param>
    /// <param name="theSearchLogic">Logic used to locate missing values.</param>
    public StatisticsLogic(IPreprocessingData thePreprocessingData, ISearchLogic theSearchLogic) {
      preprocessingData = thePreprocessingData;
      searchLogic = theSearchLogic;
    }

    /// <summary>Returns the number of columns reported by the underlying data.</summary>
    public int GetColumnCount() {
      return preprocessingData.Columns;
    }

    /// <summary>Returns the number of rows reported by the underlying data.</summary>
    public int GetRowCount() {
      return preprocessingData.Rows;
    }

    /// <summary>Returns the number of columns whose type is <see cref="double"/>.</summary>
    public int GetNumericColumnCount() {
      return Enumerable.Range(0, preprocessingData.Columns)
                       .Count(column => preprocessingData.IsType<double>(column));
    }

    /// <summary>
    /// Returns the number of columns that are not of type <see cref="double"/>
    /// (i.e. total column count minus <see cref="GetNumericColumnCount"/>).
    /// </summary>
    public int GetNominalColumnCount() {
      return preprocessingData.Columns - GetNumericColumnCount();
    }

    /// <summary>Returns the total number of missing values summed over all columns.</summary>
    public int GetMissingValueCount() {
      return Enumerable.Range(0, preprocessingData.Columns)
                       .Sum(column => GetMissingValueCount(column));
    }

    /// <summary>
    /// Returns the number of missing values in one column, as reported by the search logic.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetMissingValueCount(int columnIndex) {
      return searchLogic.GetMissingValueIndices(columnIndex).Count();
    }

    /// <summary>Returns the smallest value of the given column.</summary>
    /// <typeparam name="T">Element type the column is read as.</typeparam>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public T GetMin<T>(int columnIndex) where T : IComparable<T> {
      return preprocessingData.GetValues<T>(columnIndex).Min();
    }

    /// <summary>Returns the largest value of the given column.</summary>
    /// <typeparam name="T">Element type the column is read as.</typeparam>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public T GetMax<T>(int columnIndex) where T : IComparable<T> {
      return preprocessingData.GetValues<T>(columnIndex).Max();
    }

    /// <summary>
    /// Returns the median of a <see cref="double"/> column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>The median, or <see cref="double.NaN"/> if the column is not of type double.</returns>
    public double GetMedian(int columnIndex) {
      if (!preprocessingData.IsType<double>(columnIndex)) {
        return double.NaN;
      }
      return preprocessingData.GetValues<double>(columnIndex).Median();
    }

    /// <summary>
    /// Returns the arithmetic mean of a <see cref="double"/> column.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>The average, or <see cref="double.NaN"/> if the column is not of type double.</returns>
    public double GetAverage(int columnIndex) {
      if (!preprocessingData.IsType<double>(columnIndex)) {
        return double.NaN;
      }
      return preprocessingData.GetValues<double>(columnIndex).Average();
    }

    /// <summary>
    /// Returns the median of a <see cref="DateTime"/> column, computed on the values
    /// expressed in seconds and truncated to whole seconds.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>The median, or a default <see cref="DateTime"/> if the column is not of type DateTime.</returns>
    public DateTime GetMedianDateTime(int columnIndex) {
      if (!preprocessingData.IsType<DateTime>(columnIndex)) {
        return new DateTime();
      }
      return GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex).Median());
    }

    /// <summary>
    /// Returns the average of a <see cref="DateTime"/> column, computed on the values
    /// expressed in seconds and truncated to whole seconds.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>The average, or a default <see cref="DateTime"/> if the column is not of type DateTime.</returns>
    public DateTime GetAverageDateTime(int columnIndex) {
      if (!preprocessingData.IsType<DateTime>(columnIndex)) {
        return new DateTime();
      }
      return GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex).Average());
    }

    /// <summary>
    /// Returns the value that occurs most often in the given column.
    /// </summary>
    /// <typeparam name="T">Element type the column is read as.</typeparam>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public T GetMostCommonValue<T>(int columnIndex) {
      return preprocessingData.GetValues<T>(columnIndex)
                              .GroupBy(value => value)
                              .OrderByDescending(group => group.Count())
                              .Select(group => group.Key)
                              .First();
    }

    /// <summary>
    /// Returns the standard deviation of a <see cref="double"/> or <see cref="DateTime"/> column.
    /// DateTime columns are evaluated on their values expressed in seconds.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>The standard deviation, or <see cref="double.NaN"/> for any other column type.</returns>
    public double GetStandardDeviation(int columnIndex) {
      // NOTE(review): StandardDeviation() is expected to resolve the same way Median()
      // does (extension method brought in by the file's usings) - confirm in this branch.
      if (preprocessingData.IsType<double>(columnIndex)) {
        return preprocessingData.GetValues<double>(columnIndex).StandardDeviation();
      }
      if (preprocessingData.IsType<DateTime>(columnIndex)) {
        return GetDateTimeAsSeconds(columnIndex).StandardDeviation();
      }
      return double.NaN;
    }

    /// <summary>
    /// Returns the variance of a <see cref="double"/> or <see cref="DateTime"/> column.
    /// DateTime columns are evaluated on their values expressed in seconds.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <returns>The variance, or <see cref="double.NaN"/> for any other column type.</returns>
    public double GetVariance(int columnIndex) {
      // NOTE(review): Variance() is expected to resolve the same way Median()
      // does (extension method brought in by the file's usings) - confirm in this branch.
      if (preprocessingData.IsType<double>(columnIndex)) {
        return preprocessingData.GetValues<double>(columnIndex).Variance();
      }
      if (preprocessingData.IsType<DateTime>(columnIndex)) {
        return GetDateTimeAsSeconds(columnIndex).Variance();
      }
      return double.NaN;
    }

    /// <summary>Returns the number of distinct values in the given column.</summary>
    /// <typeparam name="T">Element type the column is read as.</typeparam>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public int GetDifferentValuesCount<T>(int columnIndex) {
      return preprocessingData.GetValues<T>(columnIndex).Distinct().Count();
    }

    /// <summary>
    /// Returns how many columns have a missing value in the given row,
    /// as reported by the search logic.
    /// </summary>
    /// <param name="rowIndex">Index of the row to inspect.</param>
    public int GetRowMissingValueCount(int rowIndex) {
      return Enumerable.Range(0, preprocessingData.Columns)
                       .Count(column => searchLogic.IsMissingValue(column, rowIndex));
    }

    /// <summary>Returns the variable name of the given column.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public string GetVariableName(int columnIndex) {
      return preprocessingData.GetVariableName(columnIndex);
    }

    /// <summary>Returns whether the given column is of type <typeparamref name="T"/>.</summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public bool IsType<T>(int columnIndex) {
      return preprocessingData.IsType<T>(columnIndex);
    }

    /// <summary>
    /// Returns a display name for the column type: "double", "string" or "DateTime",
    /// and "Unknown Type" for anything else.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    public string GetColumnTypeAsString(int columnIndex) {
      if (preprocessingData.IsType<double>(columnIndex)) {
        return "double";
      }
      if (preprocessingData.IsType<string>(columnIndex)) {
        return "string";
      }
      if (preprocessingData.IsType<DateTime>(columnIndex)) {
        return "DateTime";
      }
      return "Unknown Type";
    }

    /// <summary>
    /// Projects a DateTime column to seconds (ticks divided by ticks-per-second),
    /// so that the numeric statistics can be applied to it.
    /// </summary>
    private IEnumerable<double> GetDateTimeAsSeconds(int columnIndex) {
      return preprocessingData.GetValues<DateTime>(columnIndex)
                              .Select(x => (double)x.Ticks / TimeSpan.TicksPerSecond);
    }

    /// <summary>
    /// Converts a seconds value produced from <see cref="GetDateTimeAsSeconds"/> back
    /// into a <see cref="DateTime"/>, truncated to whole seconds.
    /// </summary>
    /// <param name="seconds">Seconds counted from the default (zero-tick) DateTime.</param>
    /// <returns>
    /// The corresponding DateTime; a default <see cref="DateTime"/> if
    /// <paramref name="seconds"/> is NaN or infinite.
    /// </returns>
    private DateTime GetSecondsAsDateTime(double seconds) {
      // Converting NaN/infinity to an integral type yields an unspecified value.
      if (double.IsNaN(seconds) || double.IsInfinity(seconds)) {
        return new DateTime();
      }
      // Seconds for any present-day date exceed int.MaxValue, so truncate to long
      // and build the DateTime from ticks instead of going through an int-based TimeSpan.
      long wholeSeconds = (long)seconds;
      return new DateTime(wholeSeconds * TimeSpan.TicksPerSecond);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.