Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/PreprocessingDataStatisticInfo.cs @ 10161

Last change on this file since 10161 was 10161, checked in by rstoll, 10 years ago

renamed to PreprocessingData

File size: 5.5 KB
Line 
1using System;
2using HeuristicLab.DataPreprocessing.Interfaces;
3
4namespace HeuristicLab.DataPreprocessing {
5  class DatasetStatisticInfo : IDatasetStatisticInfo {
6
7    private IPreprocessingData preprocessingData;
8
9    public DatasetStatisticInfo(IPreprocessingData theDataSet) {
10      preprocessingData = theDataSet;
11    }
12
13
14    public int GetColumnCount() {
15      return preprocessingData.Columns;
16    }
17
18    public int GetRowCount() {
19      return preprocessingData.Rows;
20    }
21
22    public int GetNumericColumnCount() {
23      int count = 0;
24      for (int i = 0; i < preprocessingData.Columns; ++i) {
25        if (preprocessingData.IsType<double>(i)) {
26          ++count;
27        }
28      }
29      return count;
30    }
31
32    public int GetNominalColumnCount() {
33      return preprocessingData.Columns - GetNumericColumnCount();
34    }
35
36    public int GetMissingValueCount() {
37      int count = 0;
38      for (int i = 0; i < preprocessingData.Columns; ++i) {
39        count += GetMissingValueCount(i);
40      }
41      return count;
42    }
43
44    public int GetMissingValueCount(int columnIndex) {
45      throw new System.NotImplementedException();
46      //Func<dynamic, bool> isMissingValueFunc;
47      //if (preprocessingData.IsType<double>(columnIndex)) {
48      //  isMissingValueFunc = IsMissingDoubleValue;
49      //} else if (preprocessingData.IsType<string>(columnIndex)) {
50      //  isMissingValueFunc = IsMissingStringValue;
51      //} else if (preprocessingData.IsType<DateTime>(columnIndex)) {
52      //  isMissingValueFunc = isMissingDateTimeValue;
53      //} else {
54      //  throw new ArgumentException("column with index: " + columnIndex + " contains a non supported type.");
55      //}
56
57      //int count = 0;
58      //for (int i = 0; i < preprocessingData.Rows; ++i) {
59      //  if (isMissingValueFunc(preprocessingData.GetCell(i, columnIndex))) {
60      //    ++count;
61      //  }
62      //}
63      //return count;
64    }
65
66    private bool IsMissingDoubleValue(string value) {
67      double dummy;
68      bool couldNotParse = !double.TryParse(value, out dummy);
69      return couldNotParse || double.IsNaN(dummy);
70    }
71
72    private bool IsMissingStringValue(string value) {
73      return string.IsNullOrEmpty(value);
74    }
75
76    private bool isMissingDateTimeValue(string value) {
77      DateTime dateTime;
78      bool couldNotParse = DateTime.TryParse(value, out dateTime);
79      return couldNotParse || dateTime.Equals(DateTime.MinValue);
80    }
81
82    public T GetMin<T>(int columnIndex) where T : IComparable<T> {
83      if (!preprocessingData.IsType<double>(columnIndex)) {
84        throw new ArgumentException("column with index: " + columnIndex + " was assumed to be of type " + typeof(T).Name + " but was different.");
85      }
86      if (typeof(T) == typeof(double)) {
87        return (dynamic)GetMin(columnIndex, double.MaxValue, IsMissingDoubleValue, double.Parse); ;
88      } else if (typeof(T) == typeof(DateTime)) {
89        return (dynamic)GetMin(columnIndex, DateTime.MaxValue, IsMissingDoubleValue, DateTime.Parse);
90      } else {
91        throw new ArgumentException("type of T is not supported");
92      }
93    }
94
95    public T GetMax<T>(int columnIndex) where T : IComparable<T> {
96      if (!preprocessingData.IsType<double>(columnIndex)) {
97        throw new ArgumentException("column with index: " + columnIndex + " was assumed to be of type " + typeof(T).Name + " but was different.");
98      }
99      if (typeof(T) == typeof(double)) {
100        return (dynamic)GetMax(columnIndex, double.MinValue, IsMissingDoubleValue, double.Parse); ;
101      } else if (typeof(T) == typeof(DateTime)) {
102        return (dynamic)GetMax(columnIndex, DateTime.MinValue, IsMissingDoubleValue, DateTime.Parse);
103      } else {
104        throw new ArgumentException("type of T is not supported");
105      }
106    }
107
108    private T GetMin<T>(int columnIndex, T max, Func<string, bool> isMissingValueFunc, Func<string, T> parseFunc) where T : IComparable<T> {
109      throw new System.NotImplementedException();
110      //T min = max;
111      //for (int i = 0; i < preprocessingData.Rows; ++i) {
112      //  var value = preprocessingData.GetValue(i, columnIndex);
113      //  if (!isMissingValueFunc(value)) {
114      //    T parsedValue = parseFunc(value);
115      //    if (parsedValue.CompareTo(min) < 0) {
116      //      min = parsedValue;
117      //    }
118      //  }
119      //}
120      //return min;
121    }
122
123    private T GetMax<T>(int columnIndex, T min, Func<string, bool> isMissingValueFunc, Func<string, T> parseFunc) where T : IComparable<T> {
124      throw new System.NotImplementedException();
125      //T max = min;
126      //for (int i = 0; i < preprocessingData.Rows; ++i) {
127      //  var value = preprocessingData.GetValue(i, columnIndex);
128      //  if (!isMissingValueFunc(value)) {
129      //    T parsedValue = parseFunc(value);
130      //    if (parsedValue.CompareTo(min) > 0) {
131      //      max = parsedValue;
132      //    }
133      //  }
134      //}
135      //return max;
136    }
137
138
139
140
141
142    public double GetMedian(int columnIndex) {
143      throw new System.NotImplementedException();
144    }
145
146    public double GetAverage(int columnIndex) {
147      throw new System.NotImplementedException();
148    }
149
150    public double GetMostCommonValue(int columnIndex) {
151      double result = 0;
152      for (int i = 0; i < preprocessingData.Rows; ++i) {
153
154      }
155      return result;
156    }
157
158    public double GetStandardDeviation(int columnIndex) {
159      throw new System.NotImplementedException();
160    }
161
162  }
163}
Note: See TracBrowser for help on using the repository browser.