Changeset 1786
- Timestamp: 05/13/09 15:38:26 (16 years ago)
- Location: trunk/sources
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
trunk/sources/HeuristicLab.DataAnalysis/3.2/Dataset.cs
r1784 r1786 52 52 public int Columns { 53 53 get { return columns; } 54 set { 54 set { 55 55 columns = value; 56 56 if (variableNames == null || variableNames.Length != columns) { … … 72 72 73 73 public void SetValue(int i, int j, double v) { 74 if (v != samples[columns * i + j]) {74 if (v != samples[columns * i + j]) { 75 75 samples[columns * i + j] = v; 76 76 CreateDictionaries(); … … 84 84 scalingFactor = new double[columns]; 85 85 scalingOffset = new double[columns]; 86 for (int i = 0; i < scalingFactor.Length; i++) {86 for (int i = 0; i < scalingFactor.Length; i++) { 87 87 scalingFactor[i] = 1.0; 88 88 scalingOffset[i] = 0.0; … … 110 110 cachedMeans = new Dictionary<int, Dictionary<int, double>>[columns]; 111 111 cachedRanges = new Dictionary<int, Dictionary<int, double>>[columns]; 112 for (int i = 0; i < columns; i++) {112 for (int i = 0; i < columns; i++) { 113 113 cachedMeans[i] = new Dictionary<int, Dictionary<int, double>>(); 114 114 cachedRanges[i] = new Dictionary<int, Dictionary<int, double>>(); … … 176 176 177 177 variableNames = ParseVariableNamesString(node.Attributes["VariableNames"].Value); 178 if (node.Attributes["ScalingFactors"] != null)178 if (node.Attributes["ScalingFactors"] != null) 179 179 scalingFactor = ParseDoubleString(node.Attributes["ScalingFactors"].Value); 180 180 else { 181 181 scalingFactor = new double[columns]; // compatibility with old serialization format 182 for (int i = 0; i < scalingFactor.Length; i++) scalingFactor[i] = 1.0;183 } 184 if (node.Attributes["ScalingOffsets"] != null)182 for (int i = 0; i < scalingFactor.Length; i++) scalingFactor[i] = 1.0; 183 } 184 if (node.Attributes["ScalingOffsets"] != null) 185 185 scalingOffset = ParseDoubleString(node.Attributes["ScalingOffsets"].Value); 186 186 else { 187 187 scalingOffset = new double[columns]; // compatibility with old serialization format 188 for (int i = 0; i < scalingOffset.Length; i++) scalingOffset[i] = 0.0;188 for (int i = 0; i < scalingOffset.Length; i++) 
scalingOffset[i] = 0.0; 189 189 } 190 190 191 191 string[] tokens = node.InnerText.Split(';'); 192 if (tokens.Length != rows * columns) throw new FormatException();192 if (tokens.Length != rows * columns) throw new FormatException(); 193 193 samples = new double[rows * columns]; 194 for (int row = 0; row < rows; row++) {195 for (int column = 0; column < columns; column++) {196 if (double.TryParse(tokens[row * columns + column], NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out samples[row * columns + column]) == false) {194 for (int row = 0; row < rows; row++) { 195 for (int column = 0; column < columns; column++) { 196 if (double.TryParse(tokens[row * columns + column], NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out samples[row * columns + column]) == false) { 197 197 throw new FormatException("Can't parse " + tokens[row * columns + column] + " as double value."); 198 198 } … … 208 208 private string ToString(NumberFormatInfo format) { 209 209 StringBuilder builder = new StringBuilder(); 210 for (int row = 0; row < rows; row++) {211 for (int column = 0; column < columns; column++) {210 for (int row = 0; row < rows; row++) { 211 for (int column = 0; column < columns; column++) { 212 212 builder.Append(";"); 213 213 builder.Append(samples[row * columns + column].ToString("r", format)); 214 214 } 215 215 } 216 if (builder.Length > 0) builder.Remove(0, 1);216 if (builder.Length > 0) builder.Remove(0, 1); 217 217 return builder.ToString(); 218 218 } … … 220 220 private string GetVariableNamesString() { 221 221 string s = ""; 222 for (int i = 0; i < variableNames.Length; i++) {222 for (int i = 0; i < variableNames.Length; i++) { 223 223 s += variableNames[i] + "; "; 224 224 } 225 225 226 if (variableNames.Length > 0) {226 if (variableNames.Length > 0) { 227 227 s = s.TrimEnd(';', ' '); 228 228 } … … 231 231 private string GetString(double[] xs) { 232 232 string s = ""; 233 for (int i = 0; i < xs.Length; i++) {233 for (int i = 0; i < 
xs.Length; i++) { 234 234 s += xs[i].ToString("r", CultureInfo.InvariantCulture) + "; "; 235 235 } 236 236 237 if (xs.Length > 0) {237 if (xs.Length > 0) { 238 238 s = s.TrimEnd(';', ' '); 239 239 } … … 244 244 p = p.Trim(); 245 245 string[] tokens = p.Split(new char[] { ';' }, StringSplitOptions.RemoveEmptyEntries); 246 for (int i = 0; i < tokens.Length; i++) tokens[i] = tokens[i].Trim();246 for (int i = 0; i < tokens.Length; i++) tokens[i] = tokens[i].Trim(); 247 247 return tokens; 248 248 } … … 251 251 string[] ss = s.Split(new char[] { ';' }, StringSplitOptions.RemoveEmptyEntries); 252 252 double[] xs = new double[ss.Length]; 253 for (int i = 0; i < xs.Length; i++) {253 for (int i = 0; i < xs.Length; i++) { 254 254 xs[i] = double.Parse(ss[i], CultureInfo.InvariantCulture); 255 255 } … … 262 262 263 263 public double GetMean(int column, int from, int to) { 264 if (!cachedMeans[column].ContainsKey(from) || !cachedMeans[column][from].ContainsKey(to)) {264 if (!cachedMeans[column].ContainsKey(from) || !cachedMeans[column][from].ContainsKey(to)) { 265 265 double[] values = new double[to - from]; 266 for (int sample = from; sample < to; sample++) {266 for (int sample = from; sample < to; sample++) { 267 267 values[sample - from] = GetValue(sample, column); 268 268 } 269 269 double mean = Statistics.Mean(values); 270 if (!cachedMeans[column].ContainsKey(from)) cachedMeans[column][from] = new Dictionary<int, double>();270 if (!cachedMeans[column].ContainsKey(from)) cachedMeans[column][from] = new Dictionary<int, double>(); 271 271 cachedMeans[column][from][to] = mean; 272 272 return mean; … … 281 281 282 282 public double GetRange(int column, int from, int to) { 283 if (!cachedRanges[column].ContainsKey(from) || !cachedRanges[column][from].ContainsKey(to)) {283 if (!cachedRanges[column].ContainsKey(from) || !cachedRanges[column][from].ContainsKey(to)) { 284 284 double[] values = new double[to - from]; 285 for (int sample = from; sample < to; sample++) {285 for (int 
sample = from; sample < to; sample++) { 286 286 values[sample - from] = GetValue(sample, column); 287 287 } 288 288 double range = Statistics.Range(values); 289 if (!cachedRanges[column].ContainsKey(from)) cachedRanges[column][from] = new Dictionary<int, double>();289 if (!cachedRanges[column].ContainsKey(from)) cachedRanges[column][from] = new Dictionary<int, double>(); 290 290 cachedRanges[column][from][to] = range; 291 291 return range; … … 297 297 public double GetMaximum(int column) { 298 298 double max = Double.NegativeInfinity; 299 for (int i = 0; i < Rows; i++) {299 for (int i = 0; i < Rows; i++) { 300 300 double val = GetValue(i, column); 301 if (val > max) max = val;301 if (!double.IsNaN(val) && val > max) max = val; 302 302 } 303 303 return max; … … 306 306 public double GetMinimum(int column) { 307 307 double min = Double.PositiveInfinity; 308 for (int i = 0; i < Rows; i++) {308 for (int i = 0; i < Rows; i++) { 309 309 double val = GetValue(i, column); 310 if (val < min) min = val;310 if (!double.IsNaN(val) && val < min) min = val; 311 311 } 312 312 return min; … … 314 314 315 315 internal void ScaleVariable(int column) { 316 if (scalingFactor[column] == 1.0 && scalingOffset[column] == 0.0) {316 if (scalingFactor[column] == 1.0 && scalingOffset[column] == 0.0) { 317 317 double min = GetMinimum(column); 318 318 double max = GetMaximum(column); 319 319 double range = max - min; 320 if (range == 0) ScaleVariable(column, 1.0, -min);320 if (range == 0) ScaleVariable(column, 1.0, -min); 321 321 else ScaleVariable(column, 1.0 / range, -min); 322 322 } … … 328 328 scalingFactor[column] = factor; 329 329 scalingOffset[column] = offset; 330 for (int i = 0; i < Rows; i++) {330 for (int i = 0; i < Rows; i++) { 331 331 double origValue = samples[i * columns + column]; 332 332 samples[i * columns + column] = (origValue + offset) * factor; … … 337 337 338 338 internal void UnscaleVariable(int column) { 339 if (scalingFactor[column] != 1.0 || 
scalingOffset[column]!=0.0) {340 for (int i = 0; i < rows; i++) {339 if (scalingFactor[column] != 1.0 || scalingOffset[column] != 0.0) { 340 for (int i = 0; i < rows; i++) { 341 341 double scaledValue = samples[i * columns + column]; 342 342 samples[i * columns + column] = scaledValue / scalingFactor[column] - scalingOffset[column]; -
trunk/sources/HeuristicLab.DataAnalysis/3.2/Statistics.cs
r1529 r1786 41 41 T minimum = enumerator.Current; 42 42 43 while (enumerator.MoveNext()) {43 while (enumerator.MoveNext()) { 44 44 T current = enumerator.Current; 45 if (current.CompareTo(minimum) < 0) {45 if (current.CompareTo(minimum) < 0) { 46 46 minimum = current; 47 47 } … … 65 65 T maximum = enumerator.Current; 66 66 67 while (enumerator.MoveNext()) {67 while (enumerator.MoveNext()) { 68 68 T current = enumerator.Current; 69 if (current.CompareTo(maximum) > 0) {69 if (current.CompareTo(maximum) > 0) { 70 70 maximum = current; 71 71 } … … 99 99 /// <returns></returns> 100 100 public static double Range(double[] values, int start, int end) { 101 if (start < 0 || start > values.Length || end < 0 || end > values.Length || start > end) {101 if (start < 0 || start > values.Length || end < 0 || end > values.Length || start > end) { 102 102 throw new InvalidOperationException(); 103 103 } … … 105 105 double minimum = values[start]; 106 106 double maximum = minimum; 107 for(int i = start; i < end; i++) { 108 if(values[i] > maximum) { 109 maximum = values[i]; 110 } 111 if(values[i] < minimum) { 112 minimum = values[i]; 107 for (int i = start; i < end; i++) { 108 if (!double.IsNaN(values[i])) { 109 if (values[i] > maximum) { 110 maximum = values[i]; 111 } 112 if (values[i] < minimum) { 113 minimum = values[i]; 114 } 113 115 } 114 116 } … … 124 126 int n = values.Length; 125 127 double sum = 0.0; 126 for (int i = 0; i < n; i++) {127 if (double.IsNaN(values[i])) {128 for (int i = 0; i < n; i++) { 129 if (double.IsNaN(values[i])) { 128 130 throw new NotFiniteNumberException(); 129 131 } else { … … 156 158 /// <returns></returns> 157 159 public static double Mean(double[] values, int start, int end) { 158 if (values.Length == 0) throw new InvalidOperationException();160 if (values.Length == 0) throw new InvalidOperationException(); 159 161 double sum = 0.0; 160 162 int n = 0; 161 for (int i = start; i < end; i++) {162 if (!double.IsNaN(values[i])) {163 for (int i = start; i 
< end; i++) { 164 if (!double.IsNaN(values[i])) { 163 165 sum += values[i]; 164 166 n++; … … 175 177 /// <returns></returns> 176 178 public static double Median(double[] values) { 177 if (values.Length == 0) throw new InvalidOperationException();179 if (values.Length == 0) throw new InvalidOperationException(); 178 180 int n = values.Length; 179 if (n == 0)181 if (n == 0) 180 182 return 0; 181 183 … … 186 188 187 189 // return the middle element (if n is uneven) or the average of the two middle elements if n is even. 188 if (n % 2 == 1) {190 if (n % 2 == 1) { 189 191 return sortedValues[n / 2]; 190 192 } else { -
trunk/sources/HeuristicLab.GP.StructureIdentification/3.3/BakedTreeEvaluator.cs
r1529 r1786 56 56 public void ResetEvaluator(BakedFunctionTree functionTree, Dataset dataset, int targetVariable, int start, int end, double punishmentFactor) { 57 57 this.dataset = dataset; 58 double maximumPunishment = punishmentFactor * dataset.GetRange(targetVariable );58 double maximumPunishment = punishmentFactor * dataset.GetRange(targetVariable, start, end); 59 59 60 60 // get the mean of the values of the target variable to determine the max and min bounds of the estimated value 61 double targetMean = dataset.GetMean(targetVariable, start, end - 1);61 double targetMean = dataset.GetMean(targetVariable, start, end); 62 62 estimatedValueMin = targetMean - maximumPunishment; 63 63 estimatedValueMax = targetMean + maximumPunishment;
Note: See TracChangeset for help on using the changeset viewer.