1 | using System;
|
---|
2 | using System.IO;
|
---|
3 | using System.Collections.Generic;
|
---|
4 | using System.Globalization;
|
---|
5 | using System.Linq;
|
---|
6 | using HeuristicLab.Problems.DataAnalysis;
|
---|
7 | using System.Data;
|
---|
8 | using System.Diagnostics;
|
---|
9 | using System.Threading;
|
---|
10 | using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
|
---|
11 |
|
---|
namespace HeuristicLab.Algorithms.DataAnalysis.FastFunctionExtraction {
  /// <summary>
  /// Console driver that runs <c>FastFunctionExtraction.Fit</c> on every file of a data
  /// directory and appends the error measures of each returned model to a results file.
  /// </summary>
  class Program {
    // Single culture instance used for every number parsed from or written to a data file.
    private static readonly CultureInfo CsvCulture = new CultureInfo("en-US");

    /// <summary>
    /// Writes the header line of the results file and then runs FFX on each file found in the
    /// "pennML_data" directory, printing progress and the measured runtime to the console.
    /// </summary>
    private static void TestPennML() {
      // NOTE(review): FullName carries no trailing separator, so "../" is appended directly to
      // the name of the grandparent directory - confirm this resolves to the intended folder.
      string projectDirectoryPath = Directory.GetParent(Environment.CurrentDirectory).Parent.FullName + "../branches/3022-FastFunctionExtraction/TestFFX/";
      string pennMLDataPath = projectDirectoryPath + "/pennML_data/";
      string targetPath = projectDirectoryPath + "/results/results.txt";
      var dInfo = new DirectoryInfo(pennMLDataPath);

      // (Re)create the results file so that it starts with the header line only; the using
      // block closes the file even when the write fails.
      using (var streamWriter = new StreamWriter(targetPath)) {
        streamWriter.WriteLine("dataset,algorithm,num_bases,train_mse,train_mae,test_mse,test_mae,runtime");
      }

      // List the directory once so the displayed total always matches the files processed.
      FileInfo[] files = dInfo.GetFiles();
      int len = files.Length;
      int idx = 0;
      foreach (var fInfo in files) {
        // Debugging aid: uncomment to restrict the run to a single dataset.
        //if (fInfo.Name != "1089_USCrime.csv") continue;
        Console.Write($"{++idx}/{len}\t{fInfo.Name,-35}:");
        RunFFXOnDataset(fInfo, targetPath, out var elapsedSeconds);
        Console.WriteLine($"{elapsedSeconds + " sec",-15}");
      }
    }

    /// <summary>
    /// Appends one comma-separated line per solution to <paramref name="filePath"/>.
    /// Each line holds: dataset name, the literal "hl_ffx", number of bases, training MSE,
    /// training MAE, test MSE, test MAE and the runtime.
    /// </summary>
    /// <param name="regressionSolutions">Solutions whose error measures are written.</param>
    /// <param name="numBases">Number of bases per solution; read at the same index as the solution.</param>
    /// <param name="runtime">Runtime value written on every line.</param>
    /// <param name="filePath">File to append to; nothing is written when it is null or empty.</param>
    /// <param name="problemName">File name of the dataset; its extension is dropped for the output.</param>
    private static void SaveAccuracyInFile(ISymbolicRegressionSolution[] regressionSolutions, int[] numBases, double runtime, string filePath, string problemName) {
      if (string.IsNullOrEmpty(filePath)) {
        return;
      }

      // Strips whatever extension is present instead of blindly cutting the last four characters.
      string datasetName = Path.GetFileNameWithoutExtension(problemName);

      using (var sw = new StreamWriter(filePath, true)) {
        for (int i = 0; i < regressionSolutions.Length; i++) {
          var solution = regressionSolutions[i];
          string outputStr = String.Join(",", new[] {
            datasetName,
            "hl_ffx",
            numBases[i].ToString(CsvCulture),
            solution.TrainingMeanSquaredError.ToString(CsvCulture),
            solution.TrainingMeanAbsoluteError.ToString(CsvCulture),
            solution.TestMeanSquaredError.ToString(CsvCulture),
            solution.TestMeanAbsoluteError.ToString(CsvCulture),
            runtime.ToString(CsvCulture)
          });
          // Write the line verbatim: passing a second argument would select the composite-format
          // overload and make any '{' or '}' in the text throw a FormatException.
          sw.WriteLine(outputStr);
        }
      }

      // Kept from the original ("to prevent race conditions with stream writers").
      // NOTE(review): the writer is already closed at this point - confirm the pause is still needed.
      Thread.Sleep(100);
    }

    /// <summary>
    /// Parses the given file, fits FFX models on it and appends their accuracy to
    /// <paramref name="outFilePath"/>.
    /// </summary>
    /// <param name="fInfo">Data file to process.</param>
    /// <param name="outFilePath">Results file the accuracy lines are appended to.</param>
    /// <param name="elapsedSeconds">Seconds spent in the Fit call; parsing and saving are not timed.</param>
    private static void RunFFXOnDataset(FileInfo fInfo, string outFilePath, out double elapsedSeconds) {
      var data = ParseProblemDataFromFile(fInfo.FullName);

      // Only the model fitting (including materializing its result) is timed.
      Stopwatch sw = Stopwatch.StartNew();
      var regressionModels = FastFunctionExtraction.Fit(data, 0.95, out var numBases, true, true, true, true, true, maxNumBases: 10).ToArray();
      sw.Stop();
      elapsedSeconds = sw.Elapsed.TotalSeconds;

      var regressionSolutions = regressionModels.Select(model => new SymbolicRegressionSolution(model, data)).ToArray();
      SaveAccuracyInFile(regressionSolutions, numBases.ToArray(), elapsedSeconds, outFilePath, fInfo.Name);
    }

    /// <summary>
    /// Reads a separated-values file into regression problem data. The first line supplies the
    /// variable names, the last variable is the target and every variable with a different name
    /// is an allowed input. Reading stops at the first blank line or at the end of the file.
    /// </summary>
    /// <param name="filepath">Path of the file to read.</param>
    /// <param name="separator">Character that separates the cells of a line.</param>
    /// <returns>Problem data built from the parsed columns.</returns>
    /// <exception cref="InvalidDataException">
    /// The file is empty or a data line does not have as many cells as the header.
    /// </exception>
    private static IRegressionProblemData ParseProblemDataFromFile(string filepath, char separator = ';') {
      string[] variables;
      List<double>[] vals;

      // The using block releases the file handle even when parsing fails.
      using (var reader = new StreamReader(filepath)) {
        string header = reader.ReadLine();
        if (header == null) {
          throw new InvalidDataException($"File '{filepath}' is empty.");
        }

        variables = header.Split(separator);
        vals = Enumerable.Range(0, variables.Length).Select(_ => new List<double>()).ToArray();

        int lineNumber = 1;
        string line = reader.ReadLine();
        while (!String.IsNullOrWhiteSpace(line)) {
          lineNumber++;
          string[] cells = line.Split(separator);
          // Report a malformed row with its position instead of failing with an index error
          // or silently producing columns of different lengths.
          if (cells.Length != variables.Length) {
            throw new InvalidDataException($"Line {lineNumber} of '{filepath}' has {cells.Length} values but the header declares {variables.Length} variables.");
          }
          for (int i = 0; i < cells.Length; i++) {
            vals[i].Add(Convert.ToDouble(cells[i], CsvCulture));
          }
          line = reader.ReadLine();
        }
      }

      var targetVar = variables.Last();
      var allowedInputVars = variables.Where(val => val != targetVar);
      IDataset dataset = new Dataset(variables, vals);
      return new RegressionProblemData(dataset, allowedInputVars, targetVar);
    }

    /// <summary>
    /// Entry point: runs the benchmark and waits for a line of console input before exiting.
    /// </summary>
    static void Main() {
      TestPennML();
      Console.WriteLine("Done.");
      Console.ReadLine();
    }
  }
}