[17738] | 1 | using System;
|
---|
| 2 | using System.IO;
|
---|
| 3 | using System.Collections.Generic;
|
---|
| 4 | using System.Globalization;
|
---|
| 5 | using System.Linq;
|
---|
| 6 | using HeuristicLab.Problems.DataAnalysis;
|
---|
| 7 | using System.Data;
|
---|
| 8 | using System.Diagnostics;
|
---|
| 9 | using System.Threading;
|
---|
| 10 | using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
|
---|
| 11 |
|
---|
| 12 | namespace HeuristicLab.Algorithms.DataAnalysis.FastFunctionExtraction {
|
---|
/// <summary>
/// Console driver that runs <c>FastFunctionExtraction.Fit</c> on every file of the
/// PennML data directory and appends the accuracy of each resulting model to a
/// comma-separated results file.
/// </summary>
class Program {
  /// <summary>
  /// Culture used for parsing the input files and for formatting the numbers
  /// written to the results file, independent of the machine's current culture.
  /// </summary>
  private static readonly CultureInfo EnUsCulture = new CultureInfo("en-US");

  /// <summary>
  /// Recreates the results file with a header line and then runs FFX on every
  /// file found in the <c>pennML_data</c> directory, printing progress and the
  /// elapsed time per dataset to the console.
  /// </summary>
  private static void TestPennML() {
    // NOTE(review): there is no directory separator between FullName and "../branches",
    // so the ".." is glued onto the last path segment. Left unchanged because the
    // intended directory layout cannot be confirmed from this file - please verify.
    string projectDirectoryPath = Directory.GetParent(Environment.CurrentDirectory).Parent.FullName + "../branches/3022-FastFunctionExtraction/TestFFX/";
    string pennMLDataPath = projectDirectoryPath + "/pennML_data/";
    string targetPath = projectDirectoryPath + "/results/results.txt";

    // Overwrite any previous results file so it starts with just the header;
    // the using block closes the file even if the write fails.
    using (var headerWriter = new StreamWriter(targetPath)) {
      headerWriter.WriteLine("dataset,algorithm,num_bases,train_mse,train_mae,test_mse,test_mae,runtime");
    }

    // List the directory once so the printed total matches the files that are processed.
    FileInfo[] dataFiles = new DirectoryInfo(pennMLDataPath).GetFiles();
    int idx = 0;
    foreach (var fInfo in dataFiles) {
      //if (fInfo.Name != "1089_USCrime.csv") continue;
      Console.Write($"{++idx}/{dataFiles.Length}\t{fInfo.Name,-35}:");
      RunFFXOnDataset(fInfo, targetPath, out var elapsedSeconds);
      Console.WriteLine($"{elapsedSeconds + " sec",-15}");
    }
  }

  /// <summary>
  /// Appends one line per solution to <paramref name="filePath"/> with the columns
  /// dataset, algorithm ("hl_ffx"), number of bases, training MSE/MAE, test MSE/MAE and runtime.
  /// </summary>
  /// <param name="regressionSolutions">Solutions whose error measures are written.</param>
  /// <param name="numBases">Value for the num_bases column; entry <c>i</c> is written next to solution <c>i</c>.</param>
  /// <param name="runtime">Value for the runtime column; the same value is written on every line.</param>
  /// <param name="filePath">File to append to. Nothing is written when it is null or empty.</param>
  /// <param name="problemName">File name of the dataset; its extension is removed for the dataset column.</param>
  private static void SaveAccuracyInFile(ISymbolicRegressionSolution[] regressionSolutions, int[] numBases, double runtime, string filePath, string problemName) {
    if (string.IsNullOrEmpty(filePath)) return;

    // Strips the extension regardless of its length and does not throw for short names.
    string datasetName = Path.GetFileNameWithoutExtension(problemName);

    using (var writer = new StreamWriter(filePath, true)) {
      for (int i = 0; i < regressionSolutions.Length; i++) {
        var solution = regressionSolutions[i];
        string row = string.Join(",", new[] {
          datasetName,
          "hl_ffx",
          numBases[i].ToString(EnUsCulture),
          solution.TrainingMeanSquaredError.ToString(EnUsCulture),
          solution.TrainingMeanAbsoluteError.ToString(EnUsCulture),
          solution.TestMeanSquaredError.ToString(EnUsCulture),
          solution.TestMeanAbsoluteError.ToString(EnUsCulture),
          runtime.ToString(EnUsCulture)
        });
        // Single-argument overload on purpose: WriteLine(string, object) would treat
        // the row as a composite format string and fail on '{' or '}' in the dataset name.
        writer.WriteLine(row);
      }
    }

    // Pause kept from the original code, which added it "to prevent race conditions
    // with stream writers".
    Thread.Sleep(100);
  }

  /// <summary>
  /// Loads the dataset in <paramref name="fInfo"/>, fits FFX models on it and appends
  /// the accuracy of the resulting solutions to <paramref name="outFilePath"/>.
  /// </summary>
  /// <param name="fInfo">Dataset file to process.</param>
  /// <param name="outFilePath">Results file the metrics are appended to.</param>
  /// <param name="elapsedSeconds">Wall-clock seconds spent inside the <c>Fit</c> call only; loading the file and building the solutions are not timed.</param>
  private static void RunFFXOnDataset(FileInfo fInfo, string outFilePath, out double elapsedSeconds) {
    var data = ParseProblemDataFromFile(fInfo.FullName);

    var timer = Stopwatch.StartNew();
    var regressionModels = FastFunctionExtraction.Fit(data, 0.95, out var numBases, true, true, true, true, true, maxNumBases: 10).ToArray();
    timer.Stop();

    var regressionSolutions = regressionModels.Select(model => new SymbolicRegressionSolution(model, data)).ToArray();
    elapsedSeconds = timer.Elapsed.TotalSeconds;
    SaveAccuracyInFile(regressionSolutions, numBases.ToArray(), elapsedSeconds, outFilePath, fInfo.Name);
  }

  /// <summary>
  /// Reads a delimited text file into regression problem data. The first line holds
  /// the variable names; the last variable is used as the target and all variables
  /// with a different name as allowed inputs. Reading stops at the first empty or
  /// whitespace-only line or at the end of the file.
  /// </summary>
  /// <param name="filepath">Path of the file to read.</param>
  /// <param name="separator">Character separating the fields of a line.</param>
  /// <returns>Problem data built from the parsed columns.</returns>
  /// <exception cref="InvalidDataException">The file has no header line, or a data line does not have one field per variable.</exception>
  private static IRegressionProblemData ParseProblemDataFromFile(string filepath, char separator = ';') {
    string[] variables;
    List<double>[] vals;

    // The using block releases the file handle even when parsing fails.
    using (var reader = new StreamReader(filepath)) {
      string header = reader.ReadLine();
      if (header == null) {
        throw new InvalidDataException($"File '{filepath}' is empty; expected a header line with the variable names.");
      }

      variables = header.Split(separator);
      vals = Enumerable.Range(0, variables.Length).Select(_ => new List<double>()).ToArray();

      int lineNumber = 1; // the header is line 1
      string line;
      while (!String.IsNullOrWhiteSpace(line = reader.ReadLine())) {
        lineNumber++;
        string[] fields = line.Split(separator);
        if (fields.Length != variables.Length) {
          throw new InvalidDataException($"File '{filepath}', line {lineNumber}: expected {variables.Length} values but found {fields.Length}.");
        }
        for (int col = 0; col < fields.Length; col++) {
          vals[col].Add(Convert.ToDouble(fields[col], EnUsCulture));
        }
      }
    }

    var targetVar = variables.Last();
    var allowedInputVars = variables.Where(val => val != targetVar);
    IDataset dataset = new Dataset(variables, vals);
    return new RegressionProblemData(dataset, allowedInputVars, targetVar);
  }

  /// <summary>
  /// Entry point: runs the PennML benchmark, prints "Done." and waits for a line
  /// of console input before exiting.
  /// </summary>
  static void Main() {
    TestPennML();
    Console.WriteLine("Done.");
    Console.ReadLine();
  }
}
|
---|
| 94 | }
|
---|