Free cookie consent management tool by TermsFeed Policy Generator

source: branches/3022-FastFunctionExtraction/TestFFX/Program.cs @ 18242

Last change on this file since 18242 was 17740, checked in by lleko, 4 years ago

#3022 add normalization

File size: 4.5 KB
Line 
1using System;
2using System.IO;
3using System.Collections.Generic;
4using System.Globalization;
5using System.Linq;
6using HeuristicLab.Problems.DataAnalysis;
7using System.Data;
8using System.Diagnostics;
9using System.Threading;
10using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
11
namespace HeuristicLab.Algorithms.DataAnalysis.FastFunctionExtraction {
    /// <summary>
    /// Console driver that runs <c>FastFunctionExtraction.Fit</c> on every file of a data
    /// directory and appends the error metrics of each resulting model to a CSV results file.
    /// </summary>
    class Program {
        // Culture used for every number written to the results file and parsed from the data files.
        private static readonly CultureInfo FileCulture = new CultureInfo("en-US");

        /// <summary>
        /// Writes the CSV header to the results file (creating or overwriting it), then runs
        /// FFX on each file found in the data directory, printing progress and the elapsed
        /// fitting time per file to the console.
        /// </summary>
        private static void TestPennML() {
            // NOTE(review): FullName normally has no trailing separator, so this concatenation yields
            // "<dir>../branches/..." rather than "<dir>/../branches/...". The intended directory layout
            // is not visible from this file, so the path is deliberately left as it was - please confirm.
            string projectDirectoryPath = Directory.GetParent(Environment.CurrentDirectory).Parent.FullName + "../branches/3022-FastFunctionExtraction/TestFFX/";
            string pennMLDataPath = projectDirectoryPath + "/pennML_data/";
            string targetPath = projectDirectoryPath + "/results/results.txt";

            // Start a fresh results file; the per-dataset rows are appended later by SaveAccuracyInFile.
            using (var headerWriter = new StreamWriter(targetPath)) {
                headerWriter.WriteLine("dataset,algorithm,num_bases,train_mse,train_mae,test_mse,test_mae,runtime");
            }

            // Enumerate the directory once so the progress counter and the loop agree on the file set.
            FileInfo[] dataFiles = new DirectoryInfo(pennMLDataPath).GetFiles();
            int len = dataFiles.Length;
            int idx = 0;
            foreach (var fInfo in dataFiles) {
                Console.Write($"{++idx}/{len}\t{fInfo.Name,-35}:");
                RunFFXOnDataset(fInfo, targetPath, out var elapsedSeconds);
                Console.WriteLine($"{elapsedSeconds + " sec",-15}");
            }
        }

        /// <summary>
        /// Appends one CSV row per regression solution to <paramref name="filePath"/>.
        /// Row layout: dataset name, the literal "hl_ffx", number of bases, training MSE,
        /// training MAE, test MSE, test MAE, runtime.
        /// </summary>
        /// <param name="regressionSolutions">Solutions whose error metrics are written.</param>
        /// <param name="numBases">Value for the num_bases column; entry i belongs to solution i.</param>
        /// <param name="runtime">Runtime value repeated on every row.</param>
        /// <param name="filePath">Results file to append to; null or empty means "write nothing".</param>
        /// <param name="problemName">Data file name; its extension is stripped for the dataset column.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="numBases"/> has fewer entries than <paramref name="regressionSolutions"/>.
        /// </exception>
        private static void SaveAccuracyInFile(ISymbolicRegressionSolution[] regressionSolutions, int[] numBases, double runtime, string filePath, string problemName) {
            if (string.IsNullOrEmpty(filePath)) return;

            // Fail before anything is written instead of half-way through the file.
            if (numBases.Length < regressionSolutions.Length) {
                throw new ArgumentException("numBases must contain one entry per regression solution.", nameof(numBases));
            }

            // Works for any extension length and for names shorter than four characters.
            string datasetName = Path.GetFileNameWithoutExtension(problemName);
            string runtimeText = runtime.ToString(FileCulture);

            using (var writer = new StreamWriter(filePath, true)) {
                for (int i = 0; i < regressionSolutions.Length; i++) {
                    var solution = regressionSolutions[i];
                    string row = string.Join(",", new[] {
                        datasetName,
                        "hl_ffx",
                        numBases[i].ToString(FileCulture),
                        solution.TrainingMeanSquaredError.ToString(FileCulture),
                        solution.TrainingMeanAbsoluteError.ToString(FileCulture),
                        solution.TestMeanSquaredError.ToString(FileCulture),
                        solution.TestMeanAbsoluteError.ToString(FileCulture),
                        runtimeText
                    });
                    // Write the row verbatim. Passing a second argument would select the
                    // WriteLine(format, arg) overload and interpret '{' / '}' in the row as
                    // format placeholders.
                    writer.WriteLine(row);
                }
            }

            // Kept from the original implementation, whose comment states that the pause is there
            // "to prevent race conditions with stream writers".
            Thread.Sleep(100);
        }

        /// <summary>
        /// Parses the given data file, runs <c>FastFunctionExtraction.Fit</c> on it and appends
        /// the metrics of all returned models to <paramref name="outFilePath"/>.
        /// </summary>
        /// <param name="fInfo">Data file to process.</param>
        /// <param name="outFilePath">Results file handed on to <c>SaveAccuracyInFile</c>.</param>
        /// <param name="elapsedSeconds">
        /// Wall-clock seconds spent in the <c>Fit</c> call including materializing its result;
        /// parsing the file and building the solutions are not included.
        /// </param>
        private static void RunFFXOnDataset(FileInfo fInfo, string outFilePath, out double elapsedSeconds) {
            var data = ParseProblemDataFromFile(fInfo.FullName);

            Stopwatch stopwatch = Stopwatch.StartNew();
            var regressionModels = FastFunctionExtraction.Fit(data, 0.95, out var numBases, true, true, true, true, true, maxNumBases: 10).ToArray();
            stopwatch.Stop();
            elapsedSeconds = stopwatch.Elapsed.TotalSeconds;

            var regressionSolutions = regressionModels.Select(model => new SymbolicRegressionSolution(model, data)).ToArray();
            SaveAccuracyInFile(regressionSolutions, numBases.ToArray(), elapsedSeconds, outFilePath, fInfo.Name);
        }

        /// <summary>
        /// Reads a delimited text file into regression problem data. The first line holds the
        /// variable names; every following line holds one numeric value per variable. The last
        /// column is used as the target, and every column whose name differs from the target's
        /// name is an allowed input. Reading stops at the first blank line or at end of file.
        /// </summary>
        /// <param name="filepath">Path of the file to read.</param>
        /// <param name="separator">Column separator character.</param>
        /// <returns>Problem data built from the parsed columns.</returns>
        /// <exception cref="InvalidDataException">
        /// The file has no header line, or a data row does not have exactly one value per variable.
        /// </exception>
        private static IRegressionProblemData ParseProblemDataFromFile(string filepath, char separator = ';') {
            // The using block releases the file handle even when parsing fails.
            using (var reader = new StreamReader(filepath)) {
                string header = reader.ReadLine();
                if (string.IsNullOrWhiteSpace(header)) {
                    throw new InvalidDataException($"File '{filepath}' does not start with a header line.");
                }

                var variables = header.Split(separator);
                var vals = Enumerable.Range(0, variables.Length).Select(_ => new List<double>()).ToArray();
                var targetVar = variables.Last();
                var allowedInputVars = variables.Where(val => val != targetVar);

                int lineNumber = 1; // the header is line 1
                string line = reader.ReadLine();
                while (!string.IsNullOrWhiteSpace(line)) {
                    lineNumber++;
                    var fields = line.Split(separator);
                    if (fields.Length != variables.Length) {
                        throw new InvalidDataException($"File '{filepath}', line {lineNumber}: expected {variables.Length} values but found {fields.Length}.");
                    }
                    for (int i = 0; i < fields.Length; i++) {
                        vals[i].Add(Convert.ToDouble(fields[i], FileCulture));
                    }
                    line = reader.ReadLine();
                }

                IDataset dataset = new Dataset(variables, vals);
                return new RegressionProblemData(dataset, allowedInputVars, targetVar);
            }
        }

        /// <summary>
        /// Entry point: runs the benchmark, prints "Done." and waits for a line of console input
        /// before returning.
        /// </summary>
        static void Main() {
            TestPennML();
            Console.WriteLine("Done.");
            Console.ReadLine();
        }
    }
}
Note: See TracBrowser for help on using the repository browser.