1 | using System;
|
---|
2 | using System.Collections.Generic;
|
---|
3 | using System.Linq;
|
---|
4 | using System.Text;
|
---|
5 | using System.IO;
|
---|
6 | using HeuristicLab.Modeling.Database;
|
---|
7 | using HeuristicLab.GP;
|
---|
8 | using HeuristicLab.GP.Interfaces;
|
---|
9 | using HeuristicLab.GP.StructureIdentification;
|
---|
10 | using System.Diagnostics;
|
---|
11 |
|
---|
namespace CedmaImporter {
  /// <summary>
  /// Imports model results from a delimited text file, together with the model files
  /// stored in a directory, into a SQL Server Compact modeling database (.sdf).
  /// </summary>
  /// <remarks>
  /// Column layout of a data row as used by this class: id, model file name, target
  /// variable, algorithm name, fifteen quality values (MSE, R2, MAPE, MAPRE, VAF; each for
  /// training, validation and test) and, from column <c>VARIABLE_IMPACTS</c> on, one
  /// optional value per variable-impact column declared in the header.
  /// NOTE(review): numeric values are parsed with the current thread culture; confirm that
  /// the import file was written with the same culture.
  /// </remarks>
  public class Importer {

    private const int ID_COLUMN = 0;
    private const int FILENAME_COLUMN = 1;
    private const int TARGETVARIABLE_COLUMN = 2;
    private const int ALGORITHM_COLUMN = 3;
    private const int RESULTS_IDX = 4;
    private const int TRAINING_MSE = 4;
    private const int VALIDATION_MSE = TRAINING_MSE + 1;
    private const int TEST_MSE = TRAINING_MSE + 2;

    private const int TRAINING_R2 = 7;
    private const int VALIDATION_R2 = TRAINING_R2 + 1;
    private const int TEST_R2 = TRAINING_R2 + 2;

    private const int TRAINING_MAPE = 10;
    private const int VALIDATION_MAPE = TRAINING_MAPE + 1;
    private const int TEST_MAPE = TRAINING_MAPE + 2;

    private const int TRAINING_MAPRE = 13;
    private const int VALIDATION_MAPRE = TRAINING_MAPRE + 1;
    private const int TEST_MAPRE = TRAINING_MAPRE + 2;

    private const int TRAINING_VAF = 16;
    private const int VALIDATION_VAF = TRAINING_VAF + 1;
    private const int TEST_VAF = TRAINING_VAF + 2;

    private const int VARIABLE_IMPACTS = 19;
    private const string EVALUATION_IMPACT = "EvaluationImpact";
    private const string QUALITY_IMPACT = "QualityImpact";

    // Column separators accepted in the header line and in every data row.
    private static readonly char[] COLUMN_SEPARATORS = new char[] { ';', '\t' };

    // Per header column: the result name (null for columns before RESULTS_IDX).
    private string[] results;
    // Per header column: the variable name for "<result> (<variable>)" columns, otherwise null.
    private string[] inputVariables;
    private HeuristicLab.CEDMA.Server.Problem problem;


    /// <summary>Creates an importer for the given problem.</summary>
    /// <param name="problem">Supplies the dataset and the training/validation/test sample
    /// ranges that are copied onto every imported model.</param>
    public Importer(HeuristicLab.CEDMA.Server.Problem problem) {
      this.problem = problem;
    }

    /// <summary>
    /// Reads <paramref name="fileName"/> and persists one model per data row into the
    /// database file "&lt;name of fileName without extension&gt;.sdf" inside
    /// <paramref name="dirName"/>.
    /// </summary>
    /// <param name="fileName">Path of the delimited result file (header line first).</param>
    /// <param name="dirName">Directory that contains the model files referenced by the rows
    /// and that receives the database file.</param>
    /// <exception cref="FormatException">The result file contains no header line.</exception>
    public void Import(string fileName, string dirName) {
      string outputFileName = Path.Combine(dirName, Path.GetFileNameWithoutExtension(fileName) + ".sdf");
      string connectionString = @"Data Source=" + outputFileName;

      var database = new HeuristicLab.Modeling.Database.SQLServerCompact.DatabaseService(connectionString);
      // The return value is not needed here. NOTE(review): the call is kept because it
      // presumably registers the problem in the database - confirm.
      database.GetOrCreateProblem(problem.Dataset);
      using (StreamReader reader = File.OpenText(fileName)) {
        ReadResultsAndInputVariables(reader, database);
        // The line after the header is discarded unread.
        // NOTE(review): presumably a second header/separator row - confirm against the file format.
        reader.ReadLine();
        ImportAllModels(dirName, reader, database);
      }
    }

    /// <summary>
    /// Parses the header line and fills <c>results</c> and <c>inputVariables</c> for all
    /// columns from <c>RESULTS_IDX</c> on. A header of the form "Name (variable)" is stored
    /// as result "Name" for input variable "variable"; any other header is stored as a
    /// plain result name.
    /// </summary>
    /// <param name="reader">Reader positioned at the header line.</param>
    /// <param name="database">Not used by this method.</param>
    /// <exception cref="FormatException">The reader is already at the end of the file.</exception>
    private void ReadResultsAndInputVariables(StreamReader reader, IModelingDatabase database) {
      string header = reader.ReadLine();
      if (header == null) {
        throw new FormatException("The import file is empty; expected a header line.");
      }
      string[] columns = header.Split(COLUMN_SEPARATORS).Select(x => x.Trim()).ToArray();
      results = new string[columns.Length];
      inputVariables = new string[columns.Length];
      for (int i = RESULTS_IDX; i < columns.Length; i++) {
        string resultColumn = columns[i];
        if (resultColumn.Contains(" ")) {
          // Result of an input variable: "<result name> (<variable name>)".
          // Empty entries are dropped so that repeated spaces do not yield an empty variable name;
          // a trimmed string containing a space always produces at least two tokens.
          string[] tokens = resultColumn.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
          inputVariables[i] = tokens[1].Trim(' ', '(', ')');
          results[i] = tokens[0].Trim();
        } else {
          // normal result value
          results[i] = resultColumn;
        }
      }
    }

    /// <summary>
    /// Imports every remaining line of <paramref name="reader"/> as one model. Blank lines
    /// are skipped. A row that cannot be imported is reported through
    /// <see cref="Trace.TraceError(string, object[])"/> and does not stop the import of the
    /// following rows.
    /// </summary>
    private void ImportAllModels(string dirName, StreamReader reader, IModelingDatabase database) {
      string line;
      while ((line = reader.ReadLine()) != null) {
        // A blank line (e.g. a trailing newline at the end of the file) describes no model.
        if (line.Trim().Length == 0) continue;
        string[] modelData = line.Split(COLUMN_SEPARATORS).Select(x => x.Trim()).ToArray();
        try {
          ImportModel(dirName, modelData, database);
        }
        catch (Exception ex) {
          // Best effort: keep importing the remaining rows, but leave a trace of the failure.
          Trace.TraceError("CedmaImporter: could not import model row '{0}': {1}", modelData[ID_COLUMN], ex);
        }
      }
    }

    /// <summary>Builds the model described by one data row and persists it.</summary>
    /// <exception cref="FormatException">The row has too few columns or contains a value
    /// that cannot be parsed.</exception>
    private void ImportModel(string dirName, string[] modelData, IModelingDatabase database) {
      if (modelData.Length <= ALGORITHM_COLUMN) {
        throw new FormatException("Expected at least " + (ALGORITHM_COLUMN + 1) + " columns but found " + modelData.Length + ".");
      }
      // The id itself is not stored; parsing it only verifies that the row starts with a number.
      int.Parse(modelData[ID_COLUMN]);
      string targetVariableName = modelData[TARGETVARIABLE_COLUMN];
      string algoName = modelData[ALGORITHM_COLUMN];

      HeuristicLab.Modeling.Model model = new HeuristicLab.Modeling.Model();
      model.TargetVariable = targetVariableName;
      model.Dataset = problem.Dataset;
      model.TrainingSamplesStart = problem.TrainingSamplesStart;
      model.TrainingSamplesEnd = problem.TrainingSamplesEnd;
      model.ValidationSamplesStart = problem.ValidationSamplesStart;
      model.ValidationSamplesEnd = problem.ValidationSamplesEnd;
      model.TestSamplesStart = problem.TestSamplesStart;
      model.TestSamplesEnd = problem.TestSamplesEnd;

      model.Data = ParseModel(dirName, modelData[FILENAME_COLUMN], algoName);

      SetModelResults(model, modelData);
      SetInputVariableResults(model, modelData);

      database.Persist(model, algoName, null);
    }

    /// <summary>
    /// Copies the non-empty variable-impact values of a row (columns from
    /// <c>VARIABLE_IMPACTS</c> on) to the model, using the result and variable names read
    /// from the header.
    /// </summary>
    /// <exception cref="FormatException">A value has no matching header column, its header
    /// is neither an evaluation nor a quality impact, or it is not a valid number.</exception>
    private void SetInputVariableResults(HeuristicLab.Modeling.Model model, string[] modelData) {
      for (int i = VARIABLE_IMPACTS; i < modelData.Length; i++) {
        if (string.IsNullOrEmpty(modelData[i])) continue;
        if (i >= inputVariables.Length) {
          throw new FormatException("Column " + i + " holds a value but the header declares only " + inputVariables.Length + " columns.");
        }
        model.AddInputVariables(inputVariables[i]);
        if (results[i] == EVALUATION_IMPACT) {
          model.SetVariableEvaluationImpact(inputVariables[i], double.Parse(modelData[i]));
        } else if (results[i] == QUALITY_IMPACT) {
          model.SetVariableQualityImpact(inputVariables[i], double.Parse(modelData[i]));
        } else {
          throw new FormatException("Column " + i + " ('" + results[i] + "') is not a known variable impact column.");
        }
      }
    }

    /// <summary>
    /// Parses the fifteen fixed quality columns of a row (MSE, R2, MAPE, MAPRE and VAF for
    /// training, validation and test) and assigns them to the model.
    /// </summary>
    /// <exception cref="FormatException">The row has fewer than <c>VARIABLE_IMPACTS</c>
    /// columns or a value is not a valid number.</exception>
    private void SetModelResults(HeuristicLab.Modeling.Model model, string[] modelData) {
      if (modelData.Length < VARIABLE_IMPACTS) {
        throw new FormatException("Expected at least " + VARIABLE_IMPACTS + " columns but found " + modelData.Length + ".");
      }

      model.TrainingMeanSquaredError = double.Parse(modelData[TRAINING_MSE]);
      model.ValidationMeanSquaredError = double.Parse(modelData[VALIDATION_MSE]);
      model.TestMeanSquaredError = double.Parse(modelData[TEST_MSE]);

      model.TrainingCoefficientOfDetermination = double.Parse(modelData[TRAINING_R2]);
      model.ValidationCoefficientOfDetermination = double.Parse(modelData[VALIDATION_R2]);
      model.TestCoefficientOfDetermination = double.Parse(modelData[TEST_R2]);

      model.TrainingMeanAbsolutePercentageError = double.Parse(modelData[TRAINING_MAPE]);
      model.ValidationMeanAbsolutePercentageError = double.Parse(modelData[VALIDATION_MAPE]);
      model.TestMeanAbsolutePercentageError = double.Parse(modelData[TEST_MAPE]);

      model.TrainingMeanAbsolutePercentageOfRangeError = double.Parse(modelData[TRAINING_MAPRE]);
      model.ValidationMeanAbsolutePercentageOfRangeError = double.Parse(modelData[VALIDATION_MAPRE]);
      model.TestMeanAbsolutePercentageOfRangeError = double.Parse(modelData[TEST_MAPRE]);

      model.TrainingVarianceAccountedFor = double.Parse(modelData[TRAINING_VAF]);
      model.ValidationVarianceAccountedFor = double.Parse(modelData[VALIDATION_VAF]);
      model.TestVarianceAccountedFor = double.Parse(modelData[TEST_VAF]);
    }

    /// <summary>
    /// Loads the model file(s) belonging to one row from <paramref name="dirName"/>.
    /// </summary>
    /// <param name="dirName">Directory containing the model files.</param>
    /// <param name="modelFileName">File name stem from the row; characters that are invalid
    /// in file names are replaced by '_' before the files are looked up.</param>
    /// <param name="algoName">"SupportVectorRegression" selects the SVM files
    /// (".svm.model.txt" and ".svm.transform.txt"); every other name selects the symbolic
    /// expression file ".gp.txt".</param>
    /// <returns>The loaded SVM model or genetic programming model.</returns>
    private HeuristicLab.Core.IItem ParseModel(string dirName, string modelFileName, string algoName) {
      foreach (char c in Path.GetInvalidFileNameChars()) {
        modelFileName = modelFileName.Replace(c, '_');
      }
      string basePath = Path.Combine(dirName, modelFileName);
      if (algoName == "SupportVectorRegression") {
        HeuristicLab.Data.SVMModel model = new HeuristicLab.Data.SVMModel();
        model.Model = SVM.Model.Read(basePath + ".svm.model.txt");
        model.RangeTransform = SVM.RangeTransform.Read(basePath + ".svm.transform.txt");
        return model;
      } else {
        SymbolicExpressionImporter sexpImporter = new SymbolicExpressionImporter();
        GeneticProgrammingModel model = new GeneticProgrammingModel();
        using (StreamReader reader = File.OpenText(basePath + ".gp.txt")) {
          model.FunctionTree = sexpImporter.Import(reader);
        }
        return model;
      }
    }
  }
}