using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using HeuristicLab.Modeling.Database;
using HeuristicLab.GP;
using HeuristicLab.GP.Interfaces;
using HeuristicLab.GP.StructureIdentification;
using System.Diagnostics;
using HeuristicLab.Modeling;

namespace CedmaImporter {
  /// <summary>
  /// Reads a delimited result file (one model per row) and persists every model
  /// it can load into a modeling database file (.sdf) created next to the model files.
  /// </summary>
  public class Importer {

    // Zero-based column positions inside a model row of the result file.
    private const int ID_COLUMN = 0;
    private const int FILENAME_COLUMN = 1;
    private const int TARGETVARIABLE_COLUMN = 2;
    private const int ALGORITHM_COLUMN = 3;
    // First header column that is interpreted as a result name.
    private const int RESULTS_IDX = 4;
    private const int TRAINING_MSE = 4;
    private const int VALIDATION_MSE = TRAINING_MSE + 1;
    private const int TEST_MSE = TRAINING_MSE + 2;

    private const int TRAINING_R2 = 7;
    private const int VALIDATION_R2 = TRAINING_R2 + 1;
    private const int TEST_R2 = TRAINING_R2 + 2;

    private const int TRAINING_MAPE = 10;
    private const int VALIDATION_MAPE = TRAINING_MAPE + 1;
    private const int TEST_MAPE = TRAINING_MAPE + 2;

    private const int TRAINING_MAPRE = 13;
    private const int VALIDATION_MAPRE = TRAINING_MAPRE + 1;
    private const int TEST_MAPRE = TRAINING_MAPRE + 2;

    private const int TRAINING_VAF = 16;
    private const int VALIDATION_VAF = TRAINING_VAF + 1;
    private const int TEST_VAF = TRAINING_VAF + 2;

    // First column that holds a per-variable impact value.
    private const int VARIABLE_IMPACTS = 19;
    // Result names (first token of a header cell) that identify the two supported impact kinds.
    private const string EVALUATION_IMPACT = "EvaluationImpact";
    private const string QUALITY_IMPACT = "QualityImpact";

    // Per header column: the result name (null for columns before RESULTS_IDX).
    private string[] results;
    // Per header column: the input variable name, or null when the column is a plain result.
    private string[] inputVariables;
    private HeuristicLab.CEDMA.Server.Problem problem;


    /// <summary>
    /// Creates an importer that attaches the given problem's dataset and sample
    /// ranges to every imported model.
    /// </summary>
    /// <param name="problem">Problem providing the dataset and the training/validation/test sample ranges.</param>
    public Importer(HeuristicLab.CEDMA.Server.Problem problem) {
      this.problem = problem;
    }

    /// <summary>
    /// Imports all models listed in <paramref name="fileName"/> into the database file
    /// "&lt;dirName&gt;/&lt;fileName without extension&gt;.sdf".
    /// </summary>
    /// <param name="fileName">Path of the result file to read.</param>
    /// <param name="dirName">Directory that contains the model files and receives the database file.</param>
    /// <exception cref="FormatException">The result file is empty.</exception>
    public void Import(string fileName, string dirName) {
      string outputFileName = Path.Combine(dirName, Path.GetFileNameWithoutExtension(fileName) + ".sdf");
      string connectionString = @"Data Source=" + outputFileName;

      var database = new HeuristicLab.Modeling.Database.SQLServerCompact.DatabaseService(connectionString);
      try {
        // The returned problem object is not used here; presumably the call makes sure
        // the problem exists in the database before models are persisted (TODO confirm).
        database.GetOrCreateProblem(problem.Dataset);
        using (StreamReader reader = File.OpenText(fileName)) {
          ReadResultsAndInputVariables(reader, database);
          // The line directly after the header is skipped unread.
          reader.ReadLine();
          ImportAllModels(dirName, reader, database);
        }
      } finally {
        // Disconnect even when reading or importing failed.
        database.Disconnect();
      }
    }

    /// <summary>
    /// Parses the header line and fills <c>results</c> and <c>inputVariables</c>.
    /// A header cell of the form "ResultName (VariableName)" yields both a result name
    /// and an input variable name; a cell without a space yields only a result name.
    /// </summary>
    /// <param name="reader">Reader positioned at the header line.</param>
    /// <param name="database">Not used by this method.</param>
    /// <exception cref="FormatException">The reader has no more lines.</exception>
    private void ReadResultsAndInputVariables(StreamReader reader, IModelingDatabase database) {
      string headerLine = reader.ReadLine();
      if (headerLine == null) {
        throw new FormatException("The result file is empty; a header line was expected.");
      }

      // NOTE(review): the header is split on ';' only, while model rows are split on ';' and tab.
      string[] columns = headerLine.Split(';').Select(x => x.Trim()).ToArray();
      results = new string[columns.Length];
      inputVariables = new string[columns.Length];
      for (int i = RESULTS_IDX; i < columns.Length; i++) {
        string resultColumn = columns[i];
        if (resultColumn.Contains(" ")) {
          // Result of an input variable. Empty entries are dropped so that repeated
          // spaces between the two parts do not produce an empty variable name.
          string[] tokens = resultColumn.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
          inputVariables[i] = tokens[1].Trim(' ', '(', ')');
          results[i] = tokens[0];
        } else {
          // normal result value
          results[i] = resultColumn;
        }
      }
    }

    /// <summary>
    /// Reads the remaining lines of the result file and persists one model per line.
    /// Blank lines are ignored. A row that cannot be imported is skipped and reported
    /// as a trace warning; the import continues with the next row.
    /// </summary>
    /// <param name="dirName">Directory containing the model files.</param>
    /// <param name="reader">Reader positioned at the first model row.</param>
    /// <param name="database">Database that receives the models.</param>
    private void ImportAllModels(string dirName, StreamReader reader, IModelingDatabase database) {
      string line;
      while ((line = reader.ReadLine()) != null) {
        if (line.Trim().Length == 0) {
          continue;
        }

        string idText = "?";
        try {
          string[] modelData = line.Split(';', '\t').Select(x => x.Trim()).ToArray();
          idText = modelData[ID_COLUMN];
          // The id is not stored; parsing it only rejects rows whose id is not an integer.
          int.Parse(idText);
          string targetVariableName = modelData[TARGETVARIABLE_COLUMN];
          string algoName = modelData[ALGORITHM_COLUMN];

          HeuristicLab.Modeling.IAnalyzerModel model = new AnalyzerModel();
          model.TargetVariable = targetVariableName;
          model.Dataset = problem.Dataset;
          model.TrainingSamplesStart = problem.TrainingSamplesStart;
          model.TrainingSamplesEnd = problem.TrainingSamplesEnd;
          model.ValidationSamplesStart = problem.ValidationSamplesStart;
          model.ValidationSamplesEnd = problem.ValidationSamplesEnd;
          model.TestSamplesStart = problem.TestSamplesStart;
          model.TestSamplesEnd = problem.TestSamplesEnd;

          SetModelResults(model, modelData);
          SetInputVariableResults(model, modelData);

          model.Predictor = CreatePredictor(targetVariableName, dirName, modelData[FILENAME_COLUMN], algoName);
          database.Persist(model, algoName, null);
        } catch (Exception ex) {
          // Best effort: one broken row must not abort the whole import, but it must not vanish silently either.
          Trace.TraceWarning("Skipping model '{0}': {1}", idText, ex.Message);
        }
      }
    }

    /// <summary>
    /// Registers every non-empty variable impact cell of a model row on the model.
    /// </summary>
    /// <param name="model">Model that receives the input variables and their impacts.</param>
    /// <param name="modelData">Trimmed cells of one model row.</param>
    /// <exception cref="FormatException">
    /// A non-empty impact cell belongs to a header column that is neither
    /// "EvaluationImpact" nor "QualityImpact".
    /// </exception>
    private void SetInputVariableResults(HeuristicLab.Modeling.IAnalyzerModel model, string[] modelData) {
      for (int i = VARIABLE_IMPACTS; i < modelData.Length; i++) {
        if (string.IsNullOrEmpty(modelData[i])) {
          continue;
        }

        model.AddInputVariable(inputVariables[i]);
        if (results[i] == EVALUATION_IMPACT) {
          model.SetVariableEvaluationImpact(inputVariables[i], ParseValue(modelData, i));
        } else if (results[i] == QUALITY_IMPACT) {
          model.SetVariableQualityImpact(inputVariables[i], ParseValue(modelData, i));
        } else {
          throw new FormatException("Unknown variable impact result '" + results[i] + "' in column " + i + ".");
        }
      }
    }

    /// <summary>
    /// Copies the fixed-position quality measures of a model row onto the model.
    /// </summary>
    /// <param name="model">Model that receives the values.</param>
    /// <param name="modelData">Trimmed cells of one model row.</param>
    private void SetModelResults(HeuristicLab.Modeling.IAnalyzerModel model, string[] modelData) {
      model.TrainingMeanSquaredError = ParseValue(modelData, TRAINING_MSE);
      model.ValidationMeanSquaredError = ParseValue(modelData, VALIDATION_MSE);
      model.TestMeanSquaredError = ParseValue(modelData, TEST_MSE);

      model.TrainingCoefficientOfDetermination = ParseValue(modelData, TRAINING_R2);
      model.ValidationCoefficientOfDetermination = ParseValue(modelData, VALIDATION_R2);
      model.TestCoefficientOfDetermination = ParseValue(modelData, TEST_R2);

      model.TrainingMeanAbsolutePercentageError = ParseValue(modelData, TRAINING_MAPE);
      model.ValidationMeanAbsolutePercentageError = ParseValue(modelData, VALIDATION_MAPE);
      model.TestMeanAbsolutePercentageError = ParseValue(modelData, TEST_MAPE);

      model.TrainingMeanAbsolutePercentageOfRangeError = ParseValue(modelData, TRAINING_MAPRE);
      model.ValidationMeanAbsolutePercentageOfRangeError = ParseValue(modelData, VALIDATION_MAPRE);
      model.TestMeanAbsolutePercentageOfRangeError = ParseValue(modelData, TEST_MAPRE);

      model.TrainingVarianceAccountedFor = ParseValue(modelData, TRAINING_VAF);
      model.ValidationVarianceAccountedFor = ParseValue(modelData, VALIDATION_VAF);
      model.TestVarianceAccountedFor = ParseValue(modelData, TEST_VAF);
    }

    /// <summary>
    /// Parses the cell at <paramref name="column"/> as a double.
    /// </summary>
    /// <param name="modelData">Trimmed cells of one model row.</param>
    /// <param name="column">Zero-based column index of the cell.</param>
    /// <returns>The parsed value.</returns>
    private static double ParseValue(string[] modelData, int column) {
      // NOTE(review): double.Parse without a format provider uses the current thread culture;
      // confirm that it matches the culture the result file was written with.
      return double.Parse(modelData[column]);
    }

    /// <summary>
    /// Loads the predictor of a model from "&lt;dirName&gt;/&lt;modelFileName&gt;.gp.txt".
    /// Characters that are invalid in file names are replaced by '_' in the model file name first.
    /// </summary>
    /// <param name="targetVariable">Name of the target variable (only referenced by the disabled SVM import).</param>
    /// <param name="dirName">Directory containing the model files.</param>
    /// <param name="modelFileName">Model file name without the ".gp.txt" suffix.</param>
    /// <param name="algoName">Algorithm name taken from the model row.</param>
    /// <returns>A predictor wrapping the imported function tree.</returns>
    /// <exception cref="FormatException">The algorithm is "SupportVectorRegression", which is not imported.</exception>
    private HeuristicLab.Modeling.IPredictor CreatePredictor(string targetVariable, string dirName, string modelFileName, string algoName) {
      foreach (char c in Path.GetInvalidFileNameChars()) {
        modelFileName = modelFileName.Replace(c, '_');
      }

      if (algoName == "SupportVectorRegression") {
        // The SVM import is disabled; the previous implementation is kept for reference.
        //HeuristicLab.SupportVectorMachines.SVMModel model = new HeuristicLab.SupportVectorMachines.SVMModel();
        //model.Model = SVM.Model.Read(Path.Combine(dirName, modelFileName) + ".svm.model.txt");
        //model.RangeTransform = SVM.RangeTransform.Read(Path.Combine(dirName, modelFileName) + ".svm.transform.txt");
        //return new HeuristicLab.SupportVectorMachines.Predictor(model, targetVariable);
        throw new FormatException("Models of algorithm 'SupportVectorRegression' cannot be imported.");
      }

      SymbolicExpressionImporter sexpImporter = new SymbolicExpressionImporter();
      GeneticProgrammingModel gpModel = new GeneticProgrammingModel();
      using (StreamReader reader = File.OpenText(Path.Combine(dirName, modelFileName) + ".gp.txt")) {
        gpModel.FunctionTree = sexpImporter.Import(reader);
      }
      return new HeuristicLab.GP.StructureIdentification.Predictor(new HL2TreeEvaluator(), gpModel);
    }
  }
}