[2259] | 1 | using System;
|
---|
| 2 | using System.Collections.Generic;
|
---|
| 3 | using System.Linq;
|
---|
| 4 | using System.Text;
|
---|
| 5 | using System.IO;
|
---|
| 6 | using HeuristicLab.Modeling.Database;
|
---|
[2260] | 7 | using HeuristicLab.GP;
|
---|
| 8 | using HeuristicLab.GP.Interfaces;
|
---|
| 9 | using HeuristicLab.GP.StructureIdentification;
|
---|
| 10 | using System.Diagnostics;
|
---|
[2259] | 11 |
|
---|
| 12 | namespace CedmaImporter {
|
---|
| 13 | public class Importer {
|
---|
| 14 |
|
---|
// ---- Column layout of the semicolon-separated result file ----

// Leading columns that identify a model row.
private const int ID_COLUMN = 0;              // parsed as an integer
private const int FILENAME_COLUMN = 1;        // base name of the stored model file
private const int TARGETVARIABLE_COLUMN = 2;  // copied to Model.TargetVariable
private const int ALGORITHM_COLUMN = 3;       // algorithm name; also selects the model file format

// First header column that is inspected for result names.
private const int RESULTS_IDX = 4;

// Every quality measure occupies three consecutive columns:
// training, validation, test.

// Mean squared error.
private const int
  TRAINING_MSE = 4,
  VALIDATION_MSE = TRAINING_MSE + 1,
  TEST_MSE = TRAINING_MSE + 2;

// Coefficient of determination.
private const int
  TRAINING_R2 = 7,
  VALIDATION_R2 = TRAINING_R2 + 1,
  TEST_R2 = TRAINING_R2 + 2;

// Mean absolute percentage error.
private const int
  TRAINING_MAPE = 10,
  VALIDATION_MAPE = TRAINING_MAPE + 1,
  TEST_MAPE = TRAINING_MAPE + 2;

// Mean absolute percentage of range error.
private const int
  TRAINING_MAPRE = 13,
  VALIDATION_MAPRE = TRAINING_MAPRE + 1,
  TEST_MAPRE = TRAINING_MAPRE + 2;

// Variance accounted for.
private const int
  TRAINING_VAF = 16,
  VALIDATION_VAF = TRAINING_VAF + 1,
  TEST_VAF = TRAINING_VAF + 2;

// First column that is scanned for per-variable impact values.
private const int VARIABLE_IMPACTS = 19;

// Result names that a variable impact column header may carry.
private const string EVALUATION_IMPACT = "EvaluationImpact";
private const string QUALITY_IMPACT = "QualityImpact";

// Result name per header column (filled from RESULTS_IDX onwards when the header is read).
private string[] results;
// Input variable name per header column; stays null for columns whose header names no variable.
private string[] inputVariables;
// Problem whose dataset and sample ranges are copied onto every imported model.
private HeuristicLab.CEDMA.Server.Problem problem;
|
---|
| 47 |
|
---|
| 48 |
|
---|
/// <summary>
/// Creates an importer that attaches the given problem's dataset and sample ranges
/// to the models it imports.
/// </summary>
/// <param name="problem">The problem the imported models belong to; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="problem"/> is null.</exception>
public Importer(HeuristicLab.CEDMA.Server.Problem problem) {
  // Fail fast: Import dereferences the problem unconditionally, so a null would
  // otherwise only surface later as a NullReferenceException far from its cause.
  if (problem == null) throw new ArgumentNullException("problem");
  this.problem = problem;
}
|
---|
| 52 |
|
---|
/// <summary>
/// Reads the result file <paramref name="fileName"/> and stores every model it lists
/// in a SQL Server Compact database.
/// </summary>
/// <param name="fileName">Path of the semicolon-separated result file.</param>
/// <param name="dirName">Directory that holds the model files; the database file
/// (result file name with the extension ".sdf") is placed here as well.</param>
public void Import(string fileName, string dirName) {
  string databaseFile = Path.GetFileNameWithoutExtension(fileName) + ".sdf";
  string databasePath = Path.Combine(dirName, databaseFile);
  var database = new HeuristicLab.Modeling.Database.SQLServerCompact.DatabaseService("Data Source=" + databasePath);

  // The returned problem object is not used here; the call is kept for whatever it
  // does inside the database service (presumably registering the dataset - confirm).
  database.GetOrCreateProblem(problem.Dataset);

  using (StreamReader reader = File.OpenText(fileName)) {
    // First line: column headers.
    ReadResultsAndInputVariables(reader, database);
    // Second line is skipped without being interpreted.
    reader.ReadLine();
    // Remaining lines: one model per line.
    ImportAllModels(dirName, reader, database);
  }
}
|
---|
| 65 |
|
---|
/// <summary>
/// Reads the header line and records, for every column from RESULTS_IDX onwards,
/// the result name and (where present) the input variable the column refers to.
/// </summary>
/// <remarks>
/// A header of the form "ResultName (variable)" is split at its first space: the part
/// before it becomes the result name, the remainder (stripped of spaces and
/// parentheses) the variable name. Headers without a space are plain result names.
/// </remarks>
/// <param name="reader">Reader positioned at the header line of the result file.</param>
/// <param name="database">Not used by this method.</param>
/// <exception cref="FormatException">The file contains no header line.</exception>
private void ReadResultsAndInputVariables(StreamReader reader, IModelingDatabase database) {
  string headerLine = reader.ReadLine();
  // ReadLine returns null at end of stream; report an empty file explicitly
  // instead of failing with a NullReferenceException.
  if (headerLine == null) throw new FormatException("The result file is empty; a header line was expected.");

  string[] columns = headerLine.Split(';').Select(x => x.Trim()).ToArray();
  results = new string[columns.Length];
  inputVariables = new string[columns.Length];
  for (int i = RESULTS_IDX; i < columns.Length; i++) {
    string resultColumn = columns[i];
    int separator = resultColumn.IndexOf(' ');
    if (separator >= 0) {
      // Split only at the first space so that variable names containing spaces
      // (or headers with repeated spaces) are kept intact.
      results[i] = resultColumn.Substring(0, separator).Trim();
      inputVariables[i] = resultColumn.Substring(separator + 1).Trim(' ', '(', ')');
    } else {
      // normal result value
      results[i] = resultColumn;
    }
  }
}
|
---|
| 84 |
|
---|
/// <summary>
/// Reads the remaining lines of the result file, builds one model per line and
/// persists it in the database.
/// </summary>
/// <remarks>
/// A failure while building or persisting a single model does not stop the import:
/// it is reported through <see cref="System.Diagnostics.Trace"/> and the next line is processed.
/// </remarks>
/// <param name="dirName">Directory that contains the model files.</param>
/// <param name="reader">Reader positioned at the first model line.</param>
/// <param name="database">Database that receives the models.</param>
private void ImportAllModels(string dirName, StreamReader reader, IModelingDatabase database) {
  while (!reader.EndOfStream) {
    string line = reader.ReadLine();
    // Blank lines describe no model; without this check int.Parse below would
    // throw and abort the whole import.
    if (line.Trim().Length == 0) continue;

    // Every field is trimmed here, so no further trimming is needed below.
    string[] modelData = line.Split(';').Select(x => x.Trim()).ToArray();
    int id = int.Parse(modelData[ID_COLUMN]);
    string targetVariableName = modelData[TARGETVARIABLE_COLUMN];
    string algoName = modelData[ALGORITHM_COLUMN];
    try {
      HeuristicLab.Modeling.Model model = new HeuristicLab.Modeling.Model();
      model.TargetVariable = targetVariableName;
      model.Dataset = problem.Dataset;
      model.TrainingSamplesStart = problem.TrainingSamplesStart;
      model.TrainingSamplesEnd = problem.TrainingSamplesEnd;
      model.ValidationSamplesStart = problem.ValidationSamplesStart;
      model.ValidationSamplesEnd = problem.ValidationSamplesEnd;
      model.TestSamplesStart = problem.TestSamplesStart;
      model.TestSamplesEnd = problem.TestSamplesEnd;

      model.Data = ParseModel(dirName, modelData[FILENAME_COLUMN], algoName);

      SetModelResults(model, modelData);
      SetInputVariableResults(model, modelData);

      database.Persist(model, algoName, null);
    }
    catch (Exception ex) {
      // The import stays best-effort, but a skipped model must leave a trace
      // instead of disappearing silently.
      System.Diagnostics.Trace.TraceError(
        "Import of model {0} (algorithm '{1}', target variable '{2}') failed: {3}",
        id, algoName, targetVariableName, ex);
    }
  }
}
|
---|
| 114 |
|
---|
/// <summary>
/// Transfers the per-variable impact values of one model line to the model.
/// </summary>
/// <remarks>
/// Columns from VARIABLE_IMPACTS onwards are scanned; empty cells are skipped. For
/// every other cell the variable named in the column header is added to the model
/// and the value is stored as evaluation or quality impact, depending on the result
/// name of the column.
/// </remarks>
/// <param name="model">Model that receives the input variables and their impacts.</param>
/// <param name="modelData">Trimmed fields of one model line.</param>
/// <exception cref="FormatException">A non-empty cell belongs to a column whose result
/// name is neither "EvaluationImpact" nor "QualityImpact".</exception>
private void SetInputVariableResults(HeuristicLab.Modeling.Model model, string[] modelData) {
  for (int column = VARIABLE_IMPACTS; column < modelData.Length; column++) {
    // No value recorded for this variable.
    if (string.IsNullOrEmpty(modelData[column])) continue;

    model.AddInputVariables(inputVariables[column]);
    switch (results[column]) {
      case EVALUATION_IMPACT:
        model.SetVariableEvaluationImpact(inputVariables[column], double.Parse(modelData[column]));
        break;
      case QUALITY_IMPACT:
        model.SetVariableQualityImpact(inputVariables[column], double.Parse(modelData[column]));
        break;
      default:
        throw new FormatException();
    }
  }
}
|
---|
| 127 |
|
---|
/// <summary>
/// Copies the quality measures of one model line to the model. Each measure is read
/// from three columns (training, validation, test).
/// </summary>
/// <param name="model">Model that receives the values.</param>
/// <param name="modelData">Trimmed fields of one model line.</param>
/// <exception cref="FormatException">A field is not a valid number.</exception>
/// <exception cref="IndexOutOfRangeException">The line has fewer fields than the fixed
/// column layout requires.</exception>
private void SetModelResults(HeuristicLab.Modeling.Model model, string[] modelData) {
  // NOTE(review): double.Parse(string) uses the current culture; confirm that this
  // matches the culture the result file was written with.
  model.TrainingMeanSquaredError = double.Parse(modelData[TRAINING_MSE]);
  model.ValidationMeanSquaredError = double.Parse(modelData[VALIDATION_MSE]);
  model.TestMeanSquaredError = double.Parse(modelData[TEST_MSE]);

  model.TrainingCoefficientOfDetermination = double.Parse(modelData[TRAINING_R2]);
  model.ValidationCoefficientOfDetermination = double.Parse(modelData[VALIDATION_R2]);
  model.TestCoefficientOfDetermination = double.Parse(modelData[TEST_R2]);

  // The validation and test values belong to the MAPE properties; writing them to
  // the coefficient-of-determination properties would overwrite the R2 values set
  // above and leave the validation/test MAPE unset.
  model.TrainingMeanAbsolutePercentageError = double.Parse(modelData[TRAINING_MAPE]);
  model.ValidationMeanAbsolutePercentageError = double.Parse(modelData[VALIDATION_MAPE]);
  model.TestMeanAbsolutePercentageError = double.Parse(modelData[TEST_MAPE]);

  model.TrainingMeanAbsolutePercentageOfRangeError = double.Parse(modelData[TRAINING_MAPRE]);
  model.ValidationMeanAbsolutePercentageOfRangeError = double.Parse(modelData[VALIDATION_MAPRE]);
  model.TestMeanAbsolutePercentageOfRangeError = double.Parse(modelData[TEST_MAPRE]);

  model.TrainingVarianceAccountedFor = double.Parse(modelData[TRAINING_VAF]);
  model.ValidationVarianceAccountedFor = double.Parse(modelData[VALIDATION_VAF]);
  model.TestVarianceAccountedFor = double.Parse(modelData[TEST_VAF]);
}
|
---|
[2260] | 149 |
|
---|
/// <summary>
/// Loads the stored model that belongs to one line of the result file.
/// </summary>
/// <remarks>
/// Characters that are not allowed in file names are replaced by '_' in
/// <paramref name="modelFileName"/> before the path is built. For the algorithm
/// "SupportVectorRegression" the files "*.svm.model.txt" and "*.svm.transform.txt"
/// are read; for every other algorithm the symbolic expression in "*.gp.txt" is imported.
/// </remarks>
/// <param name="dirName">Directory that contains the model files.</param>
/// <param name="modelFileName">Base name of the model file, without extension.</param>
/// <param name="algoName">Name of the algorithm that produced the model.</param>
/// <returns>The loaded SVM model or genetic programming model.</returns>
private HeuristicLab.Core.IItem ParseModel(string dirName, string modelFileName, string algoName) {
  string sanitizedName = Path.GetInvalidFileNameChars()
    .Aggregate(modelFileName, (name, invalid) => name.Replace(invalid, '_'));
  // Builds the full path of a model file from its extension.
  Func<string, string> pathWithExtension = extension => Path.Combine(dirName, sanitizedName) + extension;

  if (algoName == "SupportVectorRegression") {
    HeuristicLab.Data.SVMModel svmModel = new HeuristicLab.Data.SVMModel();
    svmModel.Model = SVM.Model.Read(pathWithExtension(".svm.model.txt"));
    svmModel.RangeTransform = SVM.RangeTransform.Read(pathWithExtension(".svm.transform.txt"));
    return svmModel;
  }

  // Every other algorithm is stored as a symbolic expression tree.
  SymbolicExpressionImporter expressionImporter = new SymbolicExpressionImporter();
  GeneticProgrammingModel gpModel = new GeneticProgrammingModel();
  using (StreamReader reader = File.OpenText(pathWithExtension(".gp.txt"))) {
    gpModel.FunctionTree = expressionImporter.Import(reader);
  }
  return gpModel;
}
|
---|
[2259] | 168 | }
|
---|
| 169 | }
|
---|