[2259] | 1 | using System;
|
---|
| 2 | using System.Collections.Generic;
|
---|
| 3 | using System.Linq;
|
---|
| 4 | using System.Text;
|
---|
| 5 | using System.IO;
|
---|
| 6 | using HeuristicLab.Modeling.Database;
|
---|
[2260] | 7 | using HeuristicLab.GP;
|
---|
| 8 | using HeuristicLab.GP.Interfaces;
|
---|
| 9 | using HeuristicLab.GP.StructureIdentification;
|
---|
| 10 | using System.Diagnostics;
|
---|
[2285] | 11 | using HeuristicLab.Modeling;
|
---|
[2259] | 12 |
|
---|
| 13 | namespace CedmaImporter {
|
---|
/// <summary>
/// Reads a delimited CEDMA result file (one header line, one skipped line, then one
/// row per model) and persists every model it describes into a SQL Server Compact
/// database file (<c>.sdf</c>) created next to the model files.
/// </summary>
public class Importer {

  // Fixed column positions of the leading fields of every model row.
  private const int ID_COLUMN = 0;
  private const int FILENAME_COLUMN = 1;
  private const int TARGETVARIABLE_COLUMN = 2;
  private const int ALGORITHM_COLUMN = 3;
  // First header column that is interpreted as a result name.
  private const int RESULTS_IDX = 4;

  // Mean squared error (training / validation / test).
  private const int TRAINING_MSE = 4;
  private const int VALIDATION_MSE = TRAINING_MSE + 1;
  private const int TEST_MSE = TRAINING_MSE + 2;

  // Coefficient of determination (training / validation / test).
  private const int TRAINING_R2 = 7;
  private const int VALIDATION_R2 = TRAINING_R2 + 1;
  private const int TEST_R2 = TRAINING_R2 + 2;

  // Mean absolute percentage error (training / validation / test).
  private const int TRAINING_MAPE = 10;
  private const int VALIDATION_MAPE = TRAINING_MAPE + 1;
  private const int TEST_MAPE = TRAINING_MAPE + 2;

  // Mean absolute percentage of range error (training / validation / test).
  private const int TRAINING_MAPRE = 13;
  private const int VALIDATION_MAPRE = TRAINING_MAPRE + 1;
  private const int TEST_MAPRE = TRAINING_MAPRE + 2;

  // Variance accounted for (training / validation / test).
  private const int TRAINING_VAF = 16;
  private const int VALIDATION_VAF = TRAINING_VAF + 1;
  private const int TEST_VAF = TRAINING_VAF + 2;

  // First column that holds a per-variable impact value, and the two impact kinds
  // recognised in the header.
  private const int VARIABLE_IMPACTS = 19;
  private const string EVALUATION_IMPACT = "EvaluationImpact";
  private const string QUALITY_IMPACT = "QualityImpact";

  // Per-column result name taken from the header (null for columns before RESULTS_IDX).
  private string[] results;
  // Per-column input variable name; only set for header columns of the form "<result> (<variable>)".
  private string[] inputVariables;
  private readonly HeuristicLab.CEDMA.Server.Problem problem;


  /// <summary>
  /// Creates an importer that attaches the dataset and the sample ranges of
  /// <paramref name="problem"/> to every imported model.
  /// </summary>
  /// <param name="problem">Problem whose dataset and training/validation/test ranges are used.</param>
  public Importer(HeuristicLab.CEDMA.Server.Problem problem) {
    this.problem = problem;
  }

  /// <summary>
  /// Imports all models listed in <paramref name="fileName"/> into the database file
  /// <c>&lt;dirName&gt;/&lt;fileName without extension&gt;.sdf</c>.
  /// </summary>
  /// <param name="fileName">Path of the result file to read.</param>
  /// <param name="dirName">Directory that contains the model files and receives the database file.</param>
  /// <exception cref="FormatException">The result file has no header line or a row has a malformed id.</exception>
  public void Import(string fileName, string dirName) {
    string outputFileName = Path.Combine(dirName, Path.GetFileNameWithoutExtension(fileName) + ".sdf");
    string connectionString = @"Data Source=" + outputFileName;

    var database = new HeuristicLab.Modeling.Database.SQLServerCompact.DatabaseService(connectionString);
    try {
      // The returned problem object is not needed here; the call is kept for its effect on the database.
      database.GetOrCreateProblem(problem.Dataset);
      using (StreamReader reader = File.OpenText(fileName)) {
        ReadResultsAndInputVariables(reader, database);
        // The line after the header is skipped unconditionally.
        // NOTE(review): presumably a second header/separator line - confirm against the exporter.
        reader.ReadLine();
        ImportAllModels(dirName, reader, database);
      }
    }
    finally {
      // Release the database connection even when reading or importing fails.
      database.Disconnect();
    }
  }

  /// <summary>
  /// Parses the header line and fills <c>results</c> and <c>inputVariables</c>, indexed by column.
  /// A header column containing a space is read as "<c>result (variable)</c>"; any other column
  /// is a plain result name.
  /// </summary>
  /// <param name="reader">Reader positioned at the header line.</param>
  /// <param name="database">Not used by this method.</param>
  /// <exception cref="FormatException">The reader has no line left to read.</exception>
  private void ReadResultsAndInputVariables(StreamReader reader, IModelingDatabase database) {
    string headerLine = reader.ReadLine();
    if (headerLine == null) {
      throw new FormatException("The result file is empty; expected a header line.");
    }
    // NOTE(review): the header is split on ';' only, while model rows are split on ';' and tab.
    // Confirm the header never uses tab separators, otherwise the column indices drift.
    string[] columns = headerLine.Split(';').Select(x => x.Trim()).ToArray();
    results = new string[columns.Length];
    inputVariables = new string[columns.Length];
    for (int i = RESULTS_IDX; i < columns.Length; i++) {
      string resultColumn = columns[i];
      if (resultColumn.Contains(" ")) {
        // variable-specific result: "<result name> (<variable name>)"
        string[] tokens = resultColumn.Split(' ');
        inputVariables[i] = tokens[1].Trim(' ', '(', ')');
        results[i] = tokens[0].Trim();
      } else {
        // normal result value
        results[i] = resultColumn;
      }
    }
  }

  /// <summary>
  /// Reads the remaining lines of <paramref name="reader"/>, builds one analyzer model per row
  /// and persists it. A row whose model cannot be built or persisted is reported through
  /// <see cref="Trace"/> and skipped; the import continues with the next row.
  /// </summary>
  /// <param name="dirName">Directory that contains the model files.</param>
  /// <param name="reader">Reader positioned at the first model row.</param>
  /// <param name="database">Database that receives the models.</param>
  private void ImportAllModels(string dirName, StreamReader reader, IModelingDatabase database) {
    string line;
    while ((line = reader.ReadLine()) != null) {
      // Blank lines (e.g. a trailing newline at the end of the file) carry no model.
      if (line.Trim().Length == 0) {
        continue;
      }
      string[] modelData = line.Split(';', '\t').Select(x => x.Trim()).ToArray();
      int id = int.Parse(modelData[ID_COLUMN]);
      string targetVariableName = modelData[TARGETVARIABLE_COLUMN];
      string algoName = modelData[ALGORITHM_COLUMN];
      try {
        HeuristicLab.Modeling.IAnalyzerModel model = new AnalyzerModel();
        model.TargetVariable = targetVariableName;
        model.Dataset = problem.Dataset;
        model.TrainingSamplesStart = problem.TrainingSamplesStart;
        model.TrainingSamplesEnd = problem.TrainingSamplesEnd;
        model.ValidationSamplesStart = problem.ValidationSamplesStart;
        model.ValidationSamplesEnd = problem.ValidationSamplesEnd;
        model.TestSamplesStart = problem.TestSamplesStart;
        model.TestSamplesEnd = problem.TestSamplesEnd;

        SetModelResults(model, modelData);
        SetInputVariableResults(model, modelData);

        model.Predictor = CreatePredictor(targetVariableName, dirName, modelData[FILENAME_COLUMN], algoName);
        database.Persist(model, algoName, null);
      }
      catch (Exception ex) {
        // Best effort: one broken model must not abort the whole import, but it must not vanish silently either.
        Trace.TraceWarning("Skipped model {0} (algorithm '{1}'): {2}", id, algoName, ex);
      }
    }
  }

  /// <summary>
  /// Registers the input variables of a model row and stores their impact values.
  /// Every non-empty column from <c>VARIABLE_IMPACTS</c> onwards is interpreted according
  /// to the header entry of the same column.
  /// </summary>
  /// <param name="model">Model that receives the input variables and impacts.</param>
  /// <param name="modelData">Trimmed fields of one model row.</param>
  /// <exception cref="FormatException">
  /// A value has no matching header column, or its header names neither
  /// <c>EvaluationImpact</c> nor <c>QualityImpact</c>.
  /// </exception>
  private void SetInputVariableResults(HeuristicLab.Modeling.IAnalyzerModel model, string[] modelData) {
    for (int i = VARIABLE_IMPACTS; i < modelData.Length; i++) {
      if (string.IsNullOrEmpty(modelData[i])) {
        continue;
      }
      if (i >= results.Length) {
        throw new FormatException("Column " + i + " holds a value but has no matching header column.");
      }
      model.AddInputVariable(inputVariables[i]);
      if (results[i] == EVALUATION_IMPACT) {
        model.SetVariableEvaluationImpact(inputVariables[i], double.Parse(modelData[i]));
      } else if (results[i] == QUALITY_IMPACT) {
        model.SetVariableQualityImpact(inputVariables[i], double.Parse(modelData[i]));
      } else {
        throw new FormatException("Unknown variable result '" + results[i] + "' in column " + i + ".");
      }
    }
  }

  /// <summary>
  /// Copies the fixed-position quality measures of a model row into <paramref name="model"/>.
  /// </summary>
  /// <param name="model">Model that receives the values.</param>
  /// <param name="modelData">Trimmed fields of one model row.</param>
  /// <remarks>
  /// Values are parsed with <c>double.Parse(string)</c>, i.e. with the current culture.
  /// NOTE(review): confirm that the result file was written with the same culture.
  /// </remarks>
  private void SetModelResults(HeuristicLab.Modeling.IAnalyzerModel model, string[] modelData) {
    model.TrainingMeanSquaredError = double.Parse(modelData[TRAINING_MSE]);
    model.ValidationMeanSquaredError = double.Parse(modelData[VALIDATION_MSE]);
    model.TestMeanSquaredError = double.Parse(modelData[TEST_MSE]);

    model.TrainingCoefficientOfDetermination = double.Parse(modelData[TRAINING_R2]);
    model.ValidationCoefficientOfDetermination = double.Parse(modelData[VALIDATION_R2]);
    model.TestCoefficientOfDetermination = double.Parse(modelData[TEST_R2]);

    model.TrainingMeanAbsolutePercentageError = double.Parse(modelData[TRAINING_MAPE]);
    model.ValidationMeanAbsolutePercentageError = double.Parse(modelData[VALIDATION_MAPE]);
    model.TestMeanAbsolutePercentageError = double.Parse(modelData[TEST_MAPE]);

    model.TrainingMeanAbsolutePercentageOfRangeError = double.Parse(modelData[TRAINING_MAPRE]);
    model.ValidationMeanAbsolutePercentageOfRangeError = double.Parse(modelData[VALIDATION_MAPRE]);
    model.TestMeanAbsolutePercentageOfRangeError = double.Parse(modelData[TEST_MAPRE]);

    model.TrainingVarianceAccountedFor = double.Parse(modelData[TRAINING_VAF]);
    model.ValidationVarianceAccountedFor = double.Parse(modelData[VALIDATION_VAF]);
    model.TestVarianceAccountedFor = double.Parse(modelData[TEST_VAF]);
  }

  /// <summary>
  /// Loads the predictor of a model from <c>&lt;dirName&gt;/&lt;modelFileName&gt;.gp.txt</c>.
  /// Characters that are invalid in file names are replaced by '_' in
  /// <paramref name="modelFileName"/> before the path is built.
  /// </summary>
  /// <param name="targetVariable">Currently unused; only the disabled SVM code below referenced it.</param>
  /// <param name="dirName">Directory that contains the model files.</param>
  /// <param name="modelFileName">File name of the model without the <c>.gp.txt</c> suffix.</param>
  /// <param name="algoName">Algorithm name taken from the model row.</param>
  /// <returns>A GP structure identification predictor wrapping the imported function tree.</returns>
  /// <exception cref="FormatException"><paramref name="algoName"/> is "SupportVectorRegression".</exception>
  private HeuristicLab.Modeling.IPredictor CreatePredictor(string targetVariable, string dirName, string modelFileName, string algoName) {
    foreach (char c in Path.GetInvalidFileNameChars()) {
      modelFileName = modelFileName.Replace(c, '_');
    }
    if (algoName == "SupportVectorRegression") {
      // Import of SVM models is disabled; the former implementation is kept for reference.
      //HeuristicLab.SupportVectorMachines.SVMModel model = new HeuristicLab.SupportVectorMachines.SVMModel();
      //model.Model = SVM.Model.Read(Path.Combine(dirName, modelFileName) + ".svm.model.txt");
      //model.RangeTransform = SVM.RangeTransform.Read(Path.Combine(dirName, modelFileName) + ".svm.transform.txt");
      //return new HeuristicLab.SupportVectorMachines.Predictor(model, targetVariable);
      throw new FormatException("Import of SupportVectorRegression models is not supported.");
    } else {
      SymbolicExpressionImporter sexpImporter = new SymbolicExpressionImporter();
      GeneticProgrammingModel model = new GeneticProgrammingModel();
      using (StreamReader reader = File.OpenText(Path.Combine(dirName, modelFileName) + ".gp.txt")) {
        model.FunctionTree = sexpImporter.Import(reader);
      }
      return new HeuristicLab.GP.StructureIdentification.Predictor(new HL2TreeEvaluator(), model);
    }
  }
}
|
---|
| 170 | }
|
---|