source: trunk/tools/CedmaImporter/Importer.cs @ 2272

Last change on this file since 2272 was 2272, checked in by gkronber, 12 years ago

Worked on persistence of models into the DB. #719 (CEDMA Importer)

File size: 7.4 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.IO;
6using HeuristicLab.Modeling.Database;
7using HeuristicLab.GP;
8using HeuristicLab.GP.Interfaces;
9using HeuristicLab.GP.StructureIdentification;
10using System.Diagnostics;
11
12namespace CedmaImporter {
13  public class Importer {
14
15    private const int ID_COLUMN = 0;
16    private const int FILENAME_COLUMN = 1;
17    private const int TARGETVARIABLE_COLUMN = 2;
18    private const int ALGORITHM_COLUMN = 3;
19    private const int RESULTS_IDX = 4;
20    private const int TRAINING_MSE = 4;
21    private const int VALIDATION_MSE = TRAINING_MSE + 1;
22    private const int TEST_MSE = TRAINING_MSE + 2;
23
24    private const int TRAINING_R2 = 7;
25    private const int VALIDATION_R2 = TRAINING_R2 + 1;
26    private const int TEST_R2 = TRAINING_R2 + 2;
27
28    private const int TRAINING_MAPE = 10;
29    private const int VALIDATION_MAPE = TRAINING_MAPE + 1;
30    private const int TEST_MAPE = TRAINING_MAPE + 2;
31
32    private const int TRAINING_MAPRE = 13;
33    private const int VALIDATION_MAPRE = TRAINING_MAPRE + 1;
34    private const int TEST_MAPRE = TRAINING_MAPRE + 2;
35
36    private const int TRAINING_VAF = 16;
37    private const int VALIDATION_VAF = TRAINING_VAF + 1;
38    private const int TEST_VAF = TRAINING_VAF + 2;
39
40    private const int VARIABLE_IMPACTS = 19;
41    private const string EVALUATION_IMPACT = "EvaluationImpact";
42    private const string QUALITY_IMPACT = "QualityImpact";
43
44    private string[] results;
45    private string[] inputVariables;
46    private HeuristicLab.CEDMA.Server.Problem problem;
47
48
49    public Importer(HeuristicLab.CEDMA.Server.Problem problem) {
50      this.problem = problem;
51    }
52
53    public void Import(string fileName, string dirName) {
54      string outputFileName = Path.Combine(dirName, Path.GetFileNameWithoutExtension(fileName) + ".sdf");
55      string connectionString = @"Data Source=" + outputFileName;
56
57      var database = new HeuristicLab.Modeling.Database.SQLServerCompact.DatabaseService(connectionString);
58      IProblem p = database.GetOrCreateProblem(problem.Dataset);
59      using (StreamReader reader = File.OpenText(fileName)) {
60        ReadResultsAndInputVariables(reader, database);
61        reader.ReadLine();
62        ImportAllModels(dirName, reader, database);
63      }
64    }
65
66    private void ReadResultsAndInputVariables(StreamReader reader, IModelingDatabase database) {
67      string[] columns = reader.ReadLine().Split(';').Select(x=>x.Trim()).ToArray();
68      results = new string[columns.Length];
69      inputVariables = new string[columns.Length];
70      for (int i = RESULTS_IDX; i < columns.Length; i++) {
71        string resultColumn = columns[i].Trim();
72        if (resultColumn.Contains(" ")) {
73          string[] tokens = resultColumn.Split(' ');
74          string variableName = tokens[1].Trim(' ','(',')');
75          string variableResultName = tokens[0].Trim();
76          inputVariables[i] = variableName;
77          results[i] = variableResultName;
78        } else {
79          // normal result value
80          results[i] = resultColumn;
81        }
82      }
83    }
84
85    private void ImportAllModels(string dirName, StreamReader reader, IModelingDatabase database) {
86      while (!reader.EndOfStream) {
87        string[] modelData = reader.ReadLine().Split(';').Select(x => x.Trim()).ToArray();
88        int id = int.Parse(modelData[ID_COLUMN]);
89        string targetVariableName = modelData[TARGETVARIABLE_COLUMN].Trim();
90        string algoName = modelData[ALGORITHM_COLUMN].Trim();
91        try {
92          HeuristicLab.Modeling.Model model = new HeuristicLab.Modeling.Model();
93          model.TargetVariable = targetVariableName;
94          model.Dataset = problem.Dataset;
95          model.TrainingSamplesStart = problem.TrainingSamplesStart;
96          model.TrainingSamplesEnd = problem.TrainingSamplesEnd;
97          model.ValidationSamplesStart = problem.ValidationSamplesStart;
98          model.ValidationSamplesEnd = problem.ValidationSamplesEnd;
99          model.TestSamplesStart = problem.TestSamplesStart;
100          model.TestSamplesEnd = problem.TestSamplesEnd;
101
102
103          model.Data = ParseModel(dirName, modelData[FILENAME_COLUMN].Trim(), algoName);
104
105          SetModelResults(model, modelData);
106          SetInputVariableResults(model, modelData);
107
108          database.Persist(model, algoName, null);
109        }
110        catch (Exception ex) {
111        }
112      }
113    }
114
115    private void SetInputVariableResults(HeuristicLab.Modeling.Model model, string[] modelData) {
116      for (int i = VARIABLE_IMPACTS; i < modelData.Length; i++) {
117        if (!string.IsNullOrEmpty(modelData[i])) {
118          model.AddInputVariables(inputVariables[i]);
119          if (results[i] == EVALUATION_IMPACT) {
120            model.SetVariableEvaluationImpact(inputVariables[i], double.Parse(modelData[i]));
121          } else if (results[i] == QUALITY_IMPACT) {
122            model.SetVariableQualityImpact(inputVariables[i], double.Parse(modelData[i]));
123          } else throw new FormatException();
124        }
125      }
126    }
127
128    private void SetModelResults(HeuristicLab.Modeling.Model model, string[] modelData) {
129      model.TrainingMeanSquaredError = double.Parse(modelData[TRAINING_MSE]);
130      model.ValidationMeanSquaredError = double.Parse(modelData[VALIDATION_MSE]);
131      model.TestMeanSquaredError = double.Parse(modelData[TEST_MSE]);
132
133      model.TrainingCoefficientOfDetermination = double.Parse(modelData[TRAINING_R2]);
134      model.ValidationCoefficientOfDetermination = double.Parse(modelData[VALIDATION_R2]);
135      model.TestCoefficientOfDetermination = double.Parse(modelData[TEST_R2]);
136
137      model.TrainingMeanAbsolutePercentageError = double.Parse(modelData[TRAINING_MAPE]);
138      model.ValidationCoefficientOfDetermination = double.Parse(modelData[VALIDATION_MAPE]);
139      model.TestCoefficientOfDetermination = double.Parse(modelData[TEST_MAPE]);
140
141      model.TrainingMeanAbsolutePercentageOfRangeError = double.Parse(modelData[TRAINING_MAPRE]);
142      model.ValidationMeanAbsolutePercentageOfRangeError = double.Parse(modelData[VALIDATION_MAPRE]);
143      model.TestMeanAbsolutePercentageOfRangeError = double.Parse(modelData[TEST_MAPRE]);
144
145      model.TrainingVarianceAccountedFor = double.Parse(modelData[TRAINING_VAF]);
146      model.ValidationVarianceAccountedFor = double.Parse(modelData[VALIDATION_VAF]);
147      model.TestVarianceAccountedFor = double.Parse(modelData[TEST_VAF]);
148    }
149
150    private HeuristicLab.Core.IItem ParseModel(string dirName, string modelFileName, string algoName) {
151      foreach (char c in Path.GetInvalidFileNameChars()) {
152        modelFileName = modelFileName.Replace(c, '_');
153      }
154      if (algoName == "SupportVectorRegression") {
155        HeuristicLab.Data.SVMModel model = new HeuristicLab.Data.SVMModel();
156        model.Model = SVM.Model.Read(Path.Combine(dirName, modelFileName) + ".svm.model.txt");
157        model.RangeTransform = SVM.RangeTransform.Read(Path.Combine(dirName, modelFileName) + ".svm.transform.txt");
158        return model;
159      } else {
160        SymbolicExpressionImporter sexpImporter = new SymbolicExpressionImporter();
161        GeneticProgrammingModel model = new GeneticProgrammingModel();
162        using (StreamReader reader = File.OpenText(Path.Combine(dirName, modelFileName) + ".gp.txt")) {
163          model.FunctionTree = sexpImporter.Import(reader);
164        }
165        return model;
166      }
167    }
168  }
169}
Note: See TracBrowser for help on using the repository browser.