Free cookie consent management tool by TermsFeed Policy Generator

source: branches/3.2/tools/CedmaImporter/Importer.cs @ 13397

Last change on this file since 13397 was 2312, checked in by gkronber, 15 years ago

Added CedmaExporter from #715 branch.

File size: 7.8 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.IO;
6using HeuristicLab.Modeling.Database;
7using HeuristicLab.GP;
8using HeuristicLab.GP.Interfaces;
9using HeuristicLab.GP.StructureIdentification;
10using System.Diagnostics;
11using HeuristicLab.Modeling;
12
namespace CedmaImporter {
  /// <summary>
  /// Reads a delimited text file describing models (one header line, one
  /// line that is skipped, then one row per model) and persists each model
  /// into a modeling database file (".sdf") created next to the model files.
  /// </summary>
  public class Importer {

    // Column positions of the fixed fields in a model row.
    private const int ID_COLUMN = 0;
    private const int FILENAME_COLUMN = 1;
    private const int TARGETVARIABLE_COLUMN = 2;
    private const int ALGORITHM_COLUMN = 3;
    // First header column that is interpreted as a result name.
    private const int RESULTS_IDX = 4;
    private const int TRAINING_MSE = 4;
    private const int VALIDATION_MSE = TRAINING_MSE + 1;
    private const int TEST_MSE = TRAINING_MSE + 2;

    private const int TRAINING_R2 = 7;
    private const int VALIDATION_R2 = TRAINING_R2 + 1;
    private const int TEST_R2 = TRAINING_R2 + 2;

    private const int TRAINING_MAPE = 10;
    private const int VALIDATION_MAPE = TRAINING_MAPE + 1;
    private const int TEST_MAPE = TRAINING_MAPE + 2;

    private const int TRAINING_MAPRE = 13;
    private const int VALIDATION_MAPRE = TRAINING_MAPRE + 1;
    private const int TEST_MAPRE = TRAINING_MAPRE + 2;

    private const int TRAINING_VAF = 16;
    private const int VALIDATION_VAF = TRAINING_VAF + 1;
    private const int TEST_VAF = TRAINING_VAF + 2;

    // First column of a model row that holds a per-variable impact value.
    private const int VARIABLE_IMPACTS = 19;
    private const string EVALUATION_IMPACT = "EvaluationImpact";
    private const string QUALITY_IMPACT = "QualityImpact";

    // Both arrays are indexed by header column and filled by
    // ReadResultsAndInputVariables. inputVariables[i] stays null for columns
    // whose header does not contain a space.
    private string[] results;
    private string[] inputVariables;
    private HeuristicLab.CEDMA.Server.Problem problem;


    /// <summary>
    /// Creates an importer that takes the dataset and the training,
    /// validation and test sample ranges of every imported model from
    /// <paramref name="problem"/>.
    /// </summary>
    public Importer(HeuristicLab.CEDMA.Server.Problem problem) {
      this.problem = problem;
    }

    /// <summary>
    /// Imports all models listed in <paramref name="fileName"/>. The database
    /// file is named after <paramref name="fileName"/> with the extension
    /// ".sdf" and is placed in <paramref name="dirName"/>, which is also the
    /// directory the model files are read from.
    /// </summary>
    /// <param name="fileName">Path of the delimited text file to import.</param>
    /// <param name="dirName">Directory containing the model files; also receives the database file.</param>
    public void Import(string fileName, string dirName) {
      string outputFileName = Path.Combine(dirName, Path.GetFileNameWithoutExtension(fileName) + ".sdf");
      string connectionString = @"Data Source=" + outputFileName;

      var database = new HeuristicLab.Modeling.Database.SQLServerCompact.DatabaseService(connectionString);
      // The returned problem object is not used by the import itself.
      // NOTE(review): presumably this call registers the dataset in the database - confirm.
      database.GetOrCreateProblem(problem.Dataset);
      try {
        using (StreamReader reader = File.OpenText(fileName)) {
          ReadResultsAndInputVariables(reader, database);
          // The line directly after the header is skipped.
          reader.ReadLine();
          ImportAllModels(dirName, reader, database);
        }
      }
      finally {
        // Disconnect even when reading or importing fails.
        database.Disconnect();
      }
    }

    /// <summary>
    /// Reads the header line and fills <c>results</c> and
    /// <c>inputVariables</c> for every column from RESULTS_IDX onwards.
    /// A header containing a space is split at spaces: the first token is
    /// stored as the result name and the second token, stripped of spaces and
    /// parentheses, as the input variable name. Any other header is stored
    /// unchanged as the result name.
    /// </summary>
    /// <exception cref="FormatException">The reader has no header line.</exception>
    private void ReadResultsAndInputVariables(StreamReader reader, IModelingDatabase database) {
      string headerLine = reader.ReadLine();
      if (headerLine == null) {
        throw new FormatException("The import file is empty; expected a header line with column names.");
      }
      string[] columns = headerLine.Split(';').Select(x => x.Trim()).ToArray();
      results = new string[columns.Length];
      inputVariables = new string[columns.Length];
      for (int i = RESULTS_IDX; i < columns.Length; i++) {
        string resultColumn = columns[i];
        if (resultColumn.Contains(" ")) {
          // result name followed by the variable it refers to
          string[] tokens = resultColumn.Split(' ');
          inputVariables[i] = tokens[1].Trim(' ', '(', ')');
          results[i] = tokens[0].Trim();
        } else {
          // normal result value
          results[i] = resultColumn;
        }
      }
    }

    /// <summary>
    /// Reads the remaining lines of <paramref name="reader"/>, one model per
    /// line, and persists each model to <paramref name="database"/>.
    /// Blank lines are skipped. A model whose results or model file cannot be
    /// read, or which cannot be persisted, is skipped and reported as a
    /// trace warning; the import then continues with the next line.
    /// </summary>
    private void ImportAllModels(string dirName, StreamReader reader, IModelingDatabase database) {
      while (!reader.EndOfStream) {
        string line = reader.ReadLine();
        if (line == null || line.Trim().Length == 0) {
          // e.g. an empty line at the end of the file
          continue;
        }
        // Data rows may be separated by ';' or by tab characters.
        string[] modelData = line.Split(';', '\t').Select(x => x.Trim()).ToArray();
        int id = int.Parse(modelData[ID_COLUMN]);
        string targetVariableName = modelData[TARGETVARIABLE_COLUMN];
        string algoName = modelData[ALGORITHM_COLUMN];
        try {
          HeuristicLab.Modeling.IAnalyzerModel model = new AnalyzerModel();
          model.TargetVariable = targetVariableName;
          model.Dataset = problem.Dataset;
          model.TrainingSamplesStart = problem.TrainingSamplesStart;
          model.TrainingSamplesEnd = problem.TrainingSamplesEnd;
          model.ValidationSamplesStart = problem.ValidationSamplesStart;
          model.ValidationSamplesEnd = problem.ValidationSamplesEnd;
          model.TestSamplesStart = problem.TestSamplesStart;
          model.TestSamplesEnd = problem.TestSamplesEnd;

          SetModelResults(model, modelData);
          SetInputVariableResults(model, modelData);

          model.Predictor = CreatePredictor(targetVariableName, dirName, modelData[FILENAME_COLUMN], algoName);
          database.Persist(model, algoName, null);
        }
        catch (Exception ex) {
          // Best-effort import: one bad model must not abort the whole run,
          // but the failure is reported instead of being dropped silently.
          Trace.TraceWarning("Skipping model {0} (algorithm '{1}', target variable '{2}'): {3}",
            id, algoName, targetVariableName, ex);
        }
      }
    }

    /// <summary>
    /// Registers an input variable on <paramref name="model"/> for every
    /// non-empty value from column VARIABLE_IMPACTS onwards and stores the
    /// value as evaluation impact or quality impact, depending on the result
    /// name read from the header for that column.
    /// </summary>
    /// <exception cref="FormatException">
    /// A value has no matching header column, the header names no input
    /// variable, or the result name is neither "EvaluationImpact" nor
    /// "QualityImpact".
    /// </exception>
    private void SetInputVariableResults(HeuristicLab.Modeling.IAnalyzerModel model, string[] modelData) {
      for (int i = VARIABLE_IMPACTS; i < modelData.Length; i++) {
        if (string.IsNullOrEmpty(modelData[i])) {
          continue;
        }
        if (i >= results.Length) {
          throw new FormatException("Column " + i + " has a value but no matching header column.");
        }
        string variableName = inputVariables[i];
        if (variableName == null) {
          throw new FormatException("Header of column " + i + " does not name an input variable.");
        }
        model.AddInputVariable(variableName);
        if (results[i] == EVALUATION_IMPACT) {
          model.SetVariableEvaluationImpact(variableName, double.Parse(modelData[i]));
        } else if (results[i] == QUALITY_IMPACT) {
          model.SetVariableQualityImpact(variableName, double.Parse(modelData[i]));
        } else {
          throw new FormatException("Unknown variable result '" + results[i] + "' in column " + i + ".");
        }
      }
    }

    /// <summary>
    /// Parses the fifteen quality values of a model row (columns TRAINING_MSE
    /// to TEST_VAF) and assigns them to <paramref name="model"/>.
    /// </summary>
    /// <exception cref="FormatException">
    /// The row has fewer than VARIABLE_IMPACTS columns or a value is not a number.
    /// </exception>
    private void SetModelResults(HeuristicLab.Modeling.IAnalyzerModel model, string[] modelData) {
      if (modelData.Length < VARIABLE_IMPACTS) {
        throw new FormatException("Expected at least " + VARIABLE_IMPACTS + " columns but found " + modelData.Length + ".");
      }
      // NOTE(review): double.Parse uses the current thread culture; confirm that it
      // matches the culture the import file was written with.
      model.TrainingMeanSquaredError = double.Parse(modelData[TRAINING_MSE]);
      model.ValidationMeanSquaredError = double.Parse(modelData[VALIDATION_MSE]);
      model.TestMeanSquaredError = double.Parse(modelData[TEST_MSE]);

      model.TrainingCoefficientOfDetermination = double.Parse(modelData[TRAINING_R2]);
      model.ValidationCoefficientOfDetermination = double.Parse(modelData[VALIDATION_R2]);
      model.TestCoefficientOfDetermination = double.Parse(modelData[TEST_R2]);

      model.TrainingMeanAbsolutePercentageError = double.Parse(modelData[TRAINING_MAPE]);
      model.ValidationMeanAbsolutePercentageError = double.Parse(modelData[VALIDATION_MAPE]);
      model.TestMeanAbsolutePercentageError = double.Parse(modelData[TEST_MAPE]);

      model.TrainingMeanAbsolutePercentageOfRangeError = double.Parse(modelData[TRAINING_MAPRE]);
      model.ValidationMeanAbsolutePercentageOfRangeError = double.Parse(modelData[VALIDATION_MAPRE]);
      model.TestMeanAbsolutePercentageOfRangeError = double.Parse(modelData[TEST_MAPRE]);

      model.TrainingVarianceAccountedFor = double.Parse(modelData[TRAINING_VAF]);
      model.ValidationVarianceAccountedFor = double.Parse(modelData[VALIDATION_VAF]);
      model.TestVarianceAccountedFor = double.Parse(modelData[TEST_VAF]);
    }

    /// <summary>
    /// Builds the predictor for a model by importing the symbolic expression
    /// stored in "&lt;modelFileName&gt;.gp.txt" inside <paramref name="dirName"/>.
    /// Characters that are invalid in file names are replaced by '_' in
    /// <paramref name="modelFileName"/> before the file is opened.
    /// </summary>
    /// <exception cref="FormatException">
    /// <paramref name="algoName"/> is "SupportVectorRegression"; importing
    /// these models is currently disabled.
    /// </exception>
    private HeuristicLab.Modeling.IPredictor CreatePredictor(string targetVariable, string dirName, string modelFileName, string algoName) {
      foreach (char c in Path.GetInvalidFileNameChars()) {
        modelFileName = modelFileName.Replace(c, '_');
      }
      if (algoName == "SupportVectorRegression") {
        // Disabled SVM import, kept for reference:
        //HeuristicLab.SupportVectorMachines.SVMModel model = new HeuristicLab.SupportVectorMachines.SVMModel();
        //model.Model = SVM.Model.Read(Path.Combine(dirName, modelFileName) + ".svm.model.txt");
        //model.RangeTransform = SVM.RangeTransform.Read(Path.Combine(dirName, modelFileName) + ".svm.transform.txt");
        //return new HeuristicLab.SupportVectorMachines.Predictor(model, targetVariable);
        throw new FormatException("Import of SupportVectorRegression models is not supported.");
      } else {
        SymbolicExpressionImporter sexpImporter = new SymbolicExpressionImporter();
        GeneticProgrammingModel model = new GeneticProgrammingModel();
        using (StreamReader reader = File.OpenText(Path.Combine(dirName, modelFileName) + ".gp.txt")) {
          model.FunctionTree = sexpImporter.Import(reader);
        }
        return new HeuristicLab.GP.StructureIdentification.Predictor(new HL2TreeEvaluator(), model);
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.