Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/tools/CedmaImporter/Importer.cs @ 2261

Last change on this file since 2261 was 2260, checked in by gkronber, 15 years ago

Worked on CEDMA importer. #719

File size: 10.5 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.IO;
6using HeuristicLab.Modeling.Database;
7using HeuristicLab.Modeling.Database.SQLServerCompact;
8using HeuristicLab.GP;
9using HeuristicLab.GP.Interfaces;
10using HeuristicLab.GP.StructureIdentification;
11using System.Diagnostics;
12
13namespace CedmaImporter {
14  public class Importer {
15
16    private const int ID_COLUMN = 0;
17    private const int FILENAME_COLUMN = 1;
18    private const int TARGETVARIABLE_COLUMN = 2;
19    private const int ALGORITHM_COLUMN = 3;
20    private const int RESULTS_IDX = 4;
21    private Result[] results;
22    private string[] inputVariables;
23
24    private HeuristicLab.CEDMA.Server.Problem problem;
25
26
27    public Importer(HeuristicLab.CEDMA.Server.Problem problem) {
28      this.problem = problem;
29    }
30
31    public void Import(string fileName, string dirName) {
32      string outputFileName = Path.GetFileNameWithoutExtension(fileName) + ".sdf";
33      string connectionString = @"Data Source=" + outputFileName;
34
35      DatabaseService database = new DatabaseService(connectionString);
36      Problem p = database.GetOrCreateProblem(problem.Dataset);
37      using (StreamReader reader = File.OpenText(fileName)) {
38        ReadResultsAndInputVariables(reader);
39        ImportAllModels(dirName, reader, database);
40      }
41    }
42
43    private void ReadResultsAndInputVariables(StreamReader reader) {
44      string[] columns = reader.ReadLine().Split(';');
45      results = Enumerable.Repeat<Result>(null, columns.Length).ToArray();
46      inputVariables = Enumerable.Repeat<string>(null, columns.Length).ToArray();
47      for (int i = RESULTS_IDX; i < columns.Length; i++) {
48        string resultColumn = columns[i].Trim();
49        if (resultColumn.Contains(":")) {
50          string[] tokens = resultColumn.Split(':');
51          string variableName = tokens[1].Trim();
52          string variableResultName = tokens[0].Trim();
53          inputVariables[i] = variableName;
54          results[i] = new Result(variableResultName);
55        } else {
56          // normal result value
57          results[i] = new Result(resultColumn);
58        }
59      }
60    }
61
62    private void ImportAllModels(string dirName, StreamReader reader, DatabaseService database) {
63      while (!reader.EndOfStream) {
64        string modelLine = reader.ReadLine();
65        string[] modelData = modelLine.Split(';');
66        int id = int.Parse(modelData[ID_COLUMN]);
67        string targetVariableName = modelData[TARGETVARIABLE_COLUMN].Trim();
68        string algoName = modelData[ALGORITHM_COLUMN].Trim();
69        HeuristicLab.Core.IItem modelItem = ParseModel(dirName, modelData[FILENAME_COLUMN].Trim(), algoName);
70        HeuristicLab.Modeling.Database.SQLServerCompact.Variable targetVariable = new HeuristicLab.Modeling.Database.SQLServerCompact.Variable(targetVariableName);
71        Algorithm algorithm = new Algorithm(algoName);
72        Model model = new Model(targetVariable, algorithm);
73        model.TrainingSamplesStart = problem.TrainingSamplesStart;
74        model.TrainingSamplesEnd = problem.TrainingSamplesEnd;
75        model.ValidationSamplesStart = problem.ValidationSamplesStart;
76        model.ValidationSamplesEnd = problem.ValidationSamplesEnd;
77        model.TestSamplesStart = problem.TestSamplesStart;
78        model.TestSamplesEnd = problem.TestSamplesEnd;
79
80        IEnumerable<ModelResult> qualityModelResults = GetModelResults(model, modelData);
81        IEnumerable<InputVariableResult> inputVariableResults = GetInputVariableResults(model, modelData);
82
83        // TODO
84        //database.Persist(model);
85        //foreach (ModelResult modelResult in qualityModelResults)
86        //  database.Persist(modelResult);
87        //foreach (InputVariableResult inputVariableResult in inputVariableResults)
88        //  database.Persist(inputVariableResult);
89
90      }
91    }
92
93    private IEnumerable<InputVariableResult> GetInputVariableResults(Model model, string[] modelData) {
94      double temp;
95      return from i in Enumerable.Range(0, inputVariables.Count())
96             where inputVariables[i] != null && results[i] != null && double.TryParse(modelData[i], out temp)
97             select new InputVariableResult(new InputVariable(model, new HeuristicLab.Modeling.Database.SQLServerCompact.Variable(inputVariables[i])), results[i], double.Parse(modelData[i]));
98    }
99
100    private IEnumerable<ModelResult> GetModelResults(Model model, string[] modelData) {
101      return from i in Enumerable.Range(0, results.Count())
102             where results[i] != null
103             select new ModelResult(model, results[i], double.Parse(modelData[i]));
104    }
105
106    Dictionary<string, IFunction> knownFunctions = new Dictionary<string, IFunction>() {
107    {"+", new Addition()},
108    {"and", new And()},
109    {"mean", new Average()},
110    {"cos", new Cosinus()},
111    {"/", new Division()},
112    {"equ", new Equal()},
113    {"exp", new Exponential()},
114    {">", new GreaterThan()},
115    {"if", new IfThenElse()},
116    {"<", new LessThan()},
117    {"log", new Logarithm()},
118    {"*", new Multiplication()},
119    {"not", new Not()},
120    {"or", new Or()},
121    {"expt", new Power()},
122    {"sign", new Signum()},
123    {"sin",new Sinus()},
124    {"sqrt", new Sqrt()},
125    {"-", new Subtraction()},
126    {"tan", new Tangens()},
127    {"xor", new Xor()}
128    };
129    Constant constant = new Constant();
130    HeuristicLab.GP.StructureIdentification.Variable variable = new HeuristicLab.GP.StructureIdentification.Variable();
131    Differential differential = new Differential();
132
133    private HeuristicLab.Core.IItem ParseModel(string dirName, string modelFileName, string algoName) {
134      if (algoName == "SupportVectorRegression") {
135        HeuristicLab.Data.SVMModel model = new HeuristicLab.Data.SVMModel();
136        model.Model = SVM.Model.Read(Path.Combine(dirName, modelFileName) + ".svm.model.txt");
137        model.RangeTransform = SVM.RangeTransform.Read(Path.Combine(dirName, modelFileName) + ".svm.transform.txt");
138        return model;
139      } else {
140        GeneticProgrammingModel model = new GeneticProgrammingModel();
141        IEnumerable<Token> tokens = GetTokenStream(File.OpenText(Path.Combine(dirName, modelFileName) + ".gp.txt"));
142        model.FunctionTree = ParseSexp(new Queue<Token>(tokens));
143        return model;
144      }
145    }
146
147    private IEnumerable<Token> GetTokenStream(StreamReader reader) {
148      return from line in GetLineStream(reader)
149             let strTokens = line.Split(new string[] { " ", "\t", Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries).AsEnumerable()
150             from strToken in strTokens
151             let t = Token.Parse(strToken)
152             where t != null
153             select t;
154    }
155
156    private IEnumerable<string> GetLineStream(StreamReader reader) {
157      while (!reader.EndOfStream) yield return reader.ReadLine().Replace("(", " ( ").Replace(")", " ) ");
158      yield break;
159    }
160
161    private HeuristicLab.GP.Interfaces.IFunctionTree ParseSexp(Queue<Token> tokens) {
162      Expect(Token.LPAR, tokens);
163
164      if (tokens.Peek().Symbol == TokenSymbol.SYMB) {
165        if (tokens.Peek().StringValue.Equals("variable")) {
166          return ParseVariable(tokens);
167        } else if (tokens.Peek().StringValue.Equals("differential")) {
168          return ParseDifferential(tokens);
169        } else {
170          Token curToken = tokens.Dequeue();
171          IFunctionTree tree = CreateTree(curToken);
172          while (!tokens.Peek().Equals(Token.RPAR)) {
173            tree.AddSubTree(ParseSexp(tokens));
174          }
175          Expect(Token.RPAR, tokens);
176          return tree;
177        }
178      } else if (tokens.Peek().Symbol == TokenSymbol.NUMBER) {
179        ConstantFunctionTree t = (ConstantFunctionTree)constant.GetTreeNode();
180        t.Value = tokens.Dequeue().DoubleValue;
181        return t;
182      } else {
183        throw new FormatException("Expected function or constant symbol");
184      }
185    }
186
187    private IFunctionTree ParseDifferential(Queue<Token> tokens) {
188      Debug.Assert(tokens.Dequeue().StringValue == "differential");
189      VariableFunctionTree t = (VariableFunctionTree)differential.GetTreeNode();
190      t.Weight = tokens.Dequeue().DoubleValue;
191      t.VariableName = tokens.Dequeue().StringValue;
192      t.SampleOffset = (int)tokens.Dequeue().DoubleValue;
193      Expect(Token.RPAR, tokens);
194      return t;
195    }
196
197    private IFunctionTree ParseVariable(Queue<Token> tokens) {
198      Debug.Assert(tokens.Dequeue().StringValue == "variable");
199      VariableFunctionTree t = (VariableFunctionTree)variable.GetTreeNode();
200      t.Weight = tokens.Dequeue().DoubleValue;
201      t.VariableName = tokens.Dequeue().StringValue;
202      t.SampleOffset = (int)tokens.Dequeue().DoubleValue;
203      Expect(Token.RPAR, tokens);
204      return t;
205    }
206
207    private IFunctionTree CreateTree(Token token) {
208      if (token.Symbol != TokenSymbol.SYMB) throw new FormatException("Expected function symbol, but got: " + token.StringValue);
209      return knownFunctions[token.StringValue].GetTreeNode();
210    }
211
212    private void Expect(Token token, Queue<Token> tokens) {
213      Token cur = tokens.Dequeue();
214      if (!token.Equals(cur)) throw new FormatException("Expected: " + token.StringValue + ", but got found: " + cur.StringValue);
215    }
216
217    private enum TokenSymbol { LPAR, RPAR, SYMB, NUMBER };
218    private class Token {
219      public static readonly Token LPAR = Token.Parse("(");
220      public static readonly Token RPAR = Token.Parse(")");
221
222      public TokenSymbol Symbol { get; set; }
223      public string StringValue { get; set; }
224      public double DoubleValue { get; set; }
225      public Token() { }
226
227      public override bool Equals(object obj) {
228        Token other = (obj as Token);
229        if (other == null) return false;
230        if (other.Symbol != Symbol) return false;
231        return other.StringValue == this.StringValue;
232      }
233
234      public static Token Parse(string strToken) {
235        strToken = strToken.Trim();
236        Token t = new Token();
237        t.StringValue = strToken.Trim();
238        double temp;
239        if (strToken == "") {
240          t = null;
241        } else if (strToken == "(") {
242          t.Symbol = TokenSymbol.LPAR;
243        } else if (strToken == ")") {
244          t.Symbol = TokenSymbol.RPAR;
245        } else if (double.TryParse(strToken, out temp)) {
246          t.Symbol = TokenSymbol.NUMBER;
247          t.DoubleValue = double.Parse(strToken);
248        } else {
249          t.Symbol = TokenSymbol.SYMB;
250        }
251        return t;
252      }
253    }
254  }
255}
Note: See TracBrowser for help on using the repository browser.