1 | using System;
|
---|
2 | using System.Collections.Generic;
|
---|
3 | using System.Linq;
|
---|
4 | using System.Text;
|
---|
5 | using System.IO;
|
---|
6 | using HeuristicLab.Modeling.Database;
|
---|
7 | using HeuristicLab.Modeling.Database.SQLServerCompact;
|
---|
8 | using HeuristicLab.GP;
|
---|
9 | using HeuristicLab.GP.Interfaces;
|
---|
10 | using HeuristicLab.GP.StructureIdentification;
|
---|
11 | using System.Diagnostics;
|
---|
12 |
|
---|
13 | namespace CedmaImporter {
|
---|
14 | public class Importer {
|
---|
15 |
|
---|
// Column layout of every semicolon-separated model row in the results file.
private const int ID_COLUMN = 0;             // parsed as the integer model id
private const int FILENAME_COLUMN = 1;       // base name of the model file, handed to ParseModel
private const int TARGETVARIABLE_COLUMN = 2; // name of the target variable
private const int ALGORITHM_COLUMN = 3;      // algorithm name; also selects the model file format in ParseModel
private const int RESULTS_IDX = 4;           // first header column that is interpreted as a result

// One slot per header column; stays null for columns before RESULTS_IDX.
private Result[] results;
// One slot per header column; non-null only for header columns of the form "result: variable".
private string[] inputVariables;

// Source of the dataset and the training/validation/test sample ranges.
// Only assigned in the constructor, hence readonly.
private readonly HeuristicLab.CEDMA.Server.Problem problem;
|
---|
25 |
|
---|
26 |
|
---|
/// <summary>
/// Creates an importer that attaches the dataset and sample ranges of
/// <paramref name="problem"/> to every imported model.
/// </summary>
/// <param name="problem">Problem the imported models belong to.</param>
/// <exception cref="ArgumentNullException"><paramref name="problem"/> is null.</exception>
public Importer(HeuristicLab.CEDMA.Server.Problem problem) {
  // Fail fast: a null problem would otherwise only surface later as a
  // NullReferenceException in the middle of Import.
  if (problem == null) throw new ArgumentNullException("problem");
  this.problem = problem;
}
|
---|
30 |
|
---|
/// <summary>
/// Imports all models listed in a semicolon-separated results file.
/// The first line of the file is the header, every following line describes one model.
/// </summary>
/// <param name="fileName">Path of the results file. The database file is named after it
/// (directory part and extension dropped, ".sdf" appended).</param>
/// <param name="dirName">Directory that contains the model files referenced by the results file.</param>
/// <exception cref="ArgumentNullException"><paramref name="fileName"/> or <paramref name="dirName"/> is null.</exception>
public void Import(string fileName, string dirName) {
  // Validate before touching the database: with a null file name the connection
  // string would silently become "Data Source=.sdf" before File.OpenText fails.
  if (fileName == null) throw new ArgumentNullException("fileName");
  if (dirName == null) throw new ArgumentNullException("dirName");

  string databaseFile = Path.GetFileNameWithoutExtension(fileName) + ".sdf";
  // NOTE(review): DatabaseService is never released here - confirm whether it needs disposing.
  DatabaseService database = new DatabaseService(@"Data Source=" + databaseFile);
  // The returned problem is not needed here; the call is kept for its effect on the database.
  database.GetOrCreateProblem(problem.Dataset);

  using (StreamReader reader = File.OpenText(fileName)) {
    // The header must be consumed first; ImportAllModels continues with the same reader.
    ReadResultsAndInputVariables(reader);
    ImportAllModels(dirName, reader, database);
  }
}
|
---|
42 |
|
---|
/// <summary>
/// Reads the header line and fills <c>results</c> and <c>inputVariables</c>,
/// both indexed by column position.
/// </summary>
/// <remarks>
/// Only columns from RESULTS_IDX onwards are interpreted. A header of the form
/// "result: variable" describes a result for an input variable; any other header
/// is a plain result name.
/// </remarks>
/// <param name="reader">Reader positioned at the header line.</param>
/// <exception cref="FormatException">The input contains no header line.</exception>
private void ReadResultsAndInputVariables(StreamReader reader) {
  string header = reader.ReadLine();
  // ReadLine returns null for an empty file; report that as a format problem
  // instead of failing with a NullReferenceException.
  if (header == null) throw new FormatException("Expected a header line, but the input is empty");

  string[] columns = header.Split(';');
  // Slots of the leading non-result columns simply stay null.
  results = new Result[columns.Length];
  inputVariables = new string[columns.Length];

  for (int i = RESULTS_IDX; i < columns.Length; i++) {
    string resultColumn = columns[i].Trim();
    if (resultColumn.Contains(":")) {
      // "<result name>: <variable name>"
      string[] tokens = resultColumn.Split(':');
      inputVariables[i] = tokens[1].Trim();
      results[i] = new Result(tokens[0].Trim());
    } else {
      // normal result value
      results[i] = new Result(resultColumn);
    }
  }
}
|
---|
61 |
|
---|
/// <summary>
/// Reads the remaining lines of the results file; each non-blank line describes one model.
/// </summary>
/// <param name="dirName">Directory that contains the model files.</param>
/// <param name="reader">Reader positioned after the header line.</param>
/// <param name="database">Target database (persisting is still a TODO, see below).</param>
/// <exception cref="FormatException">A row has fewer columns than required or the id is not an integer.</exception>
private void ImportAllModels(string dirName, StreamReader reader, DatabaseService database) {
  string modelLine;
  while ((modelLine = reader.ReadLine()) != null) {
    // Tolerate blank lines instead of failing in int.Parse.
    if (modelLine.Trim().Length == 0) continue;

    string[] modelData = modelLine.Split(';');
    if (modelData.Length <= ALGORITHM_COLUMN) {
      throw new FormatException("Expected at least " + (ALGORITHM_COLUMN + 1) + " columns, but found: " + modelLine);
    }

    int id = int.Parse(modelData[ID_COLUMN]);
    string targetVariableName = modelData[TARGETVARIABLE_COLUMN].Trim();
    string algoName = modelData[ALGORITHM_COLUMN].Trim();
    HeuristicLab.Core.IItem modelItem = ParseModel(dirName, modelData[FILENAME_COLUMN].Trim(), algoName);

    HeuristicLab.Modeling.Database.SQLServerCompact.Variable targetVariable = new HeuristicLab.Modeling.Database.SQLServerCompact.Variable(targetVariableName);
    Algorithm algorithm = new Algorithm(algoName);
    Model model = new Model(targetVariable, algorithm);

    // Every model uses the sample partitions of the problem.
    model.TrainingSamplesStart = problem.TrainingSamplesStart;
    model.TrainingSamplesEnd = problem.TrainingSamplesEnd;
    model.ValidationSamplesStart = problem.ValidationSamplesStart;
    model.ValidationSamplesEnd = problem.ValidationSamplesEnd;
    model.TestSamplesStart = problem.TestSamplesStart;
    model.TestSamplesEnd = problem.TestSamplesEnd;

    // Both sequences are lazy; nothing is parsed until they are enumerated.
    IEnumerable<ModelResult> qualityModelResults = GetModelResults(model, modelData);
    IEnumerable<InputVariableResult> inputVariableResults = GetInputVariableResults(model, modelData);

    // TODO: persisting is not wired up yet; id, modelItem and both result
    // sequences are currently computed but not stored.
    //database.Persist(model);
    //foreach (ModelResult modelResult in qualityModelResults)
    //  database.Persist(modelResult);
    //foreach (InputVariableResult inputVariableResult in inputVariableResults)
    //  database.Persist(inputVariableResult);
  }
}
|
---|
92 |
|
---|
/// <summary>
/// Lazily yields one <c>InputVariableResult</c> for every "result: variable" column
/// whose value in <paramref name="modelData"/> is a parsable number.
/// </summary>
/// <param name="model">Model the results belong to.</param>
/// <param name="modelData">Column values of one model row.</param>
/// <returns>A deferred sequence; nothing is parsed until it is enumerated.</returns>
private IEnumerable<InputVariableResult> GetInputVariableResults(Model model, string[] modelData) {
  // A row may have fewer columns than the header; missing columns are treated
  // like unparsable values instead of raising IndexOutOfRangeException.
  int columnCount = Math.Min(inputVariables.Length, modelData.Length);
  for (int i = 0; i < columnCount; i++) {
    if (inputVariables[i] == null || results[i] == null) continue;

    // Parse once and reuse the value (previously TryParse followed by Parse).
    double value;
    if (!double.TryParse(modelData[i], out value)) continue;

    yield return new InputVariableResult(
      new InputVariable(model, new HeuristicLab.Modeling.Database.SQLServerCompact.Variable(inputVariables[i])),
      results[i],
      value);
  }
}
|
---|
99 |
|
---|
/// <summary>
/// Builds a lazy sequence with one <c>ModelResult</c> per column that has a result definition.
/// </summary>
/// <param name="model">Model the results belong to.</param>
/// <param name="modelData">Column values of one model row.</param>
/// <returns>A deferred sequence; <c>double.Parse</c> runs (and may throw) only on enumeration.</returns>
private IEnumerable<ModelResult> GetModelResults(Model model, string[] modelData) {
  // NOTE(review): results[] is also filled for "result: variable" columns, so those
  // columns are included here as well - confirm that this is intended.
  return Enumerable.Range(0, results.Count())
    .Where(column => results[column] != null)
    .Select(column => new ModelResult(model, results[column], double.Parse(modelData[column])));
}
|
---|
105 |
|
---|
// Maps the function symbols that may appear in a GP model file to the function
// used to create the corresponding tree node (see CreateTree).
private readonly Dictionary<string, IFunction> knownFunctions = new Dictionary<string, IFunction>() {
  {"+", new Addition()},
  {"and", new And()},
  {"mean", new Average()},
  {"cos", new Cosinus()},
  {"/", new Division()},
  {"equ", new Equal()},
  {"exp", new Exponential()},
  {">", new GreaterThan()},
  {"if", new IfThenElse()},
  {"<", new LessThan()},
  {"log", new Logarithm()},
  {"*", new Multiplication()},
  {"not", new Not()},
  {"or", new Or()},
  {"expt", new Power()},
  {"sign", new Signum()},
  {"sin", new Sinus()},
  {"sqrt", new Sqrt()},
  {"-", new Subtraction()},
  {"tan", new Tangens()},
  {"xor", new Xor()}
};

// Terminal symbols are not looked up by name; the parser handles them explicitly
// (ParseSexp for constants, ParseVariable and ParseDifferential for the others).
private readonly Constant constant = new Constant();
private readonly HeuristicLab.GP.StructureIdentification.Variable variable = new HeuristicLab.GP.StructureIdentification.Variable();
private readonly Differential differential = new Differential();
|
---|
132 |
|
---|
/// <summary>
/// Loads the model stored under <paramref name="modelFileName"/> in <paramref name="dirName"/>.
/// </summary>
/// <remarks>
/// For the algorithm "SupportVectorRegression" the files "&lt;name&gt;.svm.model.txt" and
/// "&lt;name&gt;.svm.transform.txt" are read; for every other algorithm the s-expression in
/// "&lt;name&gt;.gp.txt" is parsed into a function tree.
/// </remarks>
/// <param name="dirName">Directory that contains the model files.</param>
/// <param name="modelFileName">Base name of the model files (the suffix is appended here).</param>
/// <param name="algoName">Algorithm name taken from the results file.</param>
/// <returns>An SVM model or a genetic programming model.</returns>
private HeuristicLab.Core.IItem ParseModel(string dirName, string modelFileName, string algoName) {
  string basePath = Path.Combine(dirName, modelFileName);

  if (algoName == "SupportVectorRegression") {
    HeuristicLab.Data.SVMModel model = new HeuristicLab.Data.SVMModel();
    model.Model = SVM.Model.Read(basePath + ".svm.model.txt");
    model.RangeTransform = SVM.RangeTransform.Read(basePath + ".svm.transform.txt");
    return model;
  } else {
    GeneticProgrammingModel model = new GeneticProgrammingModel();
    // Dispose the reader; it was previously left open for every imported GP model.
    using (StreamReader reader = File.OpenText(basePath + ".gp.txt")) {
      // The token stream is lazy, but the Queue constructor enumerates it completely,
      // so the whole file has been read before the reader is closed.
      model.FunctionTree = ParseSexp(new Queue<Token>(GetTokenStream(reader)));
    }
    return model;
  }
}
|
---|
146 |
|
---|
/// <summary>
/// Turns the text behind <paramref name="reader"/> into a lazy sequence of tokens.
/// </summary>
/// <param name="reader">Source of the s-expression text.</param>
/// <returns>All non-null tokens in reading order; the reader is consumed during enumeration.</returns>
private IEnumerable<Token> GetTokenStream(StreamReader reader) {
  string[] separators = new string[] { " ", "\t", Environment.NewLine };
  return GetLineStream(reader)
    // parentheses are already surrounded by blanks, so splitting on whitespace is enough
    .SelectMany(line => line.Split(separators, StringSplitOptions.RemoveEmptyEntries))
    .Select(text => Token.Parse(text))
    // Token.Parse signals "nothing to parse" with null
    .Where(token => token != null);
}
|
---|
155 |
|
---|
/// <summary>
/// Lazily yields the lines of <paramref name="reader"/> with every parenthesis
/// padded by a blank on both sides, so that it becomes a separate whitespace-delimited token.
/// </summary>
/// <param name="reader">Source of the lines; consumed during enumeration.</param>
private IEnumerable<string> GetLineStream(StreamReader reader) {
  for (string raw = reader.ReadLine(); raw != null; raw = reader.ReadLine()) {
    yield return raw.Replace("(", " ( ").Replace(")", " ) ");
  }
}
|
---|
160 |
|
---|
/// <summary>
/// Parses one expression from the front of <paramref name="tokens"/> into a function tree.
/// </summary>
/// <remarks>
/// Accepted forms:
///   number                          - constant
///   ( number )                      - constant
///   ( variable w name offset )      - see ParseVariable
///   ( differential w name offset )  - see ParseDifferential
///   ( symbol expr* )                - function from knownFunctions with its sub-expressions
/// Every opening parenthesis is matched by a closing one. The constant branch used to
/// consume "(" without ever consuming ")", which left the parentheses of any expression
/// containing a constant unbalanced.
/// </remarks>
/// <param name="tokens">Remaining tokens; the tokens of the parsed expression are removed.</param>
/// <returns>The tree for the parsed expression.</returns>
/// <exception cref="FormatException">The tokens do not form a valid expression or end prematurely.</exception>
private HeuristicLab.GP.Interfaces.IFunctionTree ParseSexp(Queue<Token> tokens) {
  if (tokens.Count == 0) throw new FormatException("Expected function or constant symbol");

  // bare constant, e.g. the 2.5 in "(+ 2.5 ...)"
  if (tokens.Peek().Symbol == TokenSymbol.NUMBER) {
    return ParseConstant(tokens);
  }

  Expect(Token.LPAR, tokens);
  if (tokens.Count == 0) throw new FormatException("Expected function or constant symbol");

  Token head = tokens.Peek();
  if (head.Symbol == TokenSymbol.NUMBER) {
    // parenthesized constant; the closing parenthesis is consumed as well
    IFunctionTree leaf = ParseConstant(tokens);
    if (tokens.Count == 0) throw new FormatException("Unexpected end of expression after constant");
    Expect(Token.RPAR, tokens);
    return leaf;
  }
  if (head.Symbol != TokenSymbol.SYMB) {
    throw new FormatException("Expected function or constant symbol");
  }

  // terminals with a fixed argument list consume their own closing parenthesis
  if (head.StringValue.Equals("variable")) return ParseVariable(tokens);
  if (head.StringValue.Equals("differential")) return ParseDifferential(tokens);

  IFunctionTree tree = CreateTree(tokens.Dequeue());
  while (tokens.Count > 0 && !tokens.Peek().Equals(Token.RPAR)) {
    tree.AddSubTree(ParseSexp(tokens));
  }
  // truncated input: report it as a format problem instead of an InvalidOperationException
  if (tokens.Count == 0) throw new FormatException("Unexpected end of expression, missing: )");
  Expect(Token.RPAR, tokens);
  return tree;
}

// Removes the number at the front of the queue and wraps it in a constant node.
private IFunctionTree ParseConstant(Queue<Token> tokens) {
  ConstantFunctionTree leaf = (ConstantFunctionTree)constant.GetTreeNode();
  leaf.Value = tokens.Dequeue().DoubleValue;
  return leaf;
}
|
---|
186 |
|
---|
/// <summary>
/// Parses the remainder of "( differential weight name offset )"; the opening
/// parenthesis has already been consumed by the caller.
/// </summary>
/// <param name="tokens">Remaining tokens, starting with the "differential" keyword.</param>
/// <returns>The tree node created by the differential function, with weight, variable name and sample offset set.</returns>
private IFunctionTree ParseDifferential(Queue<Token> tokens) {
  // Dequeue outside of Debug.Assert: the assert call (including its argument) is
  // removed when DEBUG is not defined, so the keyword would never be consumed.
  Token keyword = tokens.Dequeue();
  Debug.Assert(keyword.StringValue == "differential");

  VariableFunctionTree t = (VariableFunctionTree)differential.GetTreeNode();
  t.Weight = tokens.Dequeue().DoubleValue;
  t.VariableName = tokens.Dequeue().StringValue;
  t.SampleOffset = (int)tokens.Dequeue().DoubleValue;
  Expect(Token.RPAR, tokens);
  return t;
}
|
---|
196 |
|
---|
/// <summary>
/// Parses the remainder of "( variable weight name offset )"; the opening
/// parenthesis has already been consumed by the caller.
/// </summary>
/// <param name="tokens">Remaining tokens, starting with the "variable" keyword.</param>
/// <returns>The tree node created by the variable function, with weight, variable name and sample offset set.</returns>
private IFunctionTree ParseVariable(Queue<Token> tokens) {
  // Dequeue outside of Debug.Assert: the assert call (including its argument) is
  // removed when DEBUG is not defined, so the keyword would never be consumed.
  Token keyword = tokens.Dequeue();
  Debug.Assert(keyword.StringValue == "variable");

  VariableFunctionTree t = (VariableFunctionTree)variable.GetTreeNode();
  t.Weight = tokens.Dequeue().DoubleValue;
  t.VariableName = tokens.Dequeue().StringValue;
  t.SampleOffset = (int)tokens.Dequeue().DoubleValue;
  Expect(Token.RPAR, tokens);
  return t;
}
|
---|
206 |
|
---|
/// <summary>
/// Creates an empty tree node for the function named by <paramref name="token"/>.
/// </summary>
/// <param name="token">Symbol token whose text is a key of knownFunctions.</param>
/// <returns>A new tree node of the matching function.</returns>
/// <exception cref="FormatException">The token is not a symbol or names an unknown function.</exception>
private IFunctionTree CreateTree(Token token) {
  if (token.Symbol != TokenSymbol.SYMB) throw new FormatException("Expected function symbol, but got: " + token.StringValue);

  // Report unknown symbols like every other parse error, naming the symbol,
  // instead of letting the dictionary indexer throw KeyNotFoundException.
  IFunction function;
  if (!knownFunctions.TryGetValue(token.StringValue, out function)) {
    throw new FormatException("Unknown function symbol: " + token.StringValue);
  }
  return function.GetTreeNode();
}
|
---|
211 |
|
---|
/// <summary>
/// Removes the next token and verifies that it equals <paramref name="token"/>.
/// </summary>
/// <param name="token">The token that must come next.</param>
/// <param name="tokens">Remaining tokens; the first one is removed.</param>
/// <exception cref="FormatException">The input is exhausted or the next token is a different one.</exception>
private void Expect(Token token, Queue<Token> tokens) {
  // Dequeue on an empty queue would throw InvalidOperationException; truncated
  // input is a format problem like any other unexpected token.
  if (tokens.Count == 0) {
    throw new FormatException("Expected: " + token.StringValue + ", but reached the end of the input");
  }
  Token cur = tokens.Dequeue();
  if (!token.Equals(cur)) throw new FormatException("Expected: " + token.StringValue + ", but found: " + cur.StringValue);
}
|
---|
216 |
|
---|
// Kinds of tokens: opening parenthesis, closing parenthesis, symbol (any other text), number.
private enum TokenSymbol { LPAR, RPAR, SYMB, NUMBER };

/// <summary>
/// A single token of an s-expression. Two tokens are equal when both their kind and
/// their text are equal.
/// </summary>
private class Token {
  public static readonly Token LPAR = Token.Parse("(");
  public static readonly Token RPAR = Token.Parse(")");

  public TokenSymbol Symbol { get; set; }
  // Trimmed text the token was parsed from.
  public string StringValue { get; set; }
  // Numeric value; only assigned for NUMBER tokens.
  public double DoubleValue { get; set; }
  public Token() { }

  public override bool Equals(object obj) {
    Token other = (obj as Token);
    if (other == null) return false;
    if (other.Symbol != Symbol) return false;
    return other.StringValue == this.StringValue;
  }

  // Must accompany Equals: built from exactly the members Equals compares, so
  // equal tokens always produce equal hash codes.
  public override int GetHashCode() {
    unchecked {
      int hash = (int)Symbol;
      if (StringValue != null) hash = (hash * 397) ^ StringValue.GetHashCode();
      return hash;
    }
  }

  /// <summary>
  /// Converts a piece of text into a token.
  /// </summary>
  /// <param name="strToken">Text of one token; surrounding whitespace is ignored.</param>
  /// <returns>The token, or null when the text is null or contains nothing but whitespace.</returns>
  public static Token Parse(string strToken) {
    string text = strToken == null ? "" : strToken.Trim();
    if (text == "") return null;

    Token t = new Token { StringValue = text };
    double number;
    if (text == "(") {
      t.Symbol = TokenSymbol.LPAR;
    } else if (text == ")") {
      t.Symbol = TokenSymbol.RPAR;
    } else if (double.TryParse(text, out number)) {
      // NOTE(review): parsing uses the current culture, as before - confirm that
      // the model files are written with the same culture.
      t.Symbol = TokenSymbol.NUMBER;
      t.DoubleValue = number;
    } else {
      t.Symbol = TokenSymbol.SYMB;
    }
    return t;
  }
}
|
---|
254 | }
|
---|
255 | }
|
---|