Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Views/3.4/MenuItems/ExportSymbolicSolutionToExcelMenuItem.cs @ 9699

Last change on this file since 9699 was 9699, checked in by mkommend, 11 years ago

#1730: Improved the performance of the Excel export.

File size: 20.5 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.ComponentModel;
25using System.IO;
26using System.Linq;
27using System.Windows.Forms;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Views;
29using HeuristicLab.MainForm;
30using HeuristicLab.MainForm.WindowsForms;
31using HeuristicLab.Optimizer;
32using OfficeOpenXml;
33using OfficeOpenXml.Drawing.Chart;
34
35namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Views {
36  public class ExportSymbolicSolutionToExcelMenuItem : MainForm.WindowsForms.MenuItem, IOptimizerUserInterfaceItemProvider {
37    private const string TRAININGSTART = "TrainingStart";
38    private const string TRAININGEND = "TrainingEnd";
39    private const string TESTSTART = "TestStart";
40    private const string TESTEND = "TestEnd";
41
42    public override string Name {
43      get { return "Export Symbolic Solution To Excel"; }
44    }
45    public override IEnumerable<string> Structure {
46      get { return new string[] { "&Edit" }; }
47    }
48    public override int Position {
49      get { return 2500; }
50    }
51    public override string ToolTipText {
52      get { return "Create excel file of symbolic data analysis solutions."; }
53    }
54
55    protected override void OnToolStripItemSet(EventArgs e) {
56      base.OnToolStripItemSet(e);
57      ToolStripItem.Enabled = false;
58      var menuItem = ToolStripItem.OwnerItem as ToolStripMenuItem;
59      if (menuItem != null)
60        menuItem.DropDownOpening += menuItem_DropDownOpening;
61    }
62
63    private void menuItem_DropDownOpening(object sender, EventArgs e) {
64      IContentView activeView = MainFormManager.MainForm.ActiveView as IContentView;
65      Control control = activeView as Control;
66      activeView = control.GetNestedControls((c) => c.Visible)
67        .OfType<IContentView>().FirstOrDefault(v => v.Content is ISymbolicDataAnalysisSolution && v.Content is IRegressionSolution);
68      ToolStripItem.Enabled = activeView != null;
69    }
70
71    public override void Execute() {
72      IContentView activeView = MainFormManager.MainForm.ActiveView as IContentView;
73      Control control = activeView as Control;
74      activeView = control.GetNestedControls((c) => c.Visible)
75        .OfType<IContentView>().First(v => v.Content is ISymbolicDataAnalysisSolution && v.Content is IRegressionSolution);
76      var solution = (ISymbolicDataAnalysisSolution)activeView.Content;
77      var formatter = new SymbolicDataAnalysisExpressionExcelFormatter();
78      var formula = formatter.Format(solution.Model.SymbolicExpressionTree, solution.ProblemData.Dataset);
79
80
81      SaveFileDialog saveFileDialog = new SaveFileDialog();
82      saveFileDialog.Filter = "Excel Workbook|*.xlsx";
83      saveFileDialog.Title = "Save an Excel File";
84      if (saveFileDialog.ShowDialog() == DialogResult.OK) {
85        string fileName = saveFileDialog.FileName;
86        using (BackgroundWorker bg = new BackgroundWorker()) {
87          bg.DoWork += (b, e) => ExportChart(fileName, solution, formula);
88          bg.RunWorkerAsync();
89        }
90      }
91    }
92
93    private void ExportChart(string fileName, ISymbolicDataAnalysisSolution solution, string formula) {
94      FileInfo newFile = new FileInfo(fileName);
95      if (newFile.Exists) {
96        newFile.Delete();
97        newFile = new FileInfo(fileName);
98      }
99      var formulaParts = formula.Split(new string[] { Environment.NewLine }, StringSplitOptions.None);
100
101      using (ExcelPackage package = new ExcelPackage(newFile)) {
102        ExcelWorksheet modelWorksheet = package.Workbook.Worksheets.Add("Model");
103        FormatModelSheet(modelWorksheet, solution, formulaParts);
104
105        ExcelWorksheet datasetWorksheet = package.Workbook.Worksheets.Add("Dataset");
106        WriteDatasetToExcel(datasetWorksheet, solution.ProblemData);
107
108        ExcelWorksheet inputsWorksheet = package.Workbook.Worksheets.Add("Inputs");
109        WriteInputSheet(inputsWorksheet, datasetWorksheet, formulaParts.Skip(2), solution.ProblemData.Dataset);
110
111        if (solution is IRegressionSolution) {
112          ExcelWorksheet estimatedWorksheet = package.Workbook.Worksheets.Add("Estimated Values");
113          WriteEstimatedWorksheet(estimatedWorksheet, datasetWorksheet, formulaParts, solution as IRegressionSolution);
114
115          ExcelWorksheet chartsWorksheet = package.Workbook.Worksheets.Add("Charts");
116          AddCharts(chartsWorksheet);
117        }
118        package.Workbook.Properties.Title = "Excel Export";
119        package.Workbook.Properties.Author = "HEAL";
120        package.Workbook.Properties.Comments = "Excel export of a symbolic data analysis solution from HeuristicLab";
121
122        package.Save();
123      }
124    }
125
126    private void FormatModelSheet(ExcelWorksheet modelWorksheet, ISymbolicDataAnalysisSolution solution, IEnumerable<string> formulaParts) {
127      int row = 1;
128      modelWorksheet.Cells[row, 1].Value = "Model";
129      modelWorksheet.Cells[row, 2].Value = solution.Name;
130
131      foreach (var part in formulaParts) {
132        modelWorksheet.Cells[row, 4].Value = part;
133        row++;
134      }
135
136      row = 2;
137      modelWorksheet.Cells[row, 1].Value = "Model Depth";
138      modelWorksheet.Cells[row, 2].Value = solution.Model.SymbolicExpressionTree.Depth;
139      row++;
140
141      modelWorksheet.Cells[row, 1].Value = "Model Length";
142      modelWorksheet.Cells[row, 2].Value = solution.Model.SymbolicExpressionTree.Length;
143      row += 2;
144
145      modelWorksheet.Cells[row, 1].Value = "Estimation Limits Lower";
146      modelWorksheet.Cells[row, 2].Value = solution.Model.LowerEstimationLimit;
147      modelWorksheet.Names.Add("EstimationLimitLower", modelWorksheet.Cells[row, 2]);
148      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
149      row++;
150
151      modelWorksheet.Cells[row, 1].Value = "Estimation Limits Upper";
152      modelWorksheet.Cells[row, 2].Value = solution.Model.UpperEstimationLimit;
153      modelWorksheet.Names.Add("EstimationLimitUpper", modelWorksheet.Cells[row, 2]);
154      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
155      row += 2;
156
157      modelWorksheet.Cells[row, 1].Value = "Trainings Partition Start";
158      modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TrainingPartition.Start;
159      modelWorksheet.Names.Add(TRAININGSTART, modelWorksheet.Cells[row, 2]);
160      row++;
161
162      modelWorksheet.Cells[row, 1].Value = "Trainings Partition End";
163      modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TrainingPartition.End;
164      modelWorksheet.Names.Add(TRAININGEND, modelWorksheet.Cells[row, 2]);
165      row++;
166
167      modelWorksheet.Cells[row, 1].Value = "Test Partition Start";
168      modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TestPartition.Start;
169      modelWorksheet.Names.Add(TESTSTART, modelWorksheet.Cells[row, 2]);
170      row++;
171
172      modelWorksheet.Cells[row, 1].Value = "Test Partition End";
173      modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TestPartition.End;
174      modelWorksheet.Names.Add(TESTEND, modelWorksheet.Cells[row, 2]);
175      row += 2;
176
177      string excelTrainingTarget = Indirect("B", true);
178      string excelTrainingEstimated = Indirect("C", true);
179      string excelTrainingAbsoluteError = Indirect("D", true);
180      string excelTrainingRelativeError = Indirect("E", true);
181      string excelTrainingMeanError = Indirect("F", true);
182      string excelTrainingMSE = Indirect("G", true);
183
184      string excelTestTarget = Indirect("B", false);
185      string excelTestEstimated = Indirect("C", false);
186      string excelTestAbsoluteError = Indirect("D", false);
187      string excelTestRelativeError = Indirect("E", false);
188      string excelTestMeanError = Indirect("F", false);
189      string excelTestMSE = Indirect("G", false);
190
191      modelWorksheet.Cells[row, 1].Value = "Pearson's R² (training)";
192      modelWorksheet.Cells[row, 2].Formula = string.Format("POWER(PEARSON({0},{1}),2)", excelTrainingTarget, excelTrainingEstimated);
193      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
194      row++;
195
196      modelWorksheet.Cells[row, 1].Value = "Pearson's R² (test)";
197      modelWorksheet.Cells[row, 2].Formula = string.Format("POWER(PEARSON({0},{1}),2)", excelTestTarget, excelTestEstimated);
198      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
199      row++;
200
201      modelWorksheet.Cells[row, 1].Value = "Mean Squared Error (training)";
202      modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTrainingMSE);
203      modelWorksheet.Names.Add("TrainingMSE", modelWorksheet.Cells[row, 2]);
204      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
205      row++;
206
207      modelWorksheet.Cells[row, 1].Value = "Mean Squared Error (test)";
208      modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTestMSE);
209      modelWorksheet.Names.Add("TestMSE", modelWorksheet.Cells[row, 2]);
210      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
211      row++;
212
213      modelWorksheet.Cells[row, 1].Value = "Mean absolute error (training)";
214      modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTrainingAbsoluteError);
215      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
216      row++;
217
218      modelWorksheet.Cells[row, 1].Value = "Mean absolute error (test)";
219      modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTestAbsoluteError);
220      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
221      row++;
222
223      modelWorksheet.Cells[row, 1].Value = "Mean error (training)";
224      modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTrainingMeanError);
225      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
226      row++;
227
228      modelWorksheet.Cells[row, 1].Value = "Mean error (test)";
229      modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTestMeanError);
230      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
231      row++;
232
233      modelWorksheet.Cells[row, 1].Value = "Average relative error (training)";
234      modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTrainingRelativeError);
235      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.00%";
236      row++;
237
238      modelWorksheet.Cells[row, 1].Value = "Average relative error (test)";
239      modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTestRelativeError);
240      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.00%";
241      row++;
242
243      modelWorksheet.Cells[row, 1].Value = "Normalized Mean Squared error (training)";
244      modelWorksheet.Cells[row, 2].Formula = string.Format("TrainingMSE / VAR({0})", excelTrainingTarget);
245      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
246      row++;
247
248      modelWorksheet.Cells[row, 1].Value = "Normalized Mean Squared error  (test)";
249      modelWorksheet.Cells[row, 2].Formula = string.Format("TestMSE / VAR({0})", excelTestTarget);
250      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
251
252      modelWorksheet.Cells["A1:B" + row].AutoFitColumns();
253
254      AddModelTreePicture(modelWorksheet, solution.Model);
255    }
256
257    private string Indirect(string column, bool training) {
258      if (training) {
259        return string.Format("INDIRECT(\"'Estimated Values'!{0}\"&{1}+2&\":{0}\"&{2}+1)", column, TRAININGSTART, TRAININGEND);
260      } else {
261        return string.Format("INDIRECT(\"'Estimated Values'!{0}\"&{1}+2&\":{0}\"&{2}+1)", column, TESTSTART, TESTEND);
262      }
263    }
264
265    private void AddCharts(ExcelWorksheet chartsWorksheet) {
266      chartsWorksheet.Names.AddFormula("AllId", "OFFSET('Estimated Values'!$A$1,1,0, COUNTA('Estimated Values'!$A:$A)-1)");
267      chartsWorksheet.Names.AddFormula("AllTarget", "OFFSET('Estimated Values'!$B$1,1,0, COUNTA('Estimated Values'!$B:$B)-1)");
268      chartsWorksheet.Names.AddFormula("AllEstimated", "OFFSET('Estimated Values'!$C$1,1,0, COUNTA('Estimated Values'!$C:$C)-1)");
269      chartsWorksheet.Names.AddFormula("TrainingId", "OFFSET('Estimated Values'!$A$1,Model!TrainingStart + 1,0, Model!TrainingEnd - Model!TrainingStart)");
270      chartsWorksheet.Names.AddFormula("TrainingTarget", "OFFSET('Estimated Values'!$B$1,Model!TrainingStart + 1,0, Model!TrainingEnd - Model!TrainingStart)");
271      chartsWorksheet.Names.AddFormula("TrainingEstimated", "OFFSET('Estimated Values'!$C$1,Model!TrainingStart + 1,0, Model!TrainingEnd - Model!TrainingStart)");
272      chartsWorksheet.Names.AddFormula("TestId", "OFFSET('Estimated Values'!$A$1,Model!TestStart + 1,0, Model!TestEnd - Model!TestStart)");
273      chartsWorksheet.Names.AddFormula("TestTarget", "OFFSET('Estimated Values'!$B$1,Model!TestStart + 1,0, Model!TestEnd - Model!TestStart)");
274      chartsWorksheet.Names.AddFormula("TestEstimated", "OFFSET('Estimated Values'!$C$1,Model!TestStart + 1,0, Model!TestEnd - Model!TestStart)");
275
276      var scatterPlot = chartsWorksheet.Drawings.AddChart("scatterPlot", eChartType.XYScatter);
277      scatterPlot.SetSize(800, 400);
278      scatterPlot.SetPosition(0, 0);
279      scatterPlot.Title.Text = "Scatter Plot";
280      var seriesAll = scatterPlot.Series.Add("AllTarget", "AllEstimated");
281      seriesAll.Header = "All";
282      var seriesTraining = scatterPlot.Series.Add("TrainingTarget", "TrainingEstimated");
283      seriesTraining.Header = "Training";
284      var seriesTest = scatterPlot.Series.Add("TestTarget", "TestEstimated");
285      seriesTest.Header = "Test";
286
287      var lineChart = chartsWorksheet.Drawings.AddChart("lineChart", eChartType.XYScatterLinesNoMarkers);
288      lineChart.SetSize(800, 400);
289      lineChart.SetPosition(400, 0);
290      lineChart.Title.Text = "LineChart";
291      var lineTarget = lineChart.Series.Add("AllTarget", "AllId");
292      lineTarget.Header = "Target";
293      var lineAll = lineChart.Series.Add("AllEstimated", "AllId");
294      lineAll.Header = "All";
295      var lineTraining = lineChart.Series.Add("TrainingEstimated", "TrainingId");
296      lineTraining.Header = "Training";
297      var lineTest = lineChart.Series.Add("TestEstimated", "TestId");
298      lineTest.Header = "Test";
299    }
300
301    private void AddModelTreePicture(ExcelWorksheet modelWorksheet, ISymbolicDataAnalysisModel model) {
302      SymbolicExpressionTreeChart modelTreePicture = new SymbolicExpressionTreeChart();
303      modelTreePicture.Tree = model.SymbolicExpressionTree;
304      string tmpFilename = Path.GetTempFileName();
305      modelTreePicture.Width = 1000;
306      modelTreePicture.Height = 500;
307      modelTreePicture.SaveImageAsEmf(tmpFilename);
308
309      FileInfo fi = new FileInfo(tmpFilename);
310      var excelModelTreePic = modelWorksheet.Drawings.AddPicture("ModelTree", fi);
311      excelModelTreePic.SetSize(50);
312      excelModelTreePic.SetPosition(2, 0, 6, 0);
313    }
314
315    private void WriteEstimatedWorksheet(ExcelWorksheet estimatedWorksheet, ExcelWorksheet datasetWorksheet, string[] formulaParts, IRegressionSolution solution) {
316      string preparedFormula = PrepareFormula(formulaParts);
317      int rows = solution.ProblemData.Dataset.Rows;
318      estimatedWorksheet.Cells[1, 1].Value = "Id";
319      estimatedWorksheet.Cells[1, 2].Value = "Target Variable";
320      estimatedWorksheet.Cells[1, 3].Value = "Estimated Values";
321      estimatedWorksheet.Cells[1, 4].Value = "Absolute Error";
322      estimatedWorksheet.Cells[1, 5].Value = "Relative Error";
323      estimatedWorksheet.Cells[1, 6].Value = "Error";
324      estimatedWorksheet.Cells[1, 7].Value = "Squared Error";
325      estimatedWorksheet.Cells[1, 9].Value = "Unbounded Estimated Values";
326      estimatedWorksheet.Cells[1, 10].Value = "Bounded Estimated Values";
327
328      estimatedWorksheet.Cells[1, 1, 1, 10].AutoFitColumns();
329
330      int targetIndex = solution.ProblemData.Dataset.VariableNames.ToList().FindIndex(x => x.Equals(solution.ProblemData.TargetVariable)) + 1;
331      for (int i = 0; i < rows; i++) {
332        estimatedWorksheet.Cells[i + 2, 1].Value = i;
333        estimatedWorksheet.Cells[i + 2, 2].Formula = datasetWorksheet.Cells[i + 2, targetIndex].FullAddress;
334        estimatedWorksheet.Cells[i + 2, 9].Formula = string.Format(preparedFormula, i + 2);
335      }
336      estimatedWorksheet.Cells["B2:B" + (rows + 1)].Style.Numberformat.Format = "0.000";
337
338      estimatedWorksheet.Cells["C2:C" + (rows + 1)].Formula = "J2";
339      estimatedWorksheet.Cells["C2:C" + (rows + 1)].Style.Numberformat.Format = "0.000";
340      estimatedWorksheet.Cells["D2:D" + (rows + 1)].Formula = "ABS(B2 - C2)";
341      estimatedWorksheet.Cells["D2:D" + (rows + 1)].Style.Numberformat.Format = "0.000";
342      estimatedWorksheet.Cells["E2:E" + (rows + 1)].Formula = "ABS(D2 / B2)";
343      estimatedWorksheet.Cells["E2:E" + (rows + 1)].Style.Numberformat.Format = "0.000";
344      estimatedWorksheet.Cells["F2:F" + (rows + 1)].Formula = "C2 - B2";
345      estimatedWorksheet.Cells["F2:F" + (rows + 1)].Style.Numberformat.Format = "0.000";
346      estimatedWorksheet.Cells["G2:G" + (rows + 1)].Formula = "POWER(F2, 2)";
347      estimatedWorksheet.Cells["G2:G" + (rows + 1)].Style.Numberformat.Format = "0.000";
348
349      estimatedWorksheet.Cells["I2:I" + (rows + 1)].Style.Numberformat.Format = "0.000";
350      estimatedWorksheet.Cells["J2:J" + (rows + 1)].Formula = "IFERROR(IF(I2 > Model!EstimationLimitUpper, Model!EstimationLimitUpper, IF(I2 < Model!EstimationLimitLower, Model!EstimationLimitLower, I2)), AVERAGE(Model!EstimationLimitLower, Model!EstimationLimitUpper))";
351      estimatedWorksheet.Cells["J2:J" + (rows + 1)].Style.Numberformat.Format = "0.000";
352    }
353
354    private string PrepareFormula(string[] formulaParts) {
355      string preparedFormula = formulaParts[0];
356      foreach (var part in formulaParts.Skip(2)) {
357        var varMap = part.Split(new string[] { " = " }, StringSplitOptions.None);
358        var columnName = "$" + varMap[1] + "1";
359        preparedFormula = preparedFormula.Replace(columnName, "Inputs!$" + varMap[1] + "{0}");   //{0} will be replaced later with the row number
360      }
361      return preparedFormula;
362    }
363
364    private void WriteInputSheet(ExcelWorksheet inputsWorksheet, ExcelWorksheet datasetWorksheet, IEnumerable<string> list, Dataset dataset) {
365      //remark the performance of EPPlus drops dramatically
366      //if the data is not written row wise (from left to right) due the internal indices used.
367      var variableNames = dataset.VariableNames.Select((v, i) => new { variable = v, index = i + 1 }).ToDictionary(v => v.variable, v => v.index);
368      var nameMapping = list.Select(x => x.Split('=')[0].Trim()).ToArray();
369
370      for (int row = 1; row <= dataset.Rows + 1; row++) {
371        for (int column = 1; column < nameMapping.Length + 1; column++) {
372          int variableIndex = variableNames[nameMapping[column - 1]];
373          inputsWorksheet.Cells[row, column].Formula = datasetWorksheet.Cells[row, variableIndex].FullAddress;
374        }
375      }
376    }
377
378    private void WriteDatasetToExcel(ExcelWorksheet datasetWorksheet, IDataAnalysisProblemData problemData) {
379      //remark the performance of EPPlus drops dramatically
380      //if the data is not written row wise (from left to right) due the internal indices used.
381      Dataset dataset = problemData.Dataset;
382      var variableNames = dataset.VariableNames.ToList();
383      var doubleVariables = new HashSet<string>(dataset.DoubleVariables);
384
385      for (int col = 1; col <= variableNames.Count; col++)
386        datasetWorksheet.Cells[1, col].Value = variableNames[col - 1];
387
388      for (int row = 0; row < dataset.Rows; row++) {
389        for (int col = 0; col < variableNames.Count; col++) {
390          if (doubleVariables.Contains(variableNames[col]))
391            datasetWorksheet.Cells[row + 2, col + 1].Value = dataset.GetDoubleValue(variableNames[col], row);
392          else
393            datasetWorksheet.Cells[row + 2, col + 1].Value = dataset.GetValue(col, row);
394        }
395      }
396    }
397  }
398}
Note: See TracBrowser for help on using the repository browser.