Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Views/3.4/Exporters/SymbolicSolutionExcelExporter.cs @ 10703

Last change on this file since 10703 was 9994, checked in by gkronber, 11 years ago

#2102 handled estimation bounds in Excel exporter specifically to prevent writing extreme values that Excel can't handle (necessary to support export of linear regression solutions).

File size: 19.0 KB
RevLine 
[9580]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.IO;
25using System.Linq;
26using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Views;
27using OfficeOpenXml;
28using OfficeOpenXml.Drawing.Chart;
29
30namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Views {
[9973]31  public class SymbolicSolutionExcelExporter : IDataAnalysisSolutionExporter {
[9580]32    private const string TRAININGSTART = "TrainingStart";
33    private const string TRAININGEND = "TrainingEnd";
34    private const string TESTSTART = "TestStart";
35    private const string TESTEND = "TestEnd";
36
37
[9973]38    public string FileTypeFilter {
39      get { return "Excel 2007 file (*.xlsx)|*.xlsx"; }
[9580]40    }
[9973]41    public bool Supports(IDataAnalysisSolution solution) {
42      return solution is ISymbolicDataAnalysisSolution &&
43        solution is IRegressionSolution;
[9580]44    }
45
[9973]46    public void Export(IDataAnalysisSolution solution, string fileName) {
47      var symbSolution = solution as ISymbolicDataAnalysisSolution;
48      if (symbSolution == null) throw new NotSupportedException("This solution cannot be exported to Excel");
[9580]49      var formatter = new SymbolicDataAnalysisExpressionExcelFormatter();
[9973]50      var formula = formatter.Format(symbSolution.Model.SymbolicExpressionTree, solution.ProblemData.Dataset);
51      ExportChart(fileName, symbSolution, formula);
[9626]52    }
[9580]53
[9626]54    private void ExportChart(string fileName, ISymbolicDataAnalysisSolution solution, string formula) {
55      FileInfo newFile = new FileInfo(fileName);
56      if (newFile.Exists) {
57        newFile.Delete();
58        newFile = new FileInfo(fileName);
59      }
60      var formulaParts = formula.Split(new string[] { Environment.NewLine }, StringSplitOptions.None);
[9580]61
[9626]62      using (ExcelPackage package = new ExcelPackage(newFile)) {
63        ExcelWorksheet modelWorksheet = package.Workbook.Worksheets.Add("Model");
64        FormatModelSheet(modelWorksheet, solution, formulaParts);
[9580]65
[9626]66        ExcelWorksheet datasetWorksheet = package.Workbook.Worksheets.Add("Dataset");
67        WriteDatasetToExcel(datasetWorksheet, solution.ProblemData);
[9585]68
[9626]69        ExcelWorksheet inputsWorksheet = package.Workbook.Worksheets.Add("Inputs");
70        WriteInputSheet(inputsWorksheet, datasetWorksheet, formulaParts.Skip(2), solution.ProblemData.Dataset);
[9580]71
[9626]72        if (solution is IRegressionSolution) {
73          ExcelWorksheet estimatedWorksheet = package.Workbook.Worksheets.Add("Estimated Values");
74          WriteEstimatedWorksheet(estimatedWorksheet, datasetWorksheet, formulaParts, solution as IRegressionSolution);
75
76          ExcelWorksheet chartsWorksheet = package.Workbook.Worksheets.Add("Charts");
77          AddCharts(chartsWorksheet);
[9580]78        }
[9626]79        package.Workbook.Properties.Title = "Excel Export";
80        package.Workbook.Properties.Author = "HEAL";
81        package.Workbook.Properties.Comments = "Excel export of a symbolic data analysis solution from HeuristicLab";
82
83        package.Save();
[9580]84      }
85    }
86
87    private void FormatModelSheet(ExcelWorksheet modelWorksheet, ISymbolicDataAnalysisSolution solution, IEnumerable<string> formulaParts) {
88      int row = 1;
89      modelWorksheet.Cells[row, 1].Value = "Model";
90      modelWorksheet.Cells[row, 2].Value = solution.Name;
91
92      foreach (var part in formulaParts) {
93        modelWorksheet.Cells[row, 4].Value = part;
94        row++;
95      }
96
97      row = 2;
98      modelWorksheet.Cells[row, 1].Value = "Model Depth";
99      modelWorksheet.Cells[row, 2].Value = solution.Model.SymbolicExpressionTree.Depth;
100      row++;
101
102      modelWorksheet.Cells[row, 1].Value = "Model Length";
103      modelWorksheet.Cells[row, 2].Value = solution.Model.SymbolicExpressionTree.Length;
104      row += 2;
105
[9587]106      modelWorksheet.Cells[row, 1].Value = "Estimation Limits Lower";
[9994]107      modelWorksheet.Cells[row, 2].Value = Math.Max(solution.Model.LowerEstimationLimit, -9.99999999999999E+307); // minimal value supported by excel
[9587]108      modelWorksheet.Names.Add("EstimationLimitLower", modelWorksheet.Cells[row, 2]);
[9994]109      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
[9587]110      row++;
[9580]111
[9587]112      modelWorksheet.Cells[row, 1].Value = "Estimation Limits Upper";
[9994]113      modelWorksheet.Cells[row, 2].Value = Math.Min(solution.Model.UpperEstimationLimit, 9.99999999999999E+307);  // maximal value supported by excel
[9587]114      modelWorksheet.Names.Add("EstimationLimitUpper", modelWorksheet.Cells[row, 2]);
[9994]115      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
[9587]116      row += 2;
[9580]117
118      modelWorksheet.Cells[row, 1].Value = "Trainings Partition Start";
119      modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TrainingPartition.Start;
120      modelWorksheet.Names.Add(TRAININGSTART, modelWorksheet.Cells[row, 2]);
121      row++;
122
123      modelWorksheet.Cells[row, 1].Value = "Trainings Partition End";
124      modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TrainingPartition.End;
125      modelWorksheet.Names.Add(TRAININGEND, modelWorksheet.Cells[row, 2]);
126      row++;
127
128      modelWorksheet.Cells[row, 1].Value = "Test Partition Start";
129      modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TestPartition.Start;
130      modelWorksheet.Names.Add(TESTSTART, modelWorksheet.Cells[row, 2]);
131      row++;
132
133      modelWorksheet.Cells[row, 1].Value = "Test Partition End";
134      modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TestPartition.End;
135      modelWorksheet.Names.Add(TESTEND, modelWorksheet.Cells[row, 2]);
136      row += 2;
137
138      string excelTrainingTarget = Indirect("B", true);
139      string excelTrainingEstimated = Indirect("C", true);
140      string excelTrainingAbsoluteError = Indirect("D", true);
141      string excelTrainingRelativeError = Indirect("E", true);
142      string excelTrainingMeanError = Indirect("F", true);
143      string excelTrainingMSE = Indirect("G", true);
144
145      string excelTestTarget = Indirect("B", false);
146      string excelTestEstimated = Indirect("C", false);
147      string excelTestAbsoluteError = Indirect("D", false);
148      string excelTestRelativeError = Indirect("E", false);
149      string excelTestMeanError = Indirect("F", false);
150      string excelTestMSE = Indirect("G", false);
151
152      modelWorksheet.Cells[row, 1].Value = "Pearson's R² (training)";
153      modelWorksheet.Cells[row, 2].Formula = string.Format("POWER(PEARSON({0},{1}),2)", excelTrainingTarget, excelTrainingEstimated);
[9607]154      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
[9580]155      row++;
156
157      modelWorksheet.Cells[row, 1].Value = "Pearson's R² (test)";
158      modelWorksheet.Cells[row, 2].Formula = string.Format("POWER(PEARSON({0},{1}),2)", excelTestTarget, excelTestEstimated);
[9607]159      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
[9580]160      row++;
161
162      modelWorksheet.Cells[row, 1].Value = "Mean Squared Error (training)";
163      modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTrainingMSE);
164      modelWorksheet.Names.Add("TrainingMSE", modelWorksheet.Cells[row, 2]);
[9994]165      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
[9580]166      row++;
167
168      modelWorksheet.Cells[row, 1].Value = "Mean Squared Error (test)";
169      modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTestMSE);
170      modelWorksheet.Names.Add("TestMSE", modelWorksheet.Cells[row, 2]);
[9994]171      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
[9580]172      row++;
173
174      modelWorksheet.Cells[row, 1].Value = "Mean absolute error (training)";
175      modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTrainingAbsoluteError);
[9994]176      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
[9580]177      row++;
178
179      modelWorksheet.Cells[row, 1].Value = "Mean absolute error (test)";
180      modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTestAbsoluteError);
[9994]181      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
[9580]182      row++;
183
184      modelWorksheet.Cells[row, 1].Value = "Mean error (training)";
185      modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTrainingMeanError);
[9994]186      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
[9580]187      row++;
188
189      modelWorksheet.Cells[row, 1].Value = "Mean error (test)";
190      modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTestMeanError);
[9994]191      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
[9580]192      row++;
193
194      modelWorksheet.Cells[row, 1].Value = "Average relative error (training)";
195      modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTrainingRelativeError);
196      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.00%";
197      row++;
198
199      modelWorksheet.Cells[row, 1].Value = "Average relative error (test)";
200      modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTestRelativeError);
201      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.00%";
202      row++;
203
204      modelWorksheet.Cells[row, 1].Value = "Normalized Mean Squared error (training)";
205      modelWorksheet.Cells[row, 2].Formula = string.Format("TrainingMSE / VAR({0})", excelTrainingTarget);
[9994]206      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
[9580]207      row++;
208
209      modelWorksheet.Cells[row, 1].Value = "Normalized Mean Squared error  (test)";
210      modelWorksheet.Cells[row, 2].Formula = string.Format("TestMSE / VAR({0})", excelTestTarget);
[9994]211      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
[9580]212
213      modelWorksheet.Cells["A1:B" + row].AutoFitColumns();
214
215      AddModelTreePicture(modelWorksheet, solution.Model);
216    }
217
218    private string Indirect(string column, bool training) {
219      if (training) {
220        return string.Format("INDIRECT(\"'Estimated Values'!{0}\"&{1}+2&\":{0}\"&{2}+1)", column, TRAININGSTART, TRAININGEND);
221      } else {
222        return string.Format("INDIRECT(\"'Estimated Values'!{0}\"&{1}+2&\":{0}\"&{2}+1)", column, TESTSTART, TESTEND);
223      }
224    }
225
226    private void AddCharts(ExcelWorksheet chartsWorksheet) {
227      chartsWorksheet.Names.AddFormula("AllId", "OFFSET('Estimated Values'!$A$1,1,0, COUNTA('Estimated Values'!$A:$A)-1)");
228      chartsWorksheet.Names.AddFormula("AllTarget", "OFFSET('Estimated Values'!$B$1,1,0, COUNTA('Estimated Values'!$B:$B)-1)");
229      chartsWorksheet.Names.AddFormula("AllEstimated", "OFFSET('Estimated Values'!$C$1,1,0, COUNTA('Estimated Values'!$C:$C)-1)");
230      chartsWorksheet.Names.AddFormula("TrainingId", "OFFSET('Estimated Values'!$A$1,Model!TrainingStart + 1,0, Model!TrainingEnd - Model!TrainingStart)");
231      chartsWorksheet.Names.AddFormula("TrainingTarget", "OFFSET('Estimated Values'!$B$1,Model!TrainingStart + 1,0, Model!TrainingEnd - Model!TrainingStart)");
232      chartsWorksheet.Names.AddFormula("TrainingEstimated", "OFFSET('Estimated Values'!$C$1,Model!TrainingStart + 1,0, Model!TrainingEnd - Model!TrainingStart)");
233      chartsWorksheet.Names.AddFormula("TestId", "OFFSET('Estimated Values'!$A$1,Model!TestStart + 1,0, Model!TestEnd - Model!TestStart)");
234      chartsWorksheet.Names.AddFormula("TestTarget", "OFFSET('Estimated Values'!$B$1,Model!TestStart + 1,0, Model!TestEnd - Model!TestStart)");
235      chartsWorksheet.Names.AddFormula("TestEstimated", "OFFSET('Estimated Values'!$C$1,Model!TestStart + 1,0, Model!TestEnd - Model!TestStart)");
236
237      var scatterPlot = chartsWorksheet.Drawings.AddChart("scatterPlot", eChartType.XYScatter);
238      scatterPlot.SetSize(800, 400);
239      scatterPlot.SetPosition(0, 0);
240      scatterPlot.Title.Text = "Scatter Plot";
241      var seriesAll = scatterPlot.Series.Add("AllTarget", "AllEstimated");
242      seriesAll.Header = "All";
243      var seriesTraining = scatterPlot.Series.Add("TrainingTarget", "TrainingEstimated");
244      seriesTraining.Header = "Training";
245      var seriesTest = scatterPlot.Series.Add("TestTarget", "TestEstimated");
246      seriesTest.Header = "Test";
247
248      var lineChart = chartsWorksheet.Drawings.AddChart("lineChart", eChartType.XYScatterLinesNoMarkers);
249      lineChart.SetSize(800, 400);
250      lineChart.SetPosition(400, 0);
251      lineChart.Title.Text = "LineChart";
252      var lineTarget = lineChart.Series.Add("AllTarget", "AllId");
253      lineTarget.Header = "Target";
254      var lineAll = lineChart.Series.Add("AllEstimated", "AllId");
255      lineAll.Header = "All";
256      var lineTraining = lineChart.Series.Add("TrainingEstimated", "TrainingId");
257      lineTraining.Header = "Training";
258      var lineTest = lineChart.Series.Add("TestEstimated", "TestId");
259      lineTest.Header = "Test";
260    }
261
262    private void AddModelTreePicture(ExcelWorksheet modelWorksheet, ISymbolicDataAnalysisModel model) {
263      SymbolicExpressionTreeChart modelTreePicture = new SymbolicExpressionTreeChart();
264      modelTreePicture.Tree = model.SymbolicExpressionTree;
265      string tmpFilename = Path.GetTempFileName();
266      modelTreePicture.Width = 1000;
267      modelTreePicture.Height = 500;
268      modelTreePicture.SaveImageAsEmf(tmpFilename);
269
270      FileInfo fi = new FileInfo(tmpFilename);
271      var excelModelTreePic = modelWorksheet.Drawings.AddPicture("ModelTree", fi);
272      excelModelTreePic.SetSize(50);
273      excelModelTreePic.SetPosition(2, 0, 6, 0);
274    }
275
276    private void WriteEstimatedWorksheet(ExcelWorksheet estimatedWorksheet, ExcelWorksheet datasetWorksheet, string[] formulaParts, IRegressionSolution solution) {
277      string preparedFormula = PrepareFormula(formulaParts);
278      int rows = solution.ProblemData.Dataset.Rows;
279      estimatedWorksheet.Cells[1, 1].Value = "Id";
280      estimatedWorksheet.Cells[1, 2].Value = "Target Variable";
281      estimatedWorksheet.Cells[1, 3].Value = "Estimated Values";
282      estimatedWorksheet.Cells[1, 4].Value = "Absolute Error";
283      estimatedWorksheet.Cells[1, 5].Value = "Relative Error";
284      estimatedWorksheet.Cells[1, 6].Value = "Error";
285      estimatedWorksheet.Cells[1, 7].Value = "Squared Error";
286      estimatedWorksheet.Cells[1, 9].Value = "Unbounded Estimated Values";
287      estimatedWorksheet.Cells[1, 10].Value = "Bounded Estimated Values";
288
289      estimatedWorksheet.Cells[1, 1, 1, 10].AutoFitColumns();
290
291      int targetIndex = solution.ProblemData.Dataset.VariableNames.ToList().FindIndex(x => x.Equals(solution.ProblemData.TargetVariable)) + 1;
292      for (int i = 0; i < rows; i++) {
293        estimatedWorksheet.Cells[i + 2, 1].Value = i;
294        estimatedWorksheet.Cells[i + 2, 2].Formula = datasetWorksheet.Cells[i + 2, targetIndex].FullAddress;
295        estimatedWorksheet.Cells[i + 2, 9].Formula = string.Format(preparedFormula, i + 2);
296      }
[9607]297      estimatedWorksheet.Cells["B2:B" + (rows + 1)].Style.Numberformat.Format = "0.000";
[9580]298
299      estimatedWorksheet.Cells["C2:C" + (rows + 1)].Formula = "J2";
[9607]300      estimatedWorksheet.Cells["C2:C" + (rows + 1)].Style.Numberformat.Format = "0.000";
[9580]301      estimatedWorksheet.Cells["D2:D" + (rows + 1)].Formula = "ABS(B2 - C2)";
[9607]302      estimatedWorksheet.Cells["D2:D" + (rows + 1)].Style.Numberformat.Format = "0.000";
[9600]303      estimatedWorksheet.Cells["E2:E" + (rows + 1)].Formula = "ABS(D2 / B2)";
[9607]304      estimatedWorksheet.Cells["E2:E" + (rows + 1)].Style.Numberformat.Format = "0.000";
[9580]305      estimatedWorksheet.Cells["F2:F" + (rows + 1)].Formula = "C2 - B2";
[9607]306      estimatedWorksheet.Cells["F2:F" + (rows + 1)].Style.Numberformat.Format = "0.000";
[9580]307      estimatedWorksheet.Cells["G2:G" + (rows + 1)].Formula = "POWER(F2, 2)";
[9607]308      estimatedWorksheet.Cells["G2:G" + (rows + 1)].Style.Numberformat.Format = "0.000";
[9580]309
[9607]310      estimatedWorksheet.Cells["I2:I" + (rows + 1)].Style.Numberformat.Format = "0.000";
[9583]311      estimatedWorksheet.Cells["J2:J" + (rows + 1)].Formula = "IFERROR(IF(I2 > Model!EstimationLimitUpper, Model!EstimationLimitUpper, IF(I2 < Model!EstimationLimitLower, Model!EstimationLimitLower, I2)), AVERAGE(Model!EstimationLimitLower, Model!EstimationLimitUpper))";
[9607]312      estimatedWorksheet.Cells["J2:J" + (rows + 1)].Style.Numberformat.Format = "0.000";
[9580]313    }
314
315    private string PrepareFormula(string[] formulaParts) {
316      string preparedFormula = formulaParts[0];
317      foreach (var part in formulaParts.Skip(2)) {
318        var varMap = part.Split(new string[] { " = " }, StringSplitOptions.None);
319        var columnName = "$" + varMap[1] + "1";
320        preparedFormula = preparedFormula.Replace(columnName, "Inputs!$" + varMap[1] + "{0}");   //{0} will be replaced later with the row number
321      }
322      return preparedFormula;
323    }
324
325    private void WriteInputSheet(ExcelWorksheet inputsWorksheet, ExcelWorksheet datasetWorksheet, IEnumerable<string> list, Dataset dataset) {
[9699]326      //remark the performance of EPPlus drops dramatically
327      //if the data is not written row wise (from left to right) due the internal indices used.
328      var variableNames = dataset.VariableNames.Select((v, i) => new { variable = v, index = i + 1 }).ToDictionary(v => v.variable, v => v.index);
329      var nameMapping = list.Select(x => x.Split('=')[0].Trim()).ToArray();
330
331      for (int row = 1; row <= dataset.Rows + 1; row++) {
332        for (int column = 1; column < nameMapping.Length + 1; column++) {
333          int variableIndex = variableNames[nameMapping[column - 1]];
334          inputsWorksheet.Cells[row, column].Formula = datasetWorksheet.Cells[row, variableIndex].FullAddress;
[9580]335        }
336      }
337    }
338
339    private void WriteDatasetToExcel(ExcelWorksheet datasetWorksheet, IDataAnalysisProblemData problemData) {
[9699]340      //remark the performance of EPPlus drops dramatically
341      //if the data is not written row wise (from left to right) due the internal indices used.
[9580]342      Dataset dataset = problemData.Dataset;
343      var variableNames = dataset.VariableNames.ToList();
[9699]344      var doubleVariables = new HashSet<string>(dataset.DoubleVariables);
345
346      for (int col = 1; col <= variableNames.Count; col++)
[9580]347        datasetWorksheet.Cells[1, col].Value = variableNames[col - 1];
[9699]348
349      for (int row = 0; row < dataset.Rows; row++) {
350        for (int col = 0; col < variableNames.Count; col++) {
351          if (doubleVariables.Contains(variableNames[col]))
352            datasetWorksheet.Cells[row + 2, col + 1].Value = dataset.GetDoubleValue(variableNames[col], row);
353          else
[9906]354            datasetWorksheet.Cells[row + 2, col + 1].Value = dataset.GetValue(row, col);
[9580]355        }
356      }
357    }
358  }
359}
Note: See TracBrowser for help on using the repository browser.