source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification.Views/3.4/SymbolicDiscriminantFunctionClassificationSolutionExcelExporter.cs @ 11523

Last change on this file since 11523 was 11523, checked in by bburlacu, 8 years ago

#2098: Added support for boolean, conditional and comparison symbols in the SymbolicDataAnalysisExpressionExcelFormatter. Added SymbolicDiscriminantFunctionClassificationSolutionExcelExporter derived from the SymbolicSolutionExcelExporter. Updated view to use the specialized exporter.

File size: 12.1 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.IO;
4using System.Linq;
5using System.Text;
6using HeuristicLab.Problems.DataAnalysis.Symbolic.Views;
7using OfficeOpenXml;
8
9namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Classification.Views {
10  public class SymbolicDiscriminantFunctionClassificationSolutionExcelExporter : SymbolicSolutionExcelExporter {
11    protected override void ExportChart(string fileName, ISymbolicDataAnalysisSolution solution, string formula) {
12      ExportChart(fileName, solution as ISymbolicClassificationSolution, formula);
13    }
14
15    private void ExportChart(string fileName, ISymbolicClassificationSolution solution, string formula) {
16      FileInfo newFile = new FileInfo(fileName);
17      if (newFile.Exists) {
18        newFile.Delete();
19        newFile = new FileInfo(fileName);
20      }
21      var formulaParts = formula.Split(new string[] { Environment.NewLine }, StringSplitOptions.None);
22
23      using (ExcelPackage package = new ExcelPackage(newFile)) {
24        ExcelWorksheet modelWorksheet = package.Workbook.Worksheets.Add("Model");
25        FormatModelSheet(modelWorksheet, solution, formulaParts);
26
27        ExcelWorksheet datasetWorksheet = package.Workbook.Worksheets.Add("Dataset");
28        WriteDatasetToExcel(datasetWorksheet, solution.ProblemData);
29
30        ExcelWorksheet inputsWorksheet = package.Workbook.Worksheets.Add("Inputs");
31        WriteInputSheet(inputsWorksheet, datasetWorksheet, formulaParts.Skip(2), solution.ProblemData.Dataset);
32
33        ExcelWorksheet estimatedWorksheet = package.Workbook.Worksheets.Add("Estimated Values");
34        WriteEstimatedWorksheet(estimatedWorksheet, datasetWorksheet, formulaParts, solution);
35
36        ExcelWorksheet chartsWorksheet = package.Workbook.Worksheets.Add("Charts");
37        AddCharts(chartsWorksheet);
38        package.Workbook.Properties.Title = "Excel Export";
39        package.Workbook.Properties.Author = "HEAL";
40        package.Workbook.Properties.Comments = "Excel export of a symbolic data analysis solution from HeuristicLab";
41
42        package.Save();
43      }
44    }
45
46    protected void WriteEstimatedWorksheet(ExcelWorksheet estimatedWorksheet, ExcelWorksheet datasetWorksheet, string[] formulaParts, ISymbolicDataAnalysisSolution solution) {
47      WriteEstimatedWorksheet(estimatedWorksheet, datasetWorksheet, formulaParts, solution as SymbolicDiscriminantFunctionClassificationSolution);
48    }
49
50    protected override void FormatModelSheet(ExcelWorksheet modelWorksheet, ISymbolicDataAnalysisSolution solution, IEnumerable<string> formulaParts) {
51      FormatModelSheet(modelWorksheet, solution as SymbolicDiscriminantFunctionClassificationSolution, formulaParts);
52    }
53
54    private void FormatModelSheet(ExcelWorksheet modelWorksheet, SymbolicDiscriminantFunctionClassificationSolution solution, IEnumerable<string> formulaParts) {
55      int row = 1;
56      modelWorksheet.Cells[row, 1].Value = "Model";
57      modelWorksheet.Cells[row, 2].Value = solution.Name;
58
59      foreach (var part in formulaParts) {
60        modelWorksheet.Cells[row, 4].Value = part;
61        row++;
62      }
63
64      row = 2;
65      modelWorksheet.Cells[row, 1].Value = "Model Depth";
66      modelWorksheet.Cells[row, 2].Value = solution.Model.SymbolicExpressionTree.Depth;
67      row++;
68
69      modelWorksheet.Cells[row, 1].Value = "Model Length";
70      modelWorksheet.Cells[row, 2].Value = solution.Model.SymbolicExpressionTree.Length;
71      row += 2;
72
73      var thresholds = solution.Model.Thresholds.ToList();
74      // skip first (-inf) and last (+inf) thresholds
75      for (int i = 0; i < thresholds.Count; ++i) {
76        if (double.IsInfinity(thresholds[i]) || double.IsNaN(thresholds[i]))
77          continue;
78        modelWorksheet.Cells[row, 1].Value = "Threshold " + i;
79        modelWorksheet.Cells[row, 2].Value = thresholds[i];
80        ++row;
81      }
82      row++;
83
84      modelWorksheet.Cells[row, 1].Value = "Estimation Limits Lower";
85      modelWorksheet.Cells[row, 2].Value = Math.Max(solution.Model.LowerEstimationLimit, -9.99999999999999E+307); // minimal value supported by excel
86      modelWorksheet.Names.Add("EstimationLimitLower", modelWorksheet.Cells[row, 2]);
87      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
88      row++;
89
90      modelWorksheet.Cells[row, 1].Value = "Estimation Limits Upper";
91      modelWorksheet.Cells[row, 2].Value = Math.Min(solution.Model.UpperEstimationLimit, 9.99999999999999E+307);  // maximal value supported by excel
92      modelWorksheet.Names.Add("EstimationLimitUpper", modelWorksheet.Cells[row, 2]);
93      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
94      row += 2;
95
96      modelWorksheet.Cells[row, 1].Value = "Trainings Partition Start";
97      modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TrainingPartition.Start;
98      modelWorksheet.Names.Add(TRAININGSTART, modelWorksheet.Cells[row, 2]);
99      row++;
100
101      modelWorksheet.Cells[row, 1].Value = "Trainings Partition End";
102      modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TrainingPartition.End;
103      modelWorksheet.Names.Add(TRAININGEND, modelWorksheet.Cells[row, 2]);
104      row++;
105
106      modelWorksheet.Cells[row, 1].Value = "Test Partition Start";
107      modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TestPartition.Start;
108      modelWorksheet.Names.Add(TESTSTART, modelWorksheet.Cells[row, 2]);
109      row++;
110
111      modelWorksheet.Cells[row, 1].Value = "Test Partition End";
112      modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TestPartition.End;
113      modelWorksheet.Names.Add(TESTEND, modelWorksheet.Cells[row, 2]);
114      row += 2;
115
116      string excelTrainingTarget = Indirect("B", true);
117      string excelTrainingEstimated = Indirect("C", true);
118      string excelTrainingClassValues = Indirect("D", true);
119      string excelTrainingMeanError = Indirect("F", true);
120      string excelTrainingMSE = Indirect("G", true);
121
122      string excelTestTarget = Indirect("B", false);
123      string excelTestEstimated = Indirect("C", false);
124      string excelTestClassValues = Indirect("D", false);
125      string excelTestMeanError = Indirect("F", false);
126      string excelTestMSE = Indirect("G", false);
127
128      modelWorksheet.Cells[row, 1].Value = "Accuracy (training)";
129      modelWorksheet.Cells[row, 2].Formula = string.Format("SUMPRODUCT(({0}={1})*1)/COUNT({0})", excelTrainingClassValues, excelTrainingTarget);
130      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
131      row++;
132
133      modelWorksheet.Cells[row, 1].Value = "Accuracy (test)";
134      modelWorksheet.Cells[row, 2].Formula = string.Format("SUMPRODUCT(({0}={1})*1)/COUNT({0})", excelTestClassValues, excelTestTarget);
135      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
136      row++;
137
138      modelWorksheet.Cells[row, 1].Value = "Pearson's R² (training)";
139      modelWorksheet.Cells[row, 2].Formula = string.Format("POWER(PEARSON({0},{1}),2)", excelTrainingTarget, excelTrainingEstimated);
140      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
141      row++;
142
143      modelWorksheet.Cells[row, 1].Value = "Pearson's R² (test)";
144      modelWorksheet.Cells[row, 2].Formula = string.Format("POWER(PEARSON({0},{1}),2)", excelTestTarget, excelTestEstimated);
145      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
146      row++;
147
148      modelWorksheet.Cells[row, 1].Value = "Mean Squared Error (training)";
149      modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTrainingMSE);
150      modelWorksheet.Names.Add("TrainingMSE", modelWorksheet.Cells[row, 2]);
151      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
152      row++;
153
154      modelWorksheet.Cells[row, 1].Value = "Mean Squared Error (test)";
155      modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTestMSE);
156      modelWorksheet.Names.Add("TestMSE", modelWorksheet.Cells[row, 2]);
157      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
158      row++;
159
160      modelWorksheet.Cells[row, 1].Value = "Normalized Gini Coefficient (training)";
161      modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTrainingMeanError);
162      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
163      row++;
164
165      modelWorksheet.Cells[row, 1].Value = "Normalized Gini Coefficient (test)";
166      modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTestMeanError);
167      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
168      row++;
169
170      modelWorksheet.Cells["A1:B" + row].AutoFitColumns();
171
172      AddModelTreePicture(modelWorksheet, solution.Model);
173    }
174
175    private void WriteEstimatedWorksheet(ExcelWorksheet estimatedWorksheet, ExcelWorksheet datasetWorksheet, string[] formulaParts, SymbolicDiscriminantFunctionClassificationSolution solution) {
176      string preparedFormula = PrepareFormula(formulaParts);
177      int rows = solution.ProblemData.Dataset.Rows;
178      estimatedWorksheet.Cells[1, 1].Value = "Id"; // A
179      estimatedWorksheet.Cells[1, 2].Value = "Target Variable"; // B
180      estimatedWorksheet.Cells[1, 3].Value = "Estimated Values"; // C
181      estimatedWorksheet.Cells[1, 4].Value = "Estimated Class Values"; // D
182      estimatedWorksheet.Cells[1, 6].Value = "Error"; // E
183      estimatedWorksheet.Cells[1, 7].Value = "Squared Error"; // G
184      estimatedWorksheet.Cells[1, 9].Value = "Unbounded Estimated Values"; // I
185      estimatedWorksheet.Cells[1, 10].Value = "Bounded Estimated Values"; // J
186
187      estimatedWorksheet.Cells[1, 1, 1, 10].AutoFitColumns();
188
189      var thresholds = solution.Model.Thresholds.Where(x => !double.IsInfinity(x)).ToList();
190      var thresholdsFormula = GenerateThresholdsFormula(thresholds);
191
192      int targetIndex = solution.ProblemData.Dataset.VariableNames.ToList().FindIndex(x => x.Equals(solution.ProblemData.TargetVariable)) + 1;
193      for (int i = 0; i < rows; i++) {
194        estimatedWorksheet.Cells[i + 2, 1].Value = i;
195        estimatedWorksheet.Cells[i + 2, 2].Formula = datasetWorksheet.Cells[i + 2, targetIndex].FullAddress; // target values
196        estimatedWorksheet.Cells[i + 2, 9].Formula = string.Format(preparedFormula, i + 2); // formula (estimated) values
197      }
198      estimatedWorksheet.Cells["B2:B" + (rows + 1)].Style.Numberformat.Format = "0.000";
199
200      estimatedWorksheet.Cells["C2:C" + (rows + 1)].Formula = "J2";
201      estimatedWorksheet.Cells["C2:C" + (rows + 1)].Style.Numberformat.Format = "0.000";
202
203      estimatedWorksheet.Cells["D2:D" + (rows + 1)].Formula = thresholdsFormula;
204      estimatedWorksheet.Cells["D2:D" + (rows + 1)].Style.Numberformat.Format = "0.0";
205
206      estimatedWorksheet.Cells["F2:F" + (rows + 1)].Formula = "B2 - D2";
207      estimatedWorksheet.Cells["F2:F" + (rows + 1)].Style.Numberformat.Format = "0.0";
208
209      estimatedWorksheet.Cells["G2:G" + (rows + 1)].Formula = "POWER(F2, 2)";
210      estimatedWorksheet.Cells["G2:G" + (rows + 1)].Style.Numberformat.Format = "0.000";
211
212      estimatedWorksheet.Cells["I2:I" + (rows + 1)].Style.Numberformat.Format = "0.000";
213      estimatedWorksheet.Cells["J2:J" + (rows + 1)].Formula = "IFERROR(IF(I2 > Model!EstimationLimitUpper, Model!EstimationLimitUpper, IF(I2 < Model!EstimationLimitLower, Model!EstimationLimitLower, I2)), AVERAGE(Model!EstimationLimitLower, Model!EstimationLimitUpper))";
214      estimatedWorksheet.Cells["J2:J" + (rows + 1)].Style.Numberformat.Format = "0.000";
215    }
216
217    // this method assumes that the thresholds list is sorted in ascending order
218    private string GenerateThresholdsFormula(List<double> thresholds) {
219      if (thresholds.Count == 1) {
220        return String.Format("IF(J2 < {0}, 0, 1)", thresholds[0]);
221      }
222      var sb = new StringBuilder();
223      sb.Append(String.Format("IF(J2 < {0}, 0,", thresholds[0]));
224      for (int i = 1; i < thresholds.Count; ++i) {
225        double v = thresholds[i];
226        sb.Append(String.Format("IF(J2 < {0}, {1},", v, i));
227      }
228      for (int i = 1; i < thresholds.Count; ++i)
229        sb.Append(")");
230      sb.Append(")");
231      return sb.ToString();
232    }
233  }
234}
Note: See TracBrowser for help on using the repository browser.