1 | using System;
|
---|
2 | using System.Collections.Generic;
|
---|
3 | using System.IO;
|
---|
4 | using System.Linq;
|
---|
5 | using System.Text;
|
---|
6 | using HeuristicLab.Problems.DataAnalysis.Symbolic.Views;
|
---|
7 | using OfficeOpenXml;
|
---|
8 |
|
---|
9 | namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Classification.Views {
|
---|
10 | public class SymbolicDiscriminantFunctionClassificationSolutionExcelExporter : SymbolicSolutionExcelExporter {
|
---|
11 | protected override void ExportChart(string fileName, ISymbolicDataAnalysisSolution solution, string formula) {
|
---|
12 | ExportChart(fileName, solution as ISymbolicClassificationSolution, formula);
|
---|
13 | }
|
---|
14 |
|
---|
15 | private void ExportChart(string fileName, ISymbolicClassificationSolution solution, string formula) {
|
---|
16 | FileInfo newFile = new FileInfo(fileName);
|
---|
17 | if (newFile.Exists) {
|
---|
18 | newFile.Delete();
|
---|
19 | newFile = new FileInfo(fileName);
|
---|
20 | }
|
---|
21 | var formulaParts = formula.Split(new string[] { Environment.NewLine }, StringSplitOptions.None);
|
---|
22 |
|
---|
23 | using (ExcelPackage package = new ExcelPackage(newFile)) {
|
---|
24 | ExcelWorksheet modelWorksheet = package.Workbook.Worksheets.Add("Model");
|
---|
25 | FormatModelSheet(modelWorksheet, solution, formulaParts);
|
---|
26 |
|
---|
27 | ExcelWorksheet datasetWorksheet = package.Workbook.Worksheets.Add("Dataset");
|
---|
28 | WriteDatasetToExcel(datasetWorksheet, solution.ProblemData);
|
---|
29 |
|
---|
30 | ExcelWorksheet inputsWorksheet = package.Workbook.Worksheets.Add("Inputs");
|
---|
31 | WriteInputSheet(inputsWorksheet, datasetWorksheet, formulaParts.Skip(2), solution.ProblemData.Dataset);
|
---|
32 |
|
---|
33 | ExcelWorksheet estimatedWorksheet = package.Workbook.Worksheets.Add("Estimated Values");
|
---|
34 | WriteEstimatedWorksheet(estimatedWorksheet, datasetWorksheet, formulaParts, solution);
|
---|
35 |
|
---|
36 | ExcelWorksheet chartsWorksheet = package.Workbook.Worksheets.Add("Charts");
|
---|
37 | AddCharts(chartsWorksheet);
|
---|
38 | package.Workbook.Properties.Title = "Excel Export";
|
---|
39 | package.Workbook.Properties.Author = "HEAL";
|
---|
40 | package.Workbook.Properties.Comments = "Excel export of a symbolic data analysis solution from HeuristicLab";
|
---|
41 |
|
---|
42 | package.Save();
|
---|
43 | }
|
---|
44 | }
|
---|
45 |
|
---|
46 | protected void WriteEstimatedWorksheet(ExcelWorksheet estimatedWorksheet, ExcelWorksheet datasetWorksheet, string[] formulaParts, ISymbolicDataAnalysisSolution solution) {
|
---|
47 | WriteEstimatedWorksheet(estimatedWorksheet, datasetWorksheet, formulaParts, solution as SymbolicDiscriminantFunctionClassificationSolution);
|
---|
48 | }
|
---|
49 |
|
---|
50 | protected override void FormatModelSheet(ExcelWorksheet modelWorksheet, ISymbolicDataAnalysisSolution solution, IEnumerable<string> formulaParts) {
|
---|
51 | FormatModelSheet(modelWorksheet, solution as SymbolicDiscriminantFunctionClassificationSolution, formulaParts);
|
---|
52 | }
|
---|
53 |
|
---|
54 | private void FormatModelSheet(ExcelWorksheet modelWorksheet, SymbolicDiscriminantFunctionClassificationSolution solution, IEnumerable<string> formulaParts) {
|
---|
55 | int row = 1;
|
---|
56 | modelWorksheet.Cells[row, 1].Value = "Model";
|
---|
57 | modelWorksheet.Cells[row, 2].Value = solution.Name;
|
---|
58 |
|
---|
59 | foreach (var part in formulaParts) {
|
---|
60 | modelWorksheet.Cells[row, 4].Value = part;
|
---|
61 | row++;
|
---|
62 | }
|
---|
63 |
|
---|
64 | row = 2;
|
---|
65 | modelWorksheet.Cells[row, 1].Value = "Model Depth";
|
---|
66 | modelWorksheet.Cells[row, 2].Value = solution.Model.SymbolicExpressionTree.Depth;
|
---|
67 | row++;
|
---|
68 |
|
---|
69 | modelWorksheet.Cells[row, 1].Value = "Model Length";
|
---|
70 | modelWorksheet.Cells[row, 2].Value = solution.Model.SymbolicExpressionTree.Length;
|
---|
71 | row += 2;
|
---|
72 |
|
---|
73 | var thresholds = solution.Model.Thresholds.ToList();
|
---|
74 | // skip first (-inf) and last (+inf) thresholds
|
---|
75 | for (int i = 0; i < thresholds.Count; ++i) {
|
---|
76 | if (double.IsInfinity(thresholds[i]) || double.IsNaN(thresholds[i]))
|
---|
77 | continue;
|
---|
78 | modelWorksheet.Cells[row, 1].Value = "Threshold " + i;
|
---|
79 | modelWorksheet.Cells[row, 2].Value = thresholds[i];
|
---|
80 | ++row;
|
---|
81 | }
|
---|
82 | row++;
|
---|
83 |
|
---|
84 | modelWorksheet.Cells[row, 1].Value = "Estimation Limits Lower";
|
---|
85 | modelWorksheet.Cells[row, 2].Value = Math.Max(solution.Model.LowerEstimationLimit, -9.99999999999999E+307); // minimal value supported by excel
|
---|
86 | modelWorksheet.Names.Add("EstimationLimitLower", modelWorksheet.Cells[row, 2]);
|
---|
87 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
88 | row++;
|
---|
89 |
|
---|
90 | modelWorksheet.Cells[row, 1].Value = "Estimation Limits Upper";
|
---|
91 | modelWorksheet.Cells[row, 2].Value = Math.Min(solution.Model.UpperEstimationLimit, 9.99999999999999E+307); // maximal value supported by excel
|
---|
92 | modelWorksheet.Names.Add("EstimationLimitUpper", modelWorksheet.Cells[row, 2]);
|
---|
93 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
94 | row += 2;
|
---|
95 |
|
---|
96 | modelWorksheet.Cells[row, 1].Value = "Trainings Partition Start";
|
---|
97 | modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TrainingPartition.Start;
|
---|
98 | modelWorksheet.Names.Add(TRAININGSTART, modelWorksheet.Cells[row, 2]);
|
---|
99 | row++;
|
---|
100 |
|
---|
101 | modelWorksheet.Cells[row, 1].Value = "Trainings Partition End";
|
---|
102 | modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TrainingPartition.End;
|
---|
103 | modelWorksheet.Names.Add(TRAININGEND, modelWorksheet.Cells[row, 2]);
|
---|
104 | row++;
|
---|
105 |
|
---|
106 | modelWorksheet.Cells[row, 1].Value = "Test Partition Start";
|
---|
107 | modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TestPartition.Start;
|
---|
108 | modelWorksheet.Names.Add(TESTSTART, modelWorksheet.Cells[row, 2]);
|
---|
109 | row++;
|
---|
110 |
|
---|
111 | modelWorksheet.Cells[row, 1].Value = "Test Partition End";
|
---|
112 | modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TestPartition.End;
|
---|
113 | modelWorksheet.Names.Add(TESTEND, modelWorksheet.Cells[row, 2]);
|
---|
114 | row += 2;
|
---|
115 |
|
---|
116 | string excelTrainingTarget = Indirect("B", true);
|
---|
117 | string excelTrainingEstimated = Indirect("C", true);
|
---|
118 | string excelTrainingClassValues = Indirect("D", true);
|
---|
119 | string excelTrainingMeanError = Indirect("F", true);
|
---|
120 | string excelTrainingMSE = Indirect("G", true);
|
---|
121 |
|
---|
122 | string excelTestTarget = Indirect("B", false);
|
---|
123 | string excelTestEstimated = Indirect("C", false);
|
---|
124 | string excelTestClassValues = Indirect("D", false);
|
---|
125 | string excelTestMeanError = Indirect("F", false);
|
---|
126 | string excelTestMSE = Indirect("G", false);
|
---|
127 |
|
---|
128 | modelWorksheet.Cells[row, 1].Value = "Accuracy (training)";
|
---|
129 | modelWorksheet.Cells[row, 2].Formula = string.Format("SUMPRODUCT(({0}={1})*1)/COUNT({0})", excelTrainingClassValues, excelTrainingTarget);
|
---|
130 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
|
---|
131 | row++;
|
---|
132 |
|
---|
133 | modelWorksheet.Cells[row, 1].Value = "Accuracy (test)";
|
---|
134 | modelWorksheet.Cells[row, 2].Formula = string.Format("SUMPRODUCT(({0}={1})*1)/COUNT({0})", excelTestClassValues, excelTestTarget);
|
---|
135 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
|
---|
136 | row++;
|
---|
137 |
|
---|
138 | modelWorksheet.Cells[row, 1].Value = "Pearson's R² (training)";
|
---|
139 | modelWorksheet.Cells[row, 2].Formula = string.Format("POWER(PEARSON({0},{1}),2)", excelTrainingTarget, excelTrainingEstimated);
|
---|
140 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
|
---|
141 | row++;
|
---|
142 |
|
---|
143 | modelWorksheet.Cells[row, 1].Value = "Pearson's R² (test)";
|
---|
144 | modelWorksheet.Cells[row, 2].Formula = string.Format("POWER(PEARSON({0},{1}),2)", excelTestTarget, excelTestEstimated);
|
---|
145 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
|
---|
146 | row++;
|
---|
147 |
|
---|
148 | modelWorksheet.Cells[row, 1].Value = "Mean Squared Error (training)";
|
---|
149 | modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTrainingMSE);
|
---|
150 | modelWorksheet.Names.Add("TrainingMSE", modelWorksheet.Cells[row, 2]);
|
---|
151 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
152 | row++;
|
---|
153 |
|
---|
154 | modelWorksheet.Cells[row, 1].Value = "Mean Squared Error (test)";
|
---|
155 | modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTestMSE);
|
---|
156 | modelWorksheet.Names.Add("TestMSE", modelWorksheet.Cells[row, 2]);
|
---|
157 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
158 | row++;
|
---|
159 |
|
---|
160 | modelWorksheet.Cells[row, 1].Value = "Normalized Gini Coefficient (training)";
|
---|
161 | modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTrainingMeanError);
|
---|
162 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
163 | row++;
|
---|
164 |
|
---|
165 | modelWorksheet.Cells[row, 1].Value = "Normalized Gini Coefficient (test)";
|
---|
166 | modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTestMeanError);
|
---|
167 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
168 | row++;
|
---|
169 |
|
---|
170 | modelWorksheet.Cells["A1:B" + row].AutoFitColumns();
|
---|
171 |
|
---|
172 | AddModelTreePicture(modelWorksheet, solution.Model);
|
---|
173 | }
|
---|
174 |
|
---|
175 | private void WriteEstimatedWorksheet(ExcelWorksheet estimatedWorksheet, ExcelWorksheet datasetWorksheet, string[] formulaParts, SymbolicDiscriminantFunctionClassificationSolution solution) {
|
---|
176 | string preparedFormula = PrepareFormula(formulaParts);
|
---|
177 | int rows = solution.ProblemData.Dataset.Rows;
|
---|
178 | estimatedWorksheet.Cells[1, 1].Value = "Id"; // A
|
---|
179 | estimatedWorksheet.Cells[1, 2].Value = "Target Variable"; // B
|
---|
180 | estimatedWorksheet.Cells[1, 3].Value = "Estimated Values"; // C
|
---|
181 | estimatedWorksheet.Cells[1, 4].Value = "Estimated Class Values"; // D
|
---|
182 | estimatedWorksheet.Cells[1, 6].Value = "Error"; // E
|
---|
183 | estimatedWorksheet.Cells[1, 7].Value = "Squared Error"; // G
|
---|
184 | estimatedWorksheet.Cells[1, 9].Value = "Unbounded Estimated Values"; // I
|
---|
185 | estimatedWorksheet.Cells[1, 10].Value = "Bounded Estimated Values"; // J
|
---|
186 |
|
---|
187 | estimatedWorksheet.Cells[1, 1, 1, 10].AutoFitColumns();
|
---|
188 |
|
---|
189 | var thresholds = solution.Model.Thresholds.Where(x => !double.IsInfinity(x)).ToList();
|
---|
190 | var thresholdsFormula = GenerateThresholdsFormula(thresholds);
|
---|
191 |
|
---|
192 | int targetIndex = solution.ProblemData.Dataset.VariableNames.ToList().FindIndex(x => x.Equals(solution.ProblemData.TargetVariable)) + 1;
|
---|
193 | for (int i = 0; i < rows; i++) {
|
---|
194 | estimatedWorksheet.Cells[i + 2, 1].Value = i;
|
---|
195 | estimatedWorksheet.Cells[i + 2, 2].Formula = datasetWorksheet.Cells[i + 2, targetIndex].FullAddress; // target values
|
---|
196 | estimatedWorksheet.Cells[i + 2, 9].Formula = string.Format(preparedFormula, i + 2); // formula (estimated) values
|
---|
197 | }
|
---|
198 | estimatedWorksheet.Cells["B2:B" + (rows + 1)].Style.Numberformat.Format = "0.000";
|
---|
199 |
|
---|
200 | estimatedWorksheet.Cells["C2:C" + (rows + 1)].Formula = "J2";
|
---|
201 | estimatedWorksheet.Cells["C2:C" + (rows + 1)].Style.Numberformat.Format = "0.000";
|
---|
202 |
|
---|
203 | estimatedWorksheet.Cells["D2:D" + (rows + 1)].Formula = thresholdsFormula;
|
---|
204 | estimatedWorksheet.Cells["D2:D" + (rows + 1)].Style.Numberformat.Format = "0.0";
|
---|
205 |
|
---|
206 | estimatedWorksheet.Cells["F2:F" + (rows + 1)].Formula = "B2 - D2";
|
---|
207 | estimatedWorksheet.Cells["F2:F" + (rows + 1)].Style.Numberformat.Format = "0.0";
|
---|
208 |
|
---|
209 | estimatedWorksheet.Cells["G2:G" + (rows + 1)].Formula = "POWER(F2, 2)";
|
---|
210 | estimatedWorksheet.Cells["G2:G" + (rows + 1)].Style.Numberformat.Format = "0.000";
|
---|
211 |
|
---|
212 | estimatedWorksheet.Cells["I2:I" + (rows + 1)].Style.Numberformat.Format = "0.000";
|
---|
213 | estimatedWorksheet.Cells["J2:J" + (rows + 1)].Formula = "IFERROR(IF(I2 > Model!EstimationLimitUpper, Model!EstimationLimitUpper, IF(I2 < Model!EstimationLimitLower, Model!EstimationLimitLower, I2)), AVERAGE(Model!EstimationLimitLower, Model!EstimationLimitUpper))";
|
---|
214 | estimatedWorksheet.Cells["J2:J" + (rows + 1)].Style.Numberformat.Format = "0.000";
|
---|
215 | }
|
---|
216 |
|
---|
217 | // this method assumes that the thresholds list is sorted in ascending order
|
---|
218 | private string GenerateThresholdsFormula(List<double> thresholds) {
|
---|
219 | if (thresholds.Count == 1) {
|
---|
220 | return String.Format("IF(J2 < {0}, 0, 1)", thresholds[0]);
|
---|
221 | }
|
---|
222 | var sb = new StringBuilder();
|
---|
223 | sb.Append(String.Format("IF(J2 < {0}, 0,", thresholds[0]));
|
---|
224 | for (int i = 1; i < thresholds.Count; ++i) {
|
---|
225 | double v = thresholds[i];
|
---|
226 | sb.Append(String.Format("IF(J2 < {0}, {1},", v, i));
|
---|
227 | }
|
---|
228 | for (int i = 1; i < thresholds.Count; ++i)
|
---|
229 | sb.Append(")");
|
---|
230 | sb.Append(")");
|
---|
231 | return sb.ToString();
|
---|
232 | }
|
---|
233 | }
|
---|
234 | }
|
---|