1 | using System;
|
---|
2 | using System.Collections.Generic;
|
---|
3 | using System.IO;
|
---|
4 | using System.Linq;
|
---|
5 | using System.Text;
|
---|
6 | using HeuristicLab.Problems.DataAnalysis.Symbolic.Views;
|
---|
7 | using OfficeOpenXml;
|
---|
8 | using OfficeOpenXml.Drawing.Chart;
|
---|
9 |
|
---|
10 | namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Classification.Views {
|
---|
11 | public class SymbolicDiscriminantFunctionClassificationSolutionExcelExporter : SymbolicSolutionExcelExporter {
|
---|
12 | public override void Export(IDataAnalysisSolution solution, string fileName) {
|
---|
13 | var symbDiscriminantSolution = solution as SymbolicDiscriminantFunctionClassificationSolution;
|
---|
14 | if (symbDiscriminantSolution == null) throw new NotSupportedException("This solution cannot be exported to Excel");
|
---|
15 | var formatter = new SymbolicDataAnalysisExpressionExcelFormatter();
|
---|
16 | var formula = formatter.Format(symbDiscriminantSolution.Model.SymbolicExpressionTree, solution.ProblemData.Dataset);
|
---|
17 | ExportChart(fileName, symbDiscriminantSolution, formula);
|
---|
18 | }
|
---|
19 |
|
---|
20 | private void ExportChart(string fileName, SymbolicDiscriminantFunctionClassificationSolution solution, string formula) {
|
---|
21 | FileInfo newFile = new FileInfo(fileName);
|
---|
22 | if (newFile.Exists) {
|
---|
23 | newFile.Delete();
|
---|
24 | newFile = new FileInfo(fileName);
|
---|
25 | }
|
---|
26 | var formulaParts = formula.Split(new string[] { Environment.NewLine }, StringSplitOptions.None);
|
---|
27 |
|
---|
28 | using (ExcelPackage package = new ExcelPackage(newFile)) {
|
---|
29 | ExcelWorksheet modelWorksheet = package.Workbook.Worksheets.Add("Model");
|
---|
30 | FormatModelSheet(modelWorksheet, solution, formulaParts);
|
---|
31 |
|
---|
32 | ExcelWorksheet datasetWorksheet = package.Workbook.Worksheets.Add("Dataset");
|
---|
33 | WriteDatasetToExcel(datasetWorksheet, solution.ProblemData);
|
---|
34 |
|
---|
35 | ExcelWorksheet inputsWorksheet = package.Workbook.Worksheets.Add("Inputs");
|
---|
36 | WriteInputSheet(inputsWorksheet, datasetWorksheet, formulaParts.Skip(2), solution.ProblemData.Dataset);
|
---|
37 |
|
---|
38 | ExcelWorksheet estimatedWorksheet = package.Workbook.Worksheets.Add("Estimated Values");
|
---|
39 | WriteEstimatedWorksheet(estimatedWorksheet, datasetWorksheet, formulaParts, solution);
|
---|
40 |
|
---|
41 | ExcelWorksheet chartsWorksheet = package.Workbook.Worksheets.Add("Charts");
|
---|
42 | AddCharts(chartsWorksheet, solution);
|
---|
43 | package.Workbook.Properties.Title = "Excel Export";
|
---|
44 | package.Workbook.Properties.Author = "HEAL";
|
---|
45 | package.Workbook.Properties.Comments = "Excel export of a symbolic data analysis solution from HeuristicLab";
|
---|
46 |
|
---|
47 | package.Save();
|
---|
48 | }
|
---|
49 | }
|
---|
50 |
|
---|
51 | private void FormatModelSheet(ExcelWorksheet modelWorksheet, SymbolicDiscriminantFunctionClassificationSolution solution, IEnumerable<string> formulaParts) {
|
---|
52 | int row = 1;
|
---|
53 | modelWorksheet.Cells[row, 1].Value = "Model";
|
---|
54 | modelWorksheet.Cells[row, 2].Value = solution.Name;
|
---|
55 |
|
---|
56 | foreach (var part in formulaParts) {
|
---|
57 | modelWorksheet.Cells[row, 4].Value = part;
|
---|
58 | row++;
|
---|
59 | }
|
---|
60 |
|
---|
61 | row = 2;
|
---|
62 | modelWorksheet.Cells[row, 1].Value = "Model Depth";
|
---|
63 | modelWorksheet.Cells[row, 2].Value = solution.Model.SymbolicExpressionTree.Depth;
|
---|
64 | row++;
|
---|
65 |
|
---|
66 | modelWorksheet.Cells[row, 1].Value = "Model Length";
|
---|
67 | modelWorksheet.Cells[row, 2].Value = solution.Model.SymbolicExpressionTree.Length;
|
---|
68 | row += 2;
|
---|
69 |
|
---|
70 | var thresholds = solution.Model.Thresholds.ToList();
|
---|
71 | // skip first (-inf) and last (+inf) thresholds
|
---|
72 | for (int i = 0; i < thresholds.Count; ++i) {
|
---|
73 | if (double.IsInfinity(thresholds[i]) || double.IsNaN(thresholds[i]))
|
---|
74 | continue;
|
---|
75 | modelWorksheet.Cells[row, 1].Value = "Threshold " + i;
|
---|
76 | modelWorksheet.Cells[row, 2].Value = thresholds[i];
|
---|
77 | ++row;
|
---|
78 | }
|
---|
79 | row++;
|
---|
80 |
|
---|
81 | modelWorksheet.Cells[row, 1].Value = "Estimation Limits Lower";
|
---|
82 | modelWorksheet.Cells[row, 2].Value = Math.Max(solution.Model.LowerEstimationLimit, -9.99999999999999E+307); // minimal value supported by excel
|
---|
83 | modelWorksheet.Names.Add("EstimationLimitLower", modelWorksheet.Cells[row, 2]);
|
---|
84 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
85 | row++;
|
---|
86 |
|
---|
87 | modelWorksheet.Cells[row, 1].Value = "Estimation Limits Upper";
|
---|
88 | modelWorksheet.Cells[row, 2].Value = Math.Min(solution.Model.UpperEstimationLimit, 9.99999999999999E+307); // maximal value supported by excel
|
---|
89 | modelWorksheet.Names.Add("EstimationLimitUpper", modelWorksheet.Cells[row, 2]);
|
---|
90 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
91 | row += 2;
|
---|
92 |
|
---|
93 | modelWorksheet.Cells[row, 1].Value = "Trainings Partition Start";
|
---|
94 | modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TrainingPartition.Start;
|
---|
95 | modelWorksheet.Names.Add(TRAININGSTART, modelWorksheet.Cells[row, 2]);
|
---|
96 | row++;
|
---|
97 |
|
---|
98 | modelWorksheet.Cells[row, 1].Value = "Trainings Partition End";
|
---|
99 | modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TrainingPartition.End;
|
---|
100 | modelWorksheet.Names.Add(TRAININGEND, modelWorksheet.Cells[row, 2]);
|
---|
101 | row++;
|
---|
102 |
|
---|
103 | modelWorksheet.Cells[row, 1].Value = "Test Partition Start";
|
---|
104 | modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TestPartition.Start;
|
---|
105 | modelWorksheet.Names.Add(TESTSTART, modelWorksheet.Cells[row, 2]);
|
---|
106 | row++;
|
---|
107 |
|
---|
108 | modelWorksheet.Cells[row, 1].Value = "Test Partition End";
|
---|
109 | modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TestPartition.End;
|
---|
110 | modelWorksheet.Names.Add(TESTEND, modelWorksheet.Cells[row, 2]);
|
---|
111 | row += 2;
|
---|
112 |
|
---|
113 | string excelTrainingTarget = Indirect("B", true);
|
---|
114 | string excelTrainingEstimated = Indirect("C", true);
|
---|
115 | string excelTrainingClassValues = Indirect("D", true);
|
---|
116 | string excelTrainingMeanError = Indirect("F", true);
|
---|
117 | string excelTrainingMSE = Indirect("G", true);
|
---|
118 |
|
---|
119 | string excelTestTarget = Indirect("B", false);
|
---|
120 | string excelTestEstimated = Indirect("C", false);
|
---|
121 | string excelTestClassValues = Indirect("D", false);
|
---|
122 | string excelTestMeanError = Indirect("F", false);
|
---|
123 | string excelTestMSE = Indirect("G", false);
|
---|
124 |
|
---|
125 | modelWorksheet.Cells[row, 1].Value = "Accuracy (training)";
|
---|
126 | modelWorksheet.Cells[row, 2].Formula = string.Format("SUMPRODUCT(({0}={1})*1)/COUNT({0})", excelTrainingClassValues, excelTrainingTarget);
|
---|
127 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
|
---|
128 | row++;
|
---|
129 |
|
---|
130 | modelWorksheet.Cells[row, 1].Value = "Accuracy (test)";
|
---|
131 | modelWorksheet.Cells[row, 2].Formula = string.Format("SUMPRODUCT(({0}={1})*1)/COUNT({0})", excelTestClassValues, excelTestTarget);
|
---|
132 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
|
---|
133 | row++;
|
---|
134 |
|
---|
135 | modelWorksheet.Cells[row, 1].Value = "Pearson's R² (training)";
|
---|
136 | modelWorksheet.Cells[row, 2].Formula = string.Format("POWER(PEARSON({0},{1}),2)", excelTrainingTarget, excelTrainingEstimated);
|
---|
137 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
|
---|
138 | row++;
|
---|
139 |
|
---|
140 | modelWorksheet.Cells[row, 1].Value = "Pearson's R² (test)";
|
---|
141 | modelWorksheet.Cells[row, 2].Formula = string.Format("POWER(PEARSON({0},{1}),2)", excelTestTarget, excelTestEstimated);
|
---|
142 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
|
---|
143 | row++;
|
---|
144 |
|
---|
145 | modelWorksheet.Cells[row, 1].Value = "Mean Squared Error (training)";
|
---|
146 | modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTrainingMSE);
|
---|
147 | modelWorksheet.Names.Add("TrainingMSE", modelWorksheet.Cells[row, 2]);
|
---|
148 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
149 | row++;
|
---|
150 |
|
---|
151 | modelWorksheet.Cells[row, 1].Value = "Mean Squared Error (test)";
|
---|
152 | modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTestMSE);
|
---|
153 | modelWorksheet.Names.Add("TestMSE", modelWorksheet.Cells[row, 2]);
|
---|
154 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
155 | row++;
|
---|
156 |
|
---|
157 | modelWorksheet.Cells[row, 1].Value = "Normalized Gini Coefficient (training)";
|
---|
158 | modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTrainingMeanError);
|
---|
159 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
160 | row++;
|
---|
161 |
|
---|
162 | modelWorksheet.Cells[row, 1].Value = "Normalized Gini Coefficient (test)";
|
---|
163 | modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTestMeanError);
|
---|
164 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
165 | row++;
|
---|
166 |
|
---|
167 | modelWorksheet.Cells["A1:B" + row].AutoFitColumns();
|
---|
168 |
|
---|
169 | AddModelTreePicture(modelWorksheet, solution.Model);
|
---|
170 | }
|
---|
171 |
|
---|
172 | private void WriteEstimatedWorksheet(ExcelWorksheet estimatedWorksheet, ExcelWorksheet datasetWorksheet, string[] formulaParts, SymbolicDiscriminantFunctionClassificationSolution solution) {
|
---|
173 | string preparedFormula = PrepareFormula(formulaParts);
|
---|
174 | int rows = solution.ProblemData.Dataset.Rows;
|
---|
175 | estimatedWorksheet.Cells[1, 1].Value = "Id"; // A
|
---|
176 | estimatedWorksheet.Cells[1, 2].Value = "Target Variable"; // B
|
---|
177 | estimatedWorksheet.Cells[1, 3].Value = "Estimated Values"; // C
|
---|
178 | estimatedWorksheet.Cells[1, 4].Value = "Estimated Class Values"; // D
|
---|
179 | estimatedWorksheet.Cells[1, 6].Value = "Error"; // F
|
---|
180 | estimatedWorksheet.Cells[1, 7].Value = "Squared Error"; // G
|
---|
181 | estimatedWorksheet.Cells[1, 9].Value = "Unbounded Estimated Values"; // I
|
---|
182 | estimatedWorksheet.Cells[1, 10].Value = "Bounded Estimated Values"; // J
|
---|
183 | estimatedWorksheet.Cells[1, 11].Value = "Random Key"; // K
|
---|
184 |
|
---|
185 | var thresholds = solution.Model.Thresholds.Where(x => !double.IsInfinity(x)).ToList();
|
---|
186 | var thresholdsFormula = GenerateThresholdsFormula(thresholds);
|
---|
187 |
|
---|
188 | const int columnIndex = 13; // index of beginning columns for class values
|
---|
189 | for (int i = 0; i <= thresholds.Count; ++i) {
|
---|
190 | estimatedWorksheet.Cells[1, i + columnIndex].Value = "Class " + i;
|
---|
191 | if (i < thresholds.Count)
|
---|
192 | estimatedWorksheet.Cells[1, i + columnIndex + thresholds.Count + 1].Value = "Threshold " + i;
|
---|
193 | }
|
---|
194 | estimatedWorksheet.Cells[1, 1, 1, 10].AutoFitColumns();
|
---|
195 |
|
---|
196 | int targetIndex = solution.ProblemData.Dataset.VariableNames.ToList().FindIndex(x => x.Equals(solution.ProblemData.TargetVariable)) + 1;
|
---|
197 | for (int i = 0; i < rows; i++) {
|
---|
198 | estimatedWorksheet.Cells[i + 2, 1].Value = i;
|
---|
199 | estimatedWorksheet.Cells[i + 2, 2].Formula = datasetWorksheet.Cells[i + 2, targetIndex].FullAddress; // target values
|
---|
200 | estimatedWorksheet.Cells[i + 2, 9].Formula = string.Format(preparedFormula, i + 2); // formula (estimated) values
|
---|
201 |
|
---|
202 | string condition = string.Empty;
|
---|
203 | string rowRef = "C" + (i + 2);
|
---|
204 |
|
---|
205 | int nClasses = thresholds.Count + 1;
|
---|
206 | for (int j = columnIndex; j < columnIndex + nClasses; ++j) {
|
---|
207 | int idx = j - columnIndex + 5; // row index for the threshold values
|
---|
208 | if (j == columnIndex) {
|
---|
209 | condition = rowRef + " < Model!$B$" + idx;
|
---|
210 | } else if (j > columnIndex && j < columnIndex + thresholds.Count) {
|
---|
211 | condition = "AND(" + rowRef + "> Model!$B$" + (idx - 1) + ", " + rowRef + " < Model!$B$" + idx + ")";
|
---|
212 | } else if (j == columnIndex + thresholds.Count) {
|
---|
213 | condition = rowRef + " > Model!$B$" + (idx - 1);
|
---|
214 | }
|
---|
215 | estimatedWorksheet.Cells[i + 2, j].Formula = "IF(" + condition + ", " + rowRef + ", #N/A)";
|
---|
216 |
|
---|
217 | if (j < columnIndex + thresholds.Count)
|
---|
218 | estimatedWorksheet.Cells[i + 2, j + nClasses].Formula = "Model!$B$" + idx;
|
---|
219 | }
|
---|
220 | }
|
---|
221 | estimatedWorksheet.Cells["B2:B" + (rows + 1)].Style.Numberformat.Format = "0.000";
|
---|
222 |
|
---|
223 | estimatedWorksheet.Cells["C2:C" + (rows + 1)].Formula = "J2";
|
---|
224 | estimatedWorksheet.Cells["C2:C" + (rows + 1)].Style.Numberformat.Format = "0.000";
|
---|
225 |
|
---|
226 | estimatedWorksheet.Cells["D2:D" + (rows + 1)].Formula = thresholdsFormula;
|
---|
227 | estimatedWorksheet.Cells["D2:D" + (rows + 1)].Style.Numberformat.Format = "0.0";
|
---|
228 |
|
---|
229 | estimatedWorksheet.Cells["F2:F" + (rows + 1)].Formula = "B2 - D2";
|
---|
230 | estimatedWorksheet.Cells["F2:F" + (rows + 1)].Style.Numberformat.Format = "0.0";
|
---|
231 |
|
---|
232 | estimatedWorksheet.Cells["G2:G" + (rows + 1)].Formula = "POWER(F2, 2)";
|
---|
233 | estimatedWorksheet.Cells["G2:G" + (rows + 1)].Style.Numberformat.Format = "0.000";
|
---|
234 |
|
---|
235 | estimatedWorksheet.Cells["I2:I" + (rows + 1)].Style.Numberformat.Format = "0.000";
|
---|
236 | estimatedWorksheet.Cells["J2:J" + (rows + 1)].Formula = "IFERROR(IF(I2 > Model!EstimationLimitUpper, Model!EstimationLimitUpper, IF(I2 < Model!EstimationLimitLower, Model!EstimationLimitLower, I2)), AVERAGE(Model!EstimationLimitLower, Model!EstimationLimitUpper))";
|
---|
237 | estimatedWorksheet.Cells["J2:J" + (rows + 1)].Style.Numberformat.Format = "0.000";
|
---|
238 |
|
---|
239 | estimatedWorksheet.Cells["K2:K" + (rows + 1)].Formula = "RAND()";
|
---|
240 | estimatedWorksheet.Cells["K2:K" + (rows + 1)].Style.Numberformat.Format = "0.000";
|
---|
241 | }
|
---|
242 |
|
---|
243 | private void AddCharts(ExcelWorksheet chartsWorksheet, DiscriminantFunctionClassificationSolution solution) {
|
---|
244 | List<char> columns = new List<char> { 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' };
|
---|
245 | var scatterPlot = chartsWorksheet.Drawings.AddChart("scatterPlot", eChartType.XYScatter);
|
---|
246 | scatterPlot.SetSize(800, 400);
|
---|
247 | scatterPlot.SetPosition(0, 0);
|
---|
248 | scatterPlot.Title.Text = "Scatter Plot";
|
---|
249 | var thresholds = solution.Model.Thresholds.Where(x => !double.IsInfinity(x)).ToList();
|
---|
250 | chartsWorksheet.Names.AddFormula("XKey", "OFFSET('Estimated Values'!$K$1,1,0, COUNTA('Estimated Values'!$K:$K)-1)");
|
---|
251 |
|
---|
252 | for (int i = 0; i <= thresholds.Count; ++i) {
|
---|
253 | string header = "Class" + i;
|
---|
254 | chartsWorksheet.Names.AddFormula(header, String.Format("OFFSET('Estimated Values'!${0}$1, 1, 0, COUNTA('Estimated Values'!${0}:${0})-1)", columns[i]));
|
---|
255 | var series = scatterPlot.Series.Add(header, "XKey");
|
---|
256 | series.Header = header;
|
---|
257 |
|
---|
258 | if (i < thresholds.Count) {
|
---|
259 | chartsWorksheet.Names.AddFormula("Threshold" + i, String.Format("OFFSET('Estimated Values'!${0}$1, 1, 0, COUNTA('Estimated Values'!${0}:${0})-1)", columns[i + thresholds.Count + 1]));
|
---|
260 | var s = scatterPlot.Series.Add("Threshold" + i, "XKey");
|
---|
261 | s.Header = "Threshold" + i;
|
---|
262 | }
|
---|
263 | }
|
---|
264 | }
|
---|
265 |
|
---|
266 | // this method assumes that the thresholds list is sorted in ascending order
|
---|
267 | private static string GenerateThresholdsFormula(List<double> thresholds) {
|
---|
268 | if (thresholds.Count == 1) {
|
---|
269 | return String.Format("IF(J2 < {0}, 0, 1)", thresholds[0]);
|
---|
270 | }
|
---|
271 | var sb = new StringBuilder();
|
---|
272 | sb.Append(String.Format("IF(J2 < {0}, 0,", thresholds[0]));
|
---|
273 | for (int i = 1; i < thresholds.Count; ++i) {
|
---|
274 | double v = thresholds[i];
|
---|
275 | sb.Append(String.Format("IF(J2 < {0}, {1},", v, i));
|
---|
276 | }
|
---|
277 | for (int i = 1; i < thresholds.Count; ++i)
|
---|
278 | sb.Append(")");
|
---|
279 | sb.Append(")");
|
---|
280 | return sb.ToString();
|
---|
281 | }
|
---|
282 | }
|
---|
283 | }
|
---|