[11523] | 1 | using System;
|
---|
| 2 | using System.Collections.Generic;
|
---|
| 3 | using System.IO;
|
---|
| 4 | using System.Linq;
|
---|
| 5 | using System.Text;
|
---|
| 6 | using HeuristicLab.Problems.DataAnalysis.Symbolic.Views;
|
---|
| 7 | using OfficeOpenXml;
|
---|
| 8 |
|
---|
| 9 | namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Classification.Views {
|
---|
| 10 | public class SymbolicDiscriminantFunctionClassificationSolutionExcelExporter : SymbolicSolutionExcelExporter {
|
---|
| 11 | protected override void ExportChart(string fileName, ISymbolicDataAnalysisSolution solution, string formula) {
|
---|
| 12 | ExportChart(fileName, solution as ISymbolicClassificationSolution, formula);
|
---|
| 13 | }
|
---|
| 14 |
|
---|
| 15 | private void ExportChart(string fileName, ISymbolicClassificationSolution solution, string formula) {
|
---|
| 16 | FileInfo newFile = new FileInfo(fileName);
|
---|
| 17 | if (newFile.Exists) {
|
---|
| 18 | newFile.Delete();
|
---|
| 19 | newFile = new FileInfo(fileName);
|
---|
| 20 | }
|
---|
| 21 | var formulaParts = formula.Split(new string[] { Environment.NewLine }, StringSplitOptions.None);
|
---|
| 22 |
|
---|
| 23 | using (ExcelPackage package = new ExcelPackage(newFile)) {
|
---|
| 24 | ExcelWorksheet modelWorksheet = package.Workbook.Worksheets.Add("Model");
|
---|
| 25 | FormatModelSheet(modelWorksheet, solution, formulaParts);
|
---|
| 26 |
|
---|
| 27 | ExcelWorksheet datasetWorksheet = package.Workbook.Worksheets.Add("Dataset");
|
---|
| 28 | WriteDatasetToExcel(datasetWorksheet, solution.ProblemData);
|
---|
| 29 |
|
---|
| 30 | ExcelWorksheet inputsWorksheet = package.Workbook.Worksheets.Add("Inputs");
|
---|
| 31 | WriteInputSheet(inputsWorksheet, datasetWorksheet, formulaParts.Skip(2), solution.ProblemData.Dataset);
|
---|
| 32 |
|
---|
| 33 | ExcelWorksheet estimatedWorksheet = package.Workbook.Worksheets.Add("Estimated Values");
|
---|
| 34 | WriteEstimatedWorksheet(estimatedWorksheet, datasetWorksheet, formulaParts, solution);
|
---|
| 35 |
|
---|
| 36 | ExcelWorksheet chartsWorksheet = package.Workbook.Worksheets.Add("Charts");
|
---|
| 37 | AddCharts(chartsWorksheet);
|
---|
| 38 | package.Workbook.Properties.Title = "Excel Export";
|
---|
| 39 | package.Workbook.Properties.Author = "HEAL";
|
---|
| 40 | package.Workbook.Properties.Comments = "Excel export of a symbolic data analysis solution from HeuristicLab";
|
---|
| 41 |
|
---|
| 42 | package.Save();
|
---|
| 43 | }
|
---|
| 44 | }
|
---|
| 45 |
|
---|
| 46 | protected void WriteEstimatedWorksheet(ExcelWorksheet estimatedWorksheet, ExcelWorksheet datasetWorksheet, string[] formulaParts, ISymbolicDataAnalysisSolution solution) {
|
---|
| 47 | WriteEstimatedWorksheet(estimatedWorksheet, datasetWorksheet, formulaParts, solution as SymbolicDiscriminantFunctionClassificationSolution);
|
---|
| 48 | }
|
---|
| 49 |
|
---|
| 50 | protected override void FormatModelSheet(ExcelWorksheet modelWorksheet, ISymbolicDataAnalysisSolution solution, IEnumerable<string> formulaParts) {
|
---|
| 51 | FormatModelSheet(modelWorksheet, solution as SymbolicDiscriminantFunctionClassificationSolution, formulaParts);
|
---|
| 52 | }
|
---|
| 53 |
|
---|
| 54 | private void FormatModelSheet(ExcelWorksheet modelWorksheet, SymbolicDiscriminantFunctionClassificationSolution solution, IEnumerable<string> formulaParts) {
|
---|
| 55 | int row = 1;
|
---|
| 56 | modelWorksheet.Cells[row, 1].Value = "Model";
|
---|
| 57 | modelWorksheet.Cells[row, 2].Value = solution.Name;
|
---|
| 58 |
|
---|
| 59 | foreach (var part in formulaParts) {
|
---|
| 60 | modelWorksheet.Cells[row, 4].Value = part;
|
---|
| 61 | row++;
|
---|
| 62 | }
|
---|
| 63 |
|
---|
| 64 | row = 2;
|
---|
| 65 | modelWorksheet.Cells[row, 1].Value = "Model Depth";
|
---|
| 66 | modelWorksheet.Cells[row, 2].Value = solution.Model.SymbolicExpressionTree.Depth;
|
---|
| 67 | row++;
|
---|
| 68 |
|
---|
| 69 | modelWorksheet.Cells[row, 1].Value = "Model Length";
|
---|
| 70 | modelWorksheet.Cells[row, 2].Value = solution.Model.SymbolicExpressionTree.Length;
|
---|
| 71 | row += 2;
|
---|
| 72 |
|
---|
| 73 | var thresholds = solution.Model.Thresholds.ToList();
|
---|
| 74 | // skip first (-inf) and last (+inf) thresholds
|
---|
| 75 | for (int i = 0; i < thresholds.Count; ++i) {
|
---|
| 76 | if (double.IsInfinity(thresholds[i]) || double.IsNaN(thresholds[i]))
|
---|
| 77 | continue;
|
---|
| 78 | modelWorksheet.Cells[row, 1].Value = "Threshold " + i;
|
---|
| 79 | modelWorksheet.Cells[row, 2].Value = thresholds[i];
|
---|
| 80 | ++row;
|
---|
| 81 | }
|
---|
| 82 | row++;
|
---|
| 83 |
|
---|
| 84 | modelWorksheet.Cells[row, 1].Value = "Estimation Limits Lower";
|
---|
| 85 | modelWorksheet.Cells[row, 2].Value = Math.Max(solution.Model.LowerEstimationLimit, -9.99999999999999E+307); // minimal value supported by excel
|
---|
| 86 | modelWorksheet.Names.Add("EstimationLimitLower", modelWorksheet.Cells[row, 2]);
|
---|
| 87 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
| 88 | row++;
|
---|
| 89 |
|
---|
| 90 | modelWorksheet.Cells[row, 1].Value = "Estimation Limits Upper";
|
---|
| 91 | modelWorksheet.Cells[row, 2].Value = Math.Min(solution.Model.UpperEstimationLimit, 9.99999999999999E+307); // maximal value supported by excel
|
---|
| 92 | modelWorksheet.Names.Add("EstimationLimitUpper", modelWorksheet.Cells[row, 2]);
|
---|
| 93 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
| 94 | row += 2;
|
---|
| 95 |
|
---|
| 96 | modelWorksheet.Cells[row, 1].Value = "Trainings Partition Start";
|
---|
| 97 | modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TrainingPartition.Start;
|
---|
| 98 | modelWorksheet.Names.Add(TRAININGSTART, modelWorksheet.Cells[row, 2]);
|
---|
| 99 | row++;
|
---|
| 100 |
|
---|
| 101 | modelWorksheet.Cells[row, 1].Value = "Trainings Partition End";
|
---|
| 102 | modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TrainingPartition.End;
|
---|
| 103 | modelWorksheet.Names.Add(TRAININGEND, modelWorksheet.Cells[row, 2]);
|
---|
| 104 | row++;
|
---|
| 105 |
|
---|
| 106 | modelWorksheet.Cells[row, 1].Value = "Test Partition Start";
|
---|
| 107 | modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TestPartition.Start;
|
---|
| 108 | modelWorksheet.Names.Add(TESTSTART, modelWorksheet.Cells[row, 2]);
|
---|
| 109 | row++;
|
---|
| 110 |
|
---|
| 111 | modelWorksheet.Cells[row, 1].Value = "Test Partition End";
|
---|
| 112 | modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TestPartition.End;
|
---|
| 113 | modelWorksheet.Names.Add(TESTEND, modelWorksheet.Cells[row, 2]);
|
---|
| 114 | row += 2;
|
---|
| 115 |
|
---|
| 116 | string excelTrainingTarget = Indirect("B", true);
|
---|
| 117 | string excelTrainingEstimated = Indirect("C", true);
|
---|
| 118 | string excelTrainingClassValues = Indirect("D", true);
|
---|
| 119 | string excelTrainingMeanError = Indirect("F", true);
|
---|
| 120 | string excelTrainingMSE = Indirect("G", true);
|
---|
| 121 |
|
---|
| 122 | string excelTestTarget = Indirect("B", false);
|
---|
| 123 | string excelTestEstimated = Indirect("C", false);
|
---|
| 124 | string excelTestClassValues = Indirect("D", false);
|
---|
| 125 | string excelTestMeanError = Indirect("F", false);
|
---|
| 126 | string excelTestMSE = Indirect("G", false);
|
---|
| 127 |
|
---|
| 128 | modelWorksheet.Cells[row, 1].Value = "Accuracy (training)";
|
---|
| 129 | modelWorksheet.Cells[row, 2].Formula = string.Format("SUMPRODUCT(({0}={1})*1)/COUNT({0})", excelTrainingClassValues, excelTrainingTarget);
|
---|
| 130 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
|
---|
| 131 | row++;
|
---|
| 132 |
|
---|
| 133 | modelWorksheet.Cells[row, 1].Value = "Accuracy (test)";
|
---|
| 134 | modelWorksheet.Cells[row, 2].Formula = string.Format("SUMPRODUCT(({0}={1})*1)/COUNT({0})", excelTestClassValues, excelTestTarget);
|
---|
| 135 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
|
---|
| 136 | row++;
|
---|
| 137 |
|
---|
| 138 | modelWorksheet.Cells[row, 1].Value = "Pearson's R² (training)";
|
---|
| 139 | modelWorksheet.Cells[row, 2].Formula = string.Format("POWER(PEARSON({0},{1}),2)", excelTrainingTarget, excelTrainingEstimated);
|
---|
| 140 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
|
---|
| 141 | row++;
|
---|
| 142 |
|
---|
| 143 | modelWorksheet.Cells[row, 1].Value = "Pearson's R² (test)";
|
---|
| 144 | modelWorksheet.Cells[row, 2].Formula = string.Format("POWER(PEARSON({0},{1}),2)", excelTestTarget, excelTestEstimated);
|
---|
| 145 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
|
---|
| 146 | row++;
|
---|
| 147 |
|
---|
| 148 | modelWorksheet.Cells[row, 1].Value = "Mean Squared Error (training)";
|
---|
| 149 | modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTrainingMSE);
|
---|
| 150 | modelWorksheet.Names.Add("TrainingMSE", modelWorksheet.Cells[row, 2]);
|
---|
| 151 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
| 152 | row++;
|
---|
| 153 |
|
---|
| 154 | modelWorksheet.Cells[row, 1].Value = "Mean Squared Error (test)";
|
---|
| 155 | modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTestMSE);
|
---|
| 156 | modelWorksheet.Names.Add("TestMSE", modelWorksheet.Cells[row, 2]);
|
---|
| 157 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
| 158 | row++;
|
---|
| 159 |
|
---|
| 160 | modelWorksheet.Cells[row, 1].Value = "Normalized Gini Coefficient (training)";
|
---|
| 161 | modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTrainingMeanError);
|
---|
| 162 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
| 163 | row++;
|
---|
| 164 |
|
---|
| 165 | modelWorksheet.Cells[row, 1].Value = "Normalized Gini Coefficient (test)";
|
---|
| 166 | modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTestMeanError);
|
---|
| 167 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
| 168 | row++;
|
---|
| 169 |
|
---|
| 170 | modelWorksheet.Cells["A1:B" + row].AutoFitColumns();
|
---|
| 171 |
|
---|
| 172 | AddModelTreePicture(modelWorksheet, solution.Model);
|
---|
| 173 | }
|
---|
| 174 |
|
---|
| 175 | private void WriteEstimatedWorksheet(ExcelWorksheet estimatedWorksheet, ExcelWorksheet datasetWorksheet, string[] formulaParts, SymbolicDiscriminantFunctionClassificationSolution solution) {
|
---|
| 176 | string preparedFormula = PrepareFormula(formulaParts);
|
---|
| 177 | int rows = solution.ProblemData.Dataset.Rows;
|
---|
| 178 | estimatedWorksheet.Cells[1, 1].Value = "Id"; // A
|
---|
| 179 | estimatedWorksheet.Cells[1, 2].Value = "Target Variable"; // B
|
---|
| 180 | estimatedWorksheet.Cells[1, 3].Value = "Estimated Values"; // C
|
---|
| 181 | estimatedWorksheet.Cells[1, 4].Value = "Estimated Class Values"; // D
|
---|
| 182 | estimatedWorksheet.Cells[1, 6].Value = "Error"; // E
|
---|
| 183 | estimatedWorksheet.Cells[1, 7].Value = "Squared Error"; // G
|
---|
| 184 | estimatedWorksheet.Cells[1, 9].Value = "Unbounded Estimated Values"; // I
|
---|
| 185 | estimatedWorksheet.Cells[1, 10].Value = "Bounded Estimated Values"; // J
|
---|
| 186 |
|
---|
| 187 | estimatedWorksheet.Cells[1, 1, 1, 10].AutoFitColumns();
|
---|
| 188 |
|
---|
| 189 | var thresholds = solution.Model.Thresholds.Where(x => !double.IsInfinity(x)).ToList();
|
---|
| 190 | var thresholdsFormula = GenerateThresholdsFormula(thresholds);
|
---|
| 191 |
|
---|
| 192 | int targetIndex = solution.ProblemData.Dataset.VariableNames.ToList().FindIndex(x => x.Equals(solution.ProblemData.TargetVariable)) + 1;
|
---|
| 193 | for (int i = 0; i < rows; i++) {
|
---|
| 194 | estimatedWorksheet.Cells[i + 2, 1].Value = i;
|
---|
| 195 | estimatedWorksheet.Cells[i + 2, 2].Formula = datasetWorksheet.Cells[i + 2, targetIndex].FullAddress; // target values
|
---|
| 196 | estimatedWorksheet.Cells[i + 2, 9].Formula = string.Format(preparedFormula, i + 2); // formula (estimated) values
|
---|
| 197 | }
|
---|
| 198 | estimatedWorksheet.Cells["B2:B" + (rows + 1)].Style.Numberformat.Format = "0.000";
|
---|
| 199 |
|
---|
| 200 | estimatedWorksheet.Cells["C2:C" + (rows + 1)].Formula = "J2";
|
---|
| 201 | estimatedWorksheet.Cells["C2:C" + (rows + 1)].Style.Numberformat.Format = "0.000";
|
---|
| 202 |
|
---|
| 203 | estimatedWorksheet.Cells["D2:D" + (rows + 1)].Formula = thresholdsFormula;
|
---|
| 204 | estimatedWorksheet.Cells["D2:D" + (rows + 1)].Style.Numberformat.Format = "0.0";
|
---|
| 205 |
|
---|
| 206 | estimatedWorksheet.Cells["F2:F" + (rows + 1)].Formula = "B2 - D2";
|
---|
| 207 | estimatedWorksheet.Cells["F2:F" + (rows + 1)].Style.Numberformat.Format = "0.0";
|
---|
| 208 |
|
---|
| 209 | estimatedWorksheet.Cells["G2:G" + (rows + 1)].Formula = "POWER(F2, 2)";
|
---|
| 210 | estimatedWorksheet.Cells["G2:G" + (rows + 1)].Style.Numberformat.Format = "0.000";
|
---|
| 211 |
|
---|
| 212 | estimatedWorksheet.Cells["I2:I" + (rows + 1)].Style.Numberformat.Format = "0.000";
|
---|
| 213 | estimatedWorksheet.Cells["J2:J" + (rows + 1)].Formula = "IFERROR(IF(I2 > Model!EstimationLimitUpper, Model!EstimationLimitUpper, IF(I2 < Model!EstimationLimitLower, Model!EstimationLimitLower, I2)), AVERAGE(Model!EstimationLimitLower, Model!EstimationLimitUpper))";
|
---|
| 214 | estimatedWorksheet.Cells["J2:J" + (rows + 1)].Style.Numberformat.Format = "0.000";
|
---|
| 215 | }
|
---|
| 216 |
|
---|
| 217 | // this method assumes that the thresholds list is sorted in ascending order
|
---|
| 218 | private string GenerateThresholdsFormula(List<double> thresholds) {
|
---|
| 219 | if (thresholds.Count == 1) {
|
---|
| 220 | return String.Format("IF(J2 < {0}, 0, 1)", thresholds[0]);
|
---|
| 221 | }
|
---|
| 222 | var sb = new StringBuilder();
|
---|
| 223 | sb.Append(String.Format("IF(J2 < {0}, 0,", thresholds[0]));
|
---|
| 224 | for (int i = 1; i < thresholds.Count; ++i) {
|
---|
| 225 | double v = thresholds[i];
|
---|
| 226 | sb.Append(String.Format("IF(J2 < {0}, {1},", v, i));
|
---|
| 227 | }
|
---|
| 228 | for (int i = 1; i < thresholds.Count; ++i)
|
---|
| 229 | sb.Append(")");
|
---|
| 230 | sb.Append(")");
|
---|
| 231 | return sb.ToString();
|
---|
| 232 | }
|
---|
| 233 | }
|
---|
| 234 | }
|
---|