[11523] | 1 | using System;
|
---|
| 2 | using System.Collections.Generic;
|
---|
| 3 | using System.IO;
|
---|
| 4 | using System.Linq;
|
---|
| 5 | using System.Text;
|
---|
| 6 | using HeuristicLab.Problems.DataAnalysis.Symbolic.Views;
|
---|
| 7 | using OfficeOpenXml;
|
---|
[11541] | 8 | using OfficeOpenXml.Drawing.Chart;
|
---|
[11523] | 9 |
|
---|
| 10 | namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Classification.Views {
|
---|
| 11 | public class SymbolicDiscriminantFunctionClassificationSolutionExcelExporter : SymbolicSolutionExcelExporter {
|
---|
[11541] | 12 | public override void Export(IDataAnalysisSolution solution, string fileName) {
|
---|
| 13 | var symbDiscriminantSolution = solution as SymbolicDiscriminantFunctionClassificationSolution;
|
---|
| 14 | if (symbDiscriminantSolution == null) throw new NotSupportedException("This solution cannot be exported to Excel");
|
---|
| 15 | var formatter = new SymbolicDataAnalysisExpressionExcelFormatter();
|
---|
| 16 | var formula = formatter.Format(symbDiscriminantSolution.Model.SymbolicExpressionTree, solution.ProblemData.Dataset);
|
---|
| 17 | ExportChart(fileName, symbDiscriminantSolution, formula);
|
---|
[11523] | 18 | }
|
---|
| 19 |
|
---|
[11541] | 20 | private void ExportChart(string fileName, SymbolicDiscriminantFunctionClassificationSolution solution, string formula) {
|
---|
[11523] | 21 | FileInfo newFile = new FileInfo(fileName);
|
---|
| 22 | if (newFile.Exists) {
|
---|
| 23 | newFile.Delete();
|
---|
| 24 | newFile = new FileInfo(fileName);
|
---|
| 25 | }
|
---|
| 26 | var formulaParts = formula.Split(new string[] { Environment.NewLine }, StringSplitOptions.None);
|
---|
| 27 |
|
---|
| 28 | using (ExcelPackage package = new ExcelPackage(newFile)) {
|
---|
| 29 | ExcelWorksheet modelWorksheet = package.Workbook.Worksheets.Add("Model");
|
---|
| 30 | FormatModelSheet(modelWorksheet, solution, formulaParts);
|
---|
| 31 |
|
---|
| 32 | ExcelWorksheet datasetWorksheet = package.Workbook.Worksheets.Add("Dataset");
|
---|
| 33 | WriteDatasetToExcel(datasetWorksheet, solution.ProblemData);
|
---|
| 34 |
|
---|
| 35 | ExcelWorksheet inputsWorksheet = package.Workbook.Worksheets.Add("Inputs");
|
---|
| 36 | WriteInputSheet(inputsWorksheet, datasetWorksheet, formulaParts.Skip(2), solution.ProblemData.Dataset);
|
---|
| 37 |
|
---|
| 38 | ExcelWorksheet estimatedWorksheet = package.Workbook.Worksheets.Add("Estimated Values");
|
---|
| 39 | WriteEstimatedWorksheet(estimatedWorksheet, datasetWorksheet, formulaParts, solution);
|
---|
| 40 |
|
---|
| 41 | ExcelWorksheet chartsWorksheet = package.Workbook.Worksheets.Add("Charts");
|
---|
[11541] | 42 | AddCharts(chartsWorksheet, solution);
|
---|
[11523] | 43 | package.Workbook.Properties.Title = "Excel Export";
|
---|
| 44 | package.Workbook.Properties.Author = "HEAL";
|
---|
| 45 | package.Workbook.Properties.Comments = "Excel export of a symbolic data analysis solution from HeuristicLab";
|
---|
| 46 |
|
---|
| 47 | package.Save();
|
---|
| 48 | }
|
---|
| 49 | }
|
---|
| 50 |
|
---|
| 51 | private void FormatModelSheet(ExcelWorksheet modelWorksheet, SymbolicDiscriminantFunctionClassificationSolution solution, IEnumerable<string> formulaParts) {
|
---|
| 52 | int row = 1;
|
---|
| 53 | modelWorksheet.Cells[row, 1].Value = "Model";
|
---|
| 54 | modelWorksheet.Cells[row, 2].Value = solution.Name;
|
---|
| 55 |
|
---|
| 56 | foreach (var part in formulaParts) {
|
---|
| 57 | modelWorksheet.Cells[row, 4].Value = part;
|
---|
| 58 | row++;
|
---|
| 59 | }
|
---|
| 60 |
|
---|
| 61 | row = 2;
|
---|
| 62 | modelWorksheet.Cells[row, 1].Value = "Model Depth";
|
---|
| 63 | modelWorksheet.Cells[row, 2].Value = solution.Model.SymbolicExpressionTree.Depth;
|
---|
| 64 | row++;
|
---|
| 65 |
|
---|
| 66 | modelWorksheet.Cells[row, 1].Value = "Model Length";
|
---|
| 67 | modelWorksheet.Cells[row, 2].Value = solution.Model.SymbolicExpressionTree.Length;
|
---|
| 68 | row += 2;
|
---|
| 69 |
|
---|
| 70 | var thresholds = solution.Model.Thresholds.ToList();
|
---|
| 71 | // skip first (-inf) and last (+inf) thresholds
|
---|
| 72 | for (int i = 0; i < thresholds.Count; ++i) {
|
---|
| 73 | if (double.IsInfinity(thresholds[i]) || double.IsNaN(thresholds[i]))
|
---|
| 74 | continue;
|
---|
| 75 | modelWorksheet.Cells[row, 1].Value = "Threshold " + i;
|
---|
| 76 | modelWorksheet.Cells[row, 2].Value = thresholds[i];
|
---|
| 77 | ++row;
|
---|
| 78 | }
|
---|
| 79 | row++;
|
---|
| 80 |
|
---|
| 81 | modelWorksheet.Cells[row, 1].Value = "Estimation Limits Lower";
|
---|
| 82 | modelWorksheet.Cells[row, 2].Value = Math.Max(solution.Model.LowerEstimationLimit, -9.99999999999999E+307); // minimal value supported by excel
|
---|
| 83 | modelWorksheet.Names.Add("EstimationLimitLower", modelWorksheet.Cells[row, 2]);
|
---|
| 84 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
| 85 | row++;
|
---|
| 86 |
|
---|
| 87 | modelWorksheet.Cells[row, 1].Value = "Estimation Limits Upper";
|
---|
| 88 | modelWorksheet.Cells[row, 2].Value = Math.Min(solution.Model.UpperEstimationLimit, 9.99999999999999E+307); // maximal value supported by excel
|
---|
| 89 | modelWorksheet.Names.Add("EstimationLimitUpper", modelWorksheet.Cells[row, 2]);
|
---|
| 90 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
| 91 | row += 2;
|
---|
| 92 |
|
---|
| 93 | modelWorksheet.Cells[row, 1].Value = "Trainings Partition Start";
|
---|
| 94 | modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TrainingPartition.Start;
|
---|
| 95 | modelWorksheet.Names.Add(TRAININGSTART, modelWorksheet.Cells[row, 2]);
|
---|
| 96 | row++;
|
---|
| 97 |
|
---|
| 98 | modelWorksheet.Cells[row, 1].Value = "Trainings Partition End";
|
---|
| 99 | modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TrainingPartition.End;
|
---|
| 100 | modelWorksheet.Names.Add(TRAININGEND, modelWorksheet.Cells[row, 2]);
|
---|
| 101 | row++;
|
---|
| 102 |
|
---|
| 103 | modelWorksheet.Cells[row, 1].Value = "Test Partition Start";
|
---|
| 104 | modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TestPartition.Start;
|
---|
| 105 | modelWorksheet.Names.Add(TESTSTART, modelWorksheet.Cells[row, 2]);
|
---|
| 106 | row++;
|
---|
| 107 |
|
---|
| 108 | modelWorksheet.Cells[row, 1].Value = "Test Partition End";
|
---|
| 109 | modelWorksheet.Cells[row, 2].Value = solution.ProblemData.TestPartition.End;
|
---|
| 110 | modelWorksheet.Names.Add(TESTEND, modelWorksheet.Cells[row, 2]);
|
---|
| 111 | row += 2;
|
---|
| 112 |
|
---|
| 113 | string excelTrainingTarget = Indirect("B", true);
|
---|
| 114 | string excelTrainingEstimated = Indirect("C", true);
|
---|
| 115 | string excelTrainingClassValues = Indirect("D", true);
|
---|
| 116 | string excelTrainingMeanError = Indirect("F", true);
|
---|
| 117 | string excelTrainingMSE = Indirect("G", true);
|
---|
| 118 |
|
---|
| 119 | string excelTestTarget = Indirect("B", false);
|
---|
| 120 | string excelTestEstimated = Indirect("C", false);
|
---|
| 121 | string excelTestClassValues = Indirect("D", false);
|
---|
| 122 | string excelTestMeanError = Indirect("F", false);
|
---|
| 123 | string excelTestMSE = Indirect("G", false);
|
---|
| 124 |
|
---|
| 125 | modelWorksheet.Cells[row, 1].Value = "Accuracy (training)";
|
---|
| 126 | modelWorksheet.Cells[row, 2].Formula = string.Format("SUMPRODUCT(({0}={1})*1)/COUNT({0})", excelTrainingClassValues, excelTrainingTarget);
|
---|
| 127 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
|
---|
| 128 | row++;
|
---|
| 129 |
|
---|
| 130 | modelWorksheet.Cells[row, 1].Value = "Accuracy (test)";
|
---|
| 131 | modelWorksheet.Cells[row, 2].Formula = string.Format("SUMPRODUCT(({0}={1})*1)/COUNT({0})", excelTestClassValues, excelTestTarget);
|
---|
| 132 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
|
---|
| 133 | row++;
|
---|
| 134 |
|
---|
| 135 | modelWorksheet.Cells[row, 1].Value = "Pearson's R² (training)";
|
---|
| 136 | modelWorksheet.Cells[row, 2].Formula = string.Format("POWER(PEARSON({0},{1}),2)", excelTrainingTarget, excelTrainingEstimated);
|
---|
| 137 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
|
---|
| 138 | row++;
|
---|
| 139 |
|
---|
| 140 | modelWorksheet.Cells[row, 1].Value = "Pearson's R² (test)";
|
---|
| 141 | modelWorksheet.Cells[row, 2].Formula = string.Format("POWER(PEARSON({0},{1}),2)", excelTestTarget, excelTestEstimated);
|
---|
| 142 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000";
|
---|
| 143 | row++;
|
---|
| 144 |
|
---|
| 145 | modelWorksheet.Cells[row, 1].Value = "Mean Squared Error (training)";
|
---|
| 146 | modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTrainingMSE);
|
---|
| 147 | modelWorksheet.Names.Add("TrainingMSE", modelWorksheet.Cells[row, 2]);
|
---|
| 148 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
| 149 | row++;
|
---|
| 150 |
|
---|
| 151 | modelWorksheet.Cells[row, 1].Value = "Mean Squared Error (test)";
|
---|
| 152 | modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTestMSE);
|
---|
| 153 | modelWorksheet.Names.Add("TestMSE", modelWorksheet.Cells[row, 2]);
|
---|
| 154 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
| 155 | row++;
|
---|
| 156 |
|
---|
| 157 | modelWorksheet.Cells[row, 1].Value = "Normalized Gini Coefficient (training)";
|
---|
| 158 | modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTrainingMeanError);
|
---|
| 159 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
| 160 | row++;
|
---|
| 161 |
|
---|
| 162 | modelWorksheet.Cells[row, 1].Value = "Normalized Gini Coefficient (test)";
|
---|
| 163 | modelWorksheet.Cells[row, 2].Formula = string.Format("AVERAGE({0})", excelTestMeanError);
|
---|
| 164 | modelWorksheet.Cells[row, 2].Style.Numberformat.Format = "0.000E+00";
|
---|
| 165 | row++;
|
---|
| 166 |
|
---|
| 167 | modelWorksheet.Cells["A1:B" + row].AutoFitColumns();
|
---|
| 168 |
|
---|
| 169 | AddModelTreePicture(modelWorksheet, solution.Model);
|
---|
| 170 | }
|
---|
| 171 |
|
---|
| 172 | private void WriteEstimatedWorksheet(ExcelWorksheet estimatedWorksheet, ExcelWorksheet datasetWorksheet, string[] formulaParts, SymbolicDiscriminantFunctionClassificationSolution solution) {
|
---|
| 173 | string preparedFormula = PrepareFormula(formulaParts);
|
---|
| 174 | int rows = solution.ProblemData.Dataset.Rows;
|
---|
| 175 | estimatedWorksheet.Cells[1, 1].Value = "Id"; // A
|
---|
| 176 | estimatedWorksheet.Cells[1, 2].Value = "Target Variable"; // B
|
---|
| 177 | estimatedWorksheet.Cells[1, 3].Value = "Estimated Values"; // C
|
---|
| 178 | estimatedWorksheet.Cells[1, 4].Value = "Estimated Class Values"; // D
|
---|
[11541] | 179 | estimatedWorksheet.Cells[1, 6].Value = "Error"; // F
|
---|
[11523] | 180 | estimatedWorksheet.Cells[1, 7].Value = "Squared Error"; // G
|
---|
| 181 | estimatedWorksheet.Cells[1, 9].Value = "Unbounded Estimated Values"; // I
|
---|
| 182 | estimatedWorksheet.Cells[1, 10].Value = "Bounded Estimated Values"; // J
|
---|
[11541] | 183 | estimatedWorksheet.Cells[1, 11].Value = "Random Key"; // K
|
---|
[11523] | 184 |
|
---|
| 185 | var thresholds = solution.Model.Thresholds.Where(x => !double.IsInfinity(x)).ToList();
|
---|
| 186 | var thresholdsFormula = GenerateThresholdsFormula(thresholds);
|
---|
| 187 |
|
---|
[11541] | 188 | const int columnIndex = 13; // index of beginning columns for class values
|
---|
| 189 | for (int i = 0; i <= thresholds.Count; ++i) {
|
---|
| 190 | estimatedWorksheet.Cells[1, i + columnIndex].Value = "Class " + i;
|
---|
| 191 | if (i < thresholds.Count)
|
---|
| 192 | estimatedWorksheet.Cells[1, i + columnIndex + thresholds.Count + 1].Value = "Threshold " + i;
|
---|
| 193 | }
|
---|
| 194 | estimatedWorksheet.Cells[1, 1, 1, 10].AutoFitColumns();
|
---|
| 195 |
|
---|
[11523] | 196 | int targetIndex = solution.ProblemData.Dataset.VariableNames.ToList().FindIndex(x => x.Equals(solution.ProblemData.TargetVariable)) + 1;
|
---|
| 197 | for (int i = 0; i < rows; i++) {
|
---|
| 198 | estimatedWorksheet.Cells[i + 2, 1].Value = i;
|
---|
| 199 | estimatedWorksheet.Cells[i + 2, 2].Formula = datasetWorksheet.Cells[i + 2, targetIndex].FullAddress; // target values
|
---|
| 200 | estimatedWorksheet.Cells[i + 2, 9].Formula = string.Format(preparedFormula, i + 2); // formula (estimated) values
|
---|
[11541] | 201 |
|
---|
| 202 | string condition = string.Empty;
|
---|
| 203 | string rowRef = "C" + (i + 2);
|
---|
| 204 |
|
---|
| 205 | int nClasses = thresholds.Count + 1;
|
---|
| 206 | for (int j = columnIndex; j < columnIndex + nClasses; ++j) {
|
---|
| 207 | int idx = j - columnIndex + 5; // row index for the threshold values
|
---|
| 208 | if (j == columnIndex) {
|
---|
| 209 | condition = rowRef + " < Model!$B$" + idx;
|
---|
| 210 | } else if (j > columnIndex && j < columnIndex + thresholds.Count) {
|
---|
| 211 | condition = "AND(" + rowRef + "> Model!$B$" + (idx - 1) + ", " + rowRef + " < Model!$B$" + idx + ")";
|
---|
| 212 | } else if (j == columnIndex + thresholds.Count) {
|
---|
| 213 | condition = rowRef + " > Model!$B$" + (idx - 1);
|
---|
| 214 | }
|
---|
| 215 | estimatedWorksheet.Cells[i + 2, j].Formula = "IF(" + condition + ", " + rowRef + ", #N/A)";
|
---|
| 216 |
|
---|
| 217 | if (j < columnIndex + thresholds.Count)
|
---|
| 218 | estimatedWorksheet.Cells[i + 2, j + nClasses].Formula = "Model!$B$" + idx;
|
---|
| 219 | }
|
---|
[11523] | 220 | }
|
---|
| 221 | estimatedWorksheet.Cells["B2:B" + (rows + 1)].Style.Numberformat.Format = "0.000";
|
---|
| 222 |
|
---|
| 223 | estimatedWorksheet.Cells["C2:C" + (rows + 1)].Formula = "J2";
|
---|
| 224 | estimatedWorksheet.Cells["C2:C" + (rows + 1)].Style.Numberformat.Format = "0.000";
|
---|
| 225 |
|
---|
| 226 | estimatedWorksheet.Cells["D2:D" + (rows + 1)].Formula = thresholdsFormula;
|
---|
| 227 | estimatedWorksheet.Cells["D2:D" + (rows + 1)].Style.Numberformat.Format = "0.0";
|
---|
| 228 |
|
---|
| 229 | estimatedWorksheet.Cells["F2:F" + (rows + 1)].Formula = "B2 - D2";
|
---|
| 230 | estimatedWorksheet.Cells["F2:F" + (rows + 1)].Style.Numberformat.Format = "0.0";
|
---|
| 231 |
|
---|
| 232 | estimatedWorksheet.Cells["G2:G" + (rows + 1)].Formula = "POWER(F2, 2)";
|
---|
| 233 | estimatedWorksheet.Cells["G2:G" + (rows + 1)].Style.Numberformat.Format = "0.000";
|
---|
| 234 |
|
---|
| 235 | estimatedWorksheet.Cells["I2:I" + (rows + 1)].Style.Numberformat.Format = "0.000";
|
---|
| 236 | estimatedWorksheet.Cells["J2:J" + (rows + 1)].Formula = "IFERROR(IF(I2 > Model!EstimationLimitUpper, Model!EstimationLimitUpper, IF(I2 < Model!EstimationLimitLower, Model!EstimationLimitLower, I2)), AVERAGE(Model!EstimationLimitLower, Model!EstimationLimitUpper))";
|
---|
| 237 | estimatedWorksheet.Cells["J2:J" + (rows + 1)].Style.Numberformat.Format = "0.000";
|
---|
[11541] | 238 |
|
---|
| 239 | estimatedWorksheet.Cells["K2:K" + (rows + 1)].Formula = "RAND()";
|
---|
| 240 | estimatedWorksheet.Cells["K2:K" + (rows + 1)].Style.Numberformat.Format = "0.000";
|
---|
[11523] | 241 | }
|
---|
| 242 |
|
---|
[11541] | 243 | private void AddCharts(ExcelWorksheet chartsWorksheet, DiscriminantFunctionClassificationSolution solution) {
|
---|
| 244 | List<char> columns = new List<char> { 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' };
|
---|
| 245 | var scatterPlot = chartsWorksheet.Drawings.AddChart("scatterPlot", eChartType.XYScatter);
|
---|
| 246 | scatterPlot.SetSize(800, 400);
|
---|
| 247 | scatterPlot.SetPosition(0, 0);
|
---|
| 248 | scatterPlot.Title.Text = "Scatter Plot";
|
---|
| 249 | var thresholds = solution.Model.Thresholds.Where(x => !double.IsInfinity(x)).ToList();
|
---|
| 250 | chartsWorksheet.Names.AddFormula("XKey", "OFFSET('Estimated Values'!$K$1,1,0, COUNTA('Estimated Values'!$K:$K)-1)");
|
---|
| 251 |
|
---|
| 252 | for (int i = 0; i <= thresholds.Count; ++i) {
|
---|
| 253 | string header = "Class" + i;
|
---|
| 254 | chartsWorksheet.Names.AddFormula(header, String.Format("OFFSET('Estimated Values'!${0}$1, 1, 0, COUNTA('Estimated Values'!${0}:${0})-1)", columns[i]));
|
---|
| 255 | var series = scatterPlot.Series.Add(header, "XKey");
|
---|
| 256 | series.Header = header;
|
---|
| 257 |
|
---|
| 258 | if (i < thresholds.Count) {
|
---|
| 259 | chartsWorksheet.Names.AddFormula("Threshold" + i, String.Format("OFFSET('Estimated Values'!${0}$1, 1, 0, COUNTA('Estimated Values'!${0}:${0})-1)", columns[i + thresholds.Count + 1]));
|
---|
| 260 | var s = scatterPlot.Series.Add("Threshold" + i, "XKey");
|
---|
| 261 | s.Header = "Threshold" + i;
|
---|
| 262 | }
|
---|
| 263 | }
|
---|
| 264 | }
|
---|
| 265 |
|
---|
[11523] | 266 | // this method assumes that the thresholds list is sorted in ascending order
|
---|
[11541] | 267 | private static string GenerateThresholdsFormula(List<double> thresholds) {
|
---|
[11523] | 268 | if (thresholds.Count == 1) {
|
---|
| 269 | return String.Format("IF(J2 < {0}, 0, 1)", thresholds[0]);
|
---|
| 270 | }
|
---|
| 271 | var sb = new StringBuilder();
|
---|
| 272 | sb.Append(String.Format("IF(J2 < {0}, 0,", thresholds[0]));
|
---|
| 273 | for (int i = 1; i < thresholds.Count; ++i) {
|
---|
| 274 | double v = thresholds[i];
|
---|
| 275 | sb.Append(String.Format("IF(J2 < {0}, {1},", v, i));
|
---|
| 276 | }
|
---|
| 277 | for (int i = 1; i < thresholds.Count; ++i)
|
---|
| 278 | sb.Append(")");
|
---|
| 279 | sb.Append(")");
|
---|
| 280 | return sb.ToString();
|
---|
| 281 | }
|
---|
| 282 | }
|
---|
| 283 | }
|
---|