#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Views;
using OfficeOpenXml;
using OfficeOpenXml.Drawing.Chart;

namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Views {
  /// <summary>
  /// Exports a symbolic data analysis solution to an Excel 2007 workbook.
  /// The workbook contains the sheets "Model", "Dataset" and "Inputs" and, for
  /// regression solutions, additionally "Estimated Values" and "Charts".
  /// </summary>
  public class SymbolicSolutionExcelExporter : IDataAnalysisSolutionExporter {
    // Names of the defined names (on the Model sheet) that hold the partition boundaries.
    private const string TRAININGSTART = "TrainingStart";
    private const string TRAININGEND = "TrainingEnd";
    private const string TESTSTART = "TestStart";
    private const string TESTEND = "TestEnd";

    // Number formats used for the generated cells.
    private const string SCIENTIFICFORMAT = "0.000E+00";
    private const string FIXEDFORMAT = "0.000";
    private const string PERCENTFORMAT = "0.00%";

    // Largest magnitude written for an estimation limit; the limits are clamped to +/- this value.
    private const double EXCELMAXVALUE = 9.99999999999999E+307;

    /// <summary>File dialog filter for the files written by this exporter.</summary>
    public string FileTypeFilter {
      get { return "Excel 2007 file (*.xlsx)|*.xlsx"; }
    }

    /// <summary>
    /// Returns true if <paramref name="solution"/> is both a symbolic data analysis
    /// solution and a regression solution.
    /// </summary>
    public bool Supports(IDataAnalysisSolution solution) {
      return solution is ISymbolicDataAnalysisSolution && solution is IRegressionSolution;
    }

    /// <summary>
    /// Formats the model of <paramref name="solution"/> as an Excel formula and writes
    /// the workbook to <paramref name="fileName"/>. An existing file is replaced.
    /// </summary>
    /// <exception cref="NotSupportedException">
    /// <paramref name="solution"/> is not an <see cref="ISymbolicDataAnalysisSolution"/>.
    /// </exception>
    public void Export(IDataAnalysisSolution solution, string fileName) {
      var symbSolution = solution as ISymbolicDataAnalysisSolution;
      if (symbSolution == null) throw new NotSupportedException("This solution cannot be exported to Excel");

      var formatter = new SymbolicDataAnalysisExpressionExcelFormatter();
      var formula = formatter.Format(symbSolution.Model.SymbolicExpressionTree, solution.ProblemData.Dataset);
      ExportChart(fileName, symbSolution, formula);
    }

    /// <summary>
    /// Builds all worksheets and saves the package to <paramref name="fileName"/>.
    /// </summary>
    /// <param name="fileName">Target path; an existing file is deleted first.</param>
    /// <param name="solution">The solution to export.</param>
    /// <param name="formula">
    /// Formatter output, split at <see cref="Environment.NewLine"/>. The first line is used as
    /// the model formula and the lines from index 2 on as "variable = column" mappings.
    /// </param>
    private void ExportChart(string fileName, ISymbolicDataAnalysisSolution solution, string formula) {
      FileInfo newFile = new FileInfo(fileName);
      if (newFile.Exists) {
        newFile.Delete();
        newFile = new FileInfo(fileName);
      }
      var formulaParts = formula.Split(new string[] { Environment.NewLine }, StringSplitOptions.None);

      // The rendered model tree is handed to EPPlus via a temporary file. It is created here so
      // that it can be removed again once the package has been saved and disposed.
      string treePictureFile = Path.GetTempFileName();
      try {
        using (ExcelPackage package = new ExcelPackage(newFile)) {
          ExcelWorksheet modelWorksheet = package.Workbook.Worksheets.Add("Model");
          FormatModelSheet(modelWorksheet, solution, formulaParts, treePictureFile);

          ExcelWorksheet datasetWorksheet = package.Workbook.Worksheets.Add("Dataset");
          WriteDatasetToExcel(datasetWorksheet, solution.ProblemData);

          ExcelWorksheet inputsWorksheet = package.Workbook.Worksheets.Add("Inputs");
          WriteInputSheet(inputsWorksheet, datasetWorksheet, formulaParts.Skip(2), solution.ProblemData.Dataset);

          var regressionSolution = solution as IRegressionSolution;
          if (regressionSolution != null) {
            ExcelWorksheet estimatedWorksheet = package.Workbook.Worksheets.Add("Estimated Values");
            WriteEstimatedWorksheet(estimatedWorksheet, datasetWorksheet, formulaParts, regressionSolution);

            ExcelWorksheet chartsWorksheet = package.Workbook.Worksheets.Add("Charts");
            AddCharts(chartsWorksheet);
          }

          package.Workbook.Properties.Title = "Excel Export";
          package.Workbook.Properties.Author = "HEAL";
          package.Workbook.Properties.Comments = "Excel export of a symbolic data analysis solution from HeuristicLab";
          package.Save();
        }
      }
      finally {
        TryDeleteFile(treePictureFile);
      }
    }

    /// <summary>
    /// Deletes <paramref name="path"/> on a best-effort basis.
    /// </summary>
    private static void TryDeleteFile(string path) {
      try {
        File.Delete(path);
      }
      catch (IOException) {
        // Deliberately ignored: the file may still be in use (NOTE(review): depends on how the
        // EPPlus version in use loads pictures). A left-over temp file must not fail the export.
      }
      catch (UnauthorizedAccessException) {
        // Deliberately ignored, see above.
      }
    }

    /// <summary>
    /// Fills the "Model" sheet: model name, formula parts (column D), tree depth and length,
    /// estimation limits, partition boundaries, error statistics and the model tree picture.
    /// </summary>
    /// <param name="modelWorksheet">The worksheet to write to.</param>
    /// <param name="solution">The exported solution.</param>
    /// <param name="formulaParts">Lines of the formatted formula; each is written to its own row.</param>
    /// <param name="treePictureFile">Path of a temporary file used to render the model tree.</param>
    private void FormatModelSheet(ExcelWorksheet modelWorksheet, ISymbolicDataAnalysisSolution solution, IEnumerable<string> formulaParts, string treePictureFile) {
      int row = 1;
      WriteValueRow(modelWorksheet, row, "Model", solution.Name);

      // The formula parts go to column D, one part per row, starting in the first row.
      foreach (var part in formulaParts) {
        modelWorksheet.Cells[row, 4].Value = part;
        row++;
      }

      row = 2;
      WriteValueRow(modelWorksheet, row, "Model Depth", solution.Model.SymbolicExpressionTree.Depth);
      row++;
      WriteValueRow(modelWorksheet, row, "Model Length", solution.Model.SymbolicExpressionTree.Length);

      row += 2;
      WriteValueRow(modelWorksheet, row, "Estimation Limits Lower", Math.Max(solution.Model.LowerEstimationLimit, -EXCELMAXVALUE)); // minimal value supported by excel
      NameValueCell(modelWorksheet, row, "EstimationLimitLower");
      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = SCIENTIFICFORMAT;
      row++;
      WriteValueRow(modelWorksheet, row, "Estimation Limits Upper", Math.Min(solution.Model.UpperEstimationLimit, EXCELMAXVALUE)); // maximal value supported by excel
      NameValueCell(modelWorksheet, row, "EstimationLimitUpper");
      modelWorksheet.Cells[row, 2].Style.Numberformat.Format = SCIENTIFICFORMAT;

      row += 2;
      WriteValueRow(modelWorksheet, row, "Trainings Partition Start", solution.ProblemData.TrainingPartition.Start);
      NameValueCell(modelWorksheet, row, TRAININGSTART);
      row++;
      WriteValueRow(modelWorksheet, row, "Trainings Partition End", solution.ProblemData.TrainingPartition.End);
      NameValueCell(modelWorksheet, row, TRAININGEND);
      row++;
      WriteValueRow(modelWorksheet, row, "Test Partition Start", solution.ProblemData.TestPartition.Start);
      NameValueCell(modelWorksheet, row, TESTSTART);
      row++;
      WriteValueRow(modelWorksheet, row, "Test Partition End", solution.ProblemData.TestPartition.End);
      NameValueCell(modelWorksheet, row, TESTEND);

      // Ranges on the "Estimated Values" sheet, restricted to the training / test rows.
      string excelTrainingTarget = Indirect("B", true);
      string excelTrainingEstimated = Indirect("C", true);
      string excelTrainingAbsoluteError = Indirect("D", true);
      string excelTrainingRelativeError = Indirect("E", true);
      string excelTrainingMeanError = Indirect("F", true);
      string excelTrainingMSE = Indirect("G", true);

      string excelTestTarget = Indirect("B", false);
      string excelTestEstimated = Indirect("C", false);
      string excelTestAbsoluteError = Indirect("D", false);
      string excelTestRelativeError = Indirect("E", false);
      string excelTestMeanError = Indirect("F", false);
      string excelTestMSE = Indirect("G", false);

      row += 2;
      WriteFormulaRow(modelWorksheet, row, "Pearson's R² (training)", string.Format("POWER(PEARSON({0},{1}),2)", excelTrainingTarget, excelTrainingEstimated), FIXEDFORMAT);
      row++;
      WriteFormulaRow(modelWorksheet, row, "Pearson's R² (test)", string.Format("POWER(PEARSON({0},{1}),2)", excelTestTarget, excelTestEstimated), FIXEDFORMAT);
      row++;
      WriteFormulaRow(modelWorksheet, row, "Mean Squared Error (training)", string.Format("AVERAGE({0})", excelTrainingMSE), SCIENTIFICFORMAT);
      NameValueCell(modelWorksheet, row, "TrainingMSE"); // referenced by the normalized MSE below
      row++;
      WriteFormulaRow(modelWorksheet, row, "Mean Squared Error (test)", string.Format("AVERAGE({0})", excelTestMSE), SCIENTIFICFORMAT);
      NameValueCell(modelWorksheet, row, "TestMSE"); // referenced by the normalized MSE below
      row++;
      WriteFormulaRow(modelWorksheet, row, "Mean absolute error (training)", string.Format("AVERAGE({0})", excelTrainingAbsoluteError), SCIENTIFICFORMAT);
      row++;
      WriteFormulaRow(modelWorksheet, row, "Mean absolute error (test)", string.Format("AVERAGE({0})", excelTestAbsoluteError), SCIENTIFICFORMAT);
      row++;
      WriteFormulaRow(modelWorksheet, row, "Mean error (training)", string.Format("AVERAGE({0})", excelTrainingMeanError), SCIENTIFICFORMAT);
      row++;
      WriteFormulaRow(modelWorksheet, row, "Mean error (test)", string.Format("AVERAGE({0})", excelTestMeanError), SCIENTIFICFORMAT);
      row++;
      WriteFormulaRow(modelWorksheet, row, "Average relative error (training)", string.Format("AVERAGE({0})", excelTrainingRelativeError), PERCENTFORMAT);
      row++;
      WriteFormulaRow(modelWorksheet, row, "Average relative error (test)", string.Format("AVERAGE({0})", excelTestRelativeError), PERCENTFORMAT);
      row++;
      WriteFormulaRow(modelWorksheet, row, "Normalized Mean Squared error (training)", string.Format("TrainingMSE / VAR({0})", excelTrainingTarget), SCIENTIFICFORMAT);
      row++;
      WriteFormulaRow(modelWorksheet, row, "Normalized Mean Squared error (test)", string.Format("TestMSE / VAR({0})", excelTestTarget), SCIENTIFICFORMAT);

      modelWorksheet.Cells["A1:B" + row].AutoFitColumns();

      AddModelTreePicture(modelWorksheet, solution.Model, treePictureFile);
    }

    /// <summary>Writes <paramref name="label"/> to column A and <paramref name="value"/> to column B of <paramref name="row"/>.</summary>
    private static void WriteValueRow(ExcelWorksheet worksheet, int row, string label, object value) {
      worksheet.Cells[row, 1].Value = label;
      worksheet.Cells[row, 2].Value = value;
    }

    /// <summary>
    /// Writes <paramref name="label"/> to column A and <paramref name="formula"/> to column B of
    /// <paramref name="row"/> and applies <paramref name="numberFormat"/> to the formula cell.
    /// </summary>
    private static void WriteFormulaRow(ExcelWorksheet worksheet, int row, string label, string formula, string numberFormat) {
      worksheet.Cells[row, 1].Value = label;
      worksheet.Cells[row, 2].Formula = formula;
      worksheet.Cells[row, 2].Style.Numberformat.Format = numberFormat;
    }

    /// <summary>Registers the column B cell of <paramref name="row"/> under the defined name <paramref name="name"/>.</summary>
    private static void NameValueCell(ExcelWorksheet worksheet, int row, string name) {
      worksheet.Names.Add(name, worksheet.Cells[row, 2]);
    }

    /// <summary>
    /// Builds an INDIRECT(...) expression addressing the rows of <paramref name="column"/> on the
    /// "Estimated Values" sheet from (partition start + 2) to (partition end + 1), using the
    /// training or the test partition names.
    /// </summary>
    private string Indirect(string column, bool training) {
      string start = training ? TRAININGSTART : TESTSTART;
      string end = training ? TRAININGEND : TESTEND;
      return string.Format("INDIRECT(\"'Estimated Values'!{0}\"&{1}+2&\":{0}\"&{2}+1)", column, start, end);
    }

    /// <summary>
    /// Defines the dynamic ranges (all / training / test) over the "Estimated Values" sheet and
    /// adds a scatter plot and a line chart that use them.
    /// </summary>
    private void AddCharts(ExcelWorksheet chartsWorksheet) {
      chartsWorksheet.Names.AddFormula("AllId", "OFFSET('Estimated Values'!$A$1,1,0, COUNTA('Estimated Values'!$A:$A)-1)");
      chartsWorksheet.Names.AddFormula("AllTarget", "OFFSET('Estimated Values'!$B$1,1,0, COUNTA('Estimated Values'!$B:$B)-1)");
      chartsWorksheet.Names.AddFormula("AllEstimated", "OFFSET('Estimated Values'!$C$1,1,0, COUNTA('Estimated Values'!$C:$C)-1)");
      chartsWorksheet.Names.AddFormula("TrainingId", "OFFSET('Estimated Values'!$A$1,Model!TrainingStart + 1,0, Model!TrainingEnd - Model!TrainingStart)");
      chartsWorksheet.Names.AddFormula("TrainingTarget", "OFFSET('Estimated Values'!$B$1,Model!TrainingStart + 1,0, Model!TrainingEnd - Model!TrainingStart)");
      chartsWorksheet.Names.AddFormula("TrainingEstimated", "OFFSET('Estimated Values'!$C$1,Model!TrainingStart + 1,0, Model!TrainingEnd - Model!TrainingStart)");
      chartsWorksheet.Names.AddFormula("TestId", "OFFSET('Estimated Values'!$A$1,Model!TestStart + 1,0, Model!TestEnd - Model!TestStart)");
      chartsWorksheet.Names.AddFormula("TestTarget", "OFFSET('Estimated Values'!$B$1,Model!TestStart + 1,0, Model!TestEnd - Model!TestStart)");
      chartsWorksheet.Names.AddFormula("TestEstimated", "OFFSET('Estimated Values'!$C$1,Model!TestStart + 1,0, Model!TestEnd - Model!TestStart)");

      var scatterPlot = chartsWorksheet.Drawings.AddChart("scatterPlot", eChartType.XYScatter);
      scatterPlot.SetSize(800, 400);
      scatterPlot.SetPosition(0, 0);
      scatterPlot.Title.Text = "Scatter Plot";
      var seriesAll = scatterPlot.Series.Add("AllTarget", "AllEstimated");
      seriesAll.Header = "All";
      var seriesTraining = scatterPlot.Series.Add("TrainingTarget", "TrainingEstimated");
      seriesTraining.Header = "Training";
      var seriesTest = scatterPlot.Series.Add("TestTarget", "TestEstimated");
      seriesTest.Header = "Test";

      // The line chart is positioned at offset 400, the height of the scatter plot.
      var lineChart = chartsWorksheet.Drawings.AddChart("lineChart", eChartType.XYScatterLinesNoMarkers);
      lineChart.SetSize(800, 400);
      lineChart.SetPosition(400, 0);
      lineChart.Title.Text = "LineChart";
      var lineTarget = lineChart.Series.Add("AllTarget", "AllId");
      lineTarget.Header = "Target";
      var lineAll = lineChart.Series.Add("AllEstimated", "AllId");
      lineAll.Header = "All";
      var lineTraining = lineChart.Series.Add("TrainingEstimated", "TrainingId");
      lineTraining.Header = "Training";
      var lineTest = lineChart.Series.Add("TestEstimated", "TestId");
      lineTest.Header = "Test";
    }

    /// <summary>
    /// Renders the model tree as EMF into <paramref name="treePictureFile"/> and adds that
    /// picture to <paramref name="modelWorksheet"/>. The caller owns (and removes) the file.
    /// </summary>
    private void AddModelTreePicture(ExcelWorksheet modelWorksheet, ISymbolicDataAnalysisModel model, string treePictureFile) {
      // The chart control is only needed for rendering, so it is disposed right afterwards.
      using (SymbolicExpressionTreeChart modelTreePicture = new SymbolicExpressionTreeChart()) {
        modelTreePicture.Tree = model.SymbolicExpressionTree;
        modelTreePicture.Width = 1000;
        modelTreePicture.Height = 500;
        modelTreePicture.SaveImageAsEmf(treePictureFile);
      }

      FileInfo fi = new FileInfo(treePictureFile);
      var excelModelTreePic = modelWorksheet.Drawings.AddPicture("ModelTree", fi);
      excelModelTreePic.SetSize(50);
      excelModelTreePic.SetPosition(2, 0, 6, 0);
    }

    /// <summary>
    /// Fills the "Estimated Values" sheet: one row per dataset row with id, target value,
    /// estimated value, error measures and the unbounded / bounded model output.
    /// </summary>
    /// <param name="estimatedWorksheet">The worksheet to write to.</param>
    /// <param name="datasetWorksheet">The sheet holding the raw data; target cells reference it.</param>
    /// <param name="formulaParts">Lines of the formatted formula (see <see cref="PrepareFormula"/>).</param>
    /// <param name="solution">The exported regression solution.</param>
    private void WriteEstimatedWorksheet(ExcelWorksheet estimatedWorksheet, ExcelWorksheet datasetWorksheet, string[] formulaParts, IRegressionSolution solution) {
      string preparedFormula = PrepareFormula(formulaParts);
      int rows = solution.ProblemData.Dataset.Rows;
      int lastRow = rows + 1; // row 1 holds the header

      estimatedWorksheet.Cells[1, 1].Value = "Id";
      estimatedWorksheet.Cells[1, 2].Value = "Target Variable";
      estimatedWorksheet.Cells[1, 3].Value = "Estimated Values";
      estimatedWorksheet.Cells[1, 4].Value = "Absolute Error";
      estimatedWorksheet.Cells[1, 5].Value = "Relative Error";
      estimatedWorksheet.Cells[1, 6].Value = "Error";
      estimatedWorksheet.Cells[1, 7].Value = "Squared Error";
      estimatedWorksheet.Cells[1, 9].Value = "Unbounded Estimated Values";
      estimatedWorksheet.Cells[1, 10].Value = "Bounded Estimated Values";
      estimatedWorksheet.Cells[1, 1, 1, 10].AutoFitColumns();

      // 1-based column of the target variable on the dataset sheet.
      int targetIndex = solution.ProblemData.Dataset.VariableNames.ToList().FindIndex(x => x.Equals(solution.ProblemData.TargetVariable)) + 1;
      for (int i = 0; i < rows; i++) {
        int sheetRow = i + 2;
        estimatedWorksheet.Cells[sheetRow, 1].Value = i;
        estimatedWorksheet.Cells[sheetRow, 2].Formula = datasetWorksheet.Cells[sheetRow, targetIndex].FullAddress;
        estimatedWorksheet.Cells[sheetRow, 9].Formula = string.Format(preparedFormula, sheetRow);
      }

      // The column formulas are written once for the whole range, relative to the first data row.
      FormatColumn(estimatedWorksheet, "B", lastRow);
      FillColumn(estimatedWorksheet, "C", lastRow, "J2");
      FillColumn(estimatedWorksheet, "D", lastRow, "ABS(B2 - C2)");
      FillColumn(estimatedWorksheet, "E", lastRow, "ABS(D2 / B2)");
      FillColumn(estimatedWorksheet, "F", lastRow, "C2 - B2");
      FillColumn(estimatedWorksheet, "G", lastRow, "POWER(F2, 2)");
      FormatColumn(estimatedWorksheet, "I", lastRow);
      FillColumn(estimatedWorksheet, "J", lastRow, "IFERROR(IF(I2 > Model!EstimationLimitUpper, Model!EstimationLimitUpper, IF(I2 < Model!EstimationLimitLower, Model!EstimationLimitLower, I2)), AVERAGE(Model!EstimationLimitLower, Model!EstimationLimitUpper))");
    }

    /// <summary>Assigns <paramref name="formula"/> to rows 2..<paramref name="lastRow"/> of <paramref name="column"/> and formats the range.</summary>
    private static void FillColumn(ExcelWorksheet worksheet, string column, int lastRow, string formula) {
      worksheet.Cells[ColumnRange(column, lastRow)].Formula = formula;
      FormatColumn(worksheet, column, lastRow);
    }

    /// <summary>Applies the fixed-point number format to rows 2..<paramref name="lastRow"/> of <paramref name="column"/>.</summary>
    private static void FormatColumn(ExcelWorksheet worksheet, string column, int lastRow) {
      worksheet.Cells[ColumnRange(column, lastRow)].Style.Numberformat.Format = FIXEDFORMAT;
    }

    /// <summary>Returns the address "X2:Xn" for column X and last row n.</summary>
    private static string ColumnRange(string column, int lastRow) {
      return column + "2:" + column + lastRow;
    }

    /// <summary>
    /// Turns the model formula (first formula part) into a format string: every reference
    /// "$&lt;column&gt;1" named by a mapping line is redirected to the "Inputs" sheet and its
    /// row number is replaced by the placeholder {0}.
    /// </summary>
    /// <param name="formulaParts">
    /// Element 0 is the formula; elements from index 2 on are split at " = " and their second
    /// part is taken as the column reference.
    /// </param>
    private string PrepareFormula(string[] formulaParts) {
      string preparedFormula = formulaParts[0];
      foreach (var part in formulaParts.Skip(2)) {
        var varMap = part.Split(new string[] { " = " }, StringSplitOptions.None);
        var columnName = "$" + varMap[1] + "1";
        preparedFormula = preparedFormula.Replace(columnName, "Inputs!$" + varMap[1] + "{0}"); //{0} will be replaced later with the row number
      }
      return preparedFormula;
    }

    /// <summary>
    /// Fills the "Inputs" sheet: column k references the dataset column of the k-th mapped
    /// variable, for the header row and every data row.
    /// </summary>
    /// <param name="inputsWorksheet">The worksheet to write to.</param>
    /// <param name="datasetWorksheet">The sheet holding the raw data; all cells reference it.</param>
    /// <param name="list">Mapping lines; the text before the first '=' is taken as the variable name.</param>
    /// <param name="dataset">The dataset that defines variable order and row count.</param>
    private void WriteInputSheet(ExcelWorksheet inputsWorksheet, ExcelWorksheet datasetWorksheet, IEnumerable<string> list, Dataset dataset) {
      //remark the performance of EPPlus drops dramatically
      //if the data is not written row wise (from left to right) due the internal indices used.
      var variableNames = dataset.VariableNames.Select((v, i) => new { variable = v, index = i + 1 }).ToDictionary(v => v.variable, v => v.index);
      var nameMapping = list.Select(x => x.Split('=')[0].Trim()).ToArray();

      // Resolve the dataset column of every input column once instead of once per cell.
      var variableIndices = nameMapping.Select(name => variableNames[name]).ToArray();
      int lastRow = dataset.Rows + 1; // includes the header row

      for (int row = 1; row <= lastRow; row++) {
        for (int column = 1; column <= variableIndices.Length; column++) {
          inputsWorksheet.Cells[row, column].Formula = datasetWorksheet.Cells[row, variableIndices[column - 1]].FullAddress;
        }
      }
    }

    /// <summary>
    /// Writes the complete dataset to <paramref name="datasetWorksheet"/>: variable names in the
    /// first row, followed by one row per dataset row. Double variables are written as numbers,
    /// all other variables as returned by <c>Dataset.GetValue</c>.
    /// </summary>
    private void WriteDatasetToExcel(ExcelWorksheet datasetWorksheet, IDataAnalysisProblemData problemData) {
      //remark the performance of EPPlus drops dramatically
      //if the data is not written row wise (from left to right) due the internal indices used.
      Dataset dataset = problemData.Dataset;
      var variableNames = dataset.VariableNames.ToList();
      var doubleVariables = new HashSet<string>(dataset.DoubleVariables);

      // Decide once per column whether it holds double values.
      var isDoubleColumn = variableNames.Select(name => doubleVariables.Contains(name)).ToArray();

      for (int col = 1; col <= variableNames.Count; col++)
        datasetWorksheet.Cells[1, col].Value = variableNames[col - 1];

      int rowCount = dataset.Rows;
      for (int row = 0; row < rowCount; row++) {
        for (int col = 0; col < variableNames.Count; col++) {
          if (isDoubleColumn[col])
            datasetWorksheet.Cells[row + 2, col + 1].Value = dataset.GetDoubleValue(variableNames[col], row);
          else
            datasetWorksheet.Cells[row + 2, col + 1].Value = dataset.GetValue(row, col);
        }
      }
    }
  }
}