Changeset 16416


Ignore:
Timestamp:
12/20/18 11:11:37 (4 months ago)
Author:
fholzing
Message:

#2904: Cleared variableImpacts in OnContentChanged(); used the existing epsilon-based comparison function (IsAlmost) for comparing estimated impacts; removed direct usage of alglib; added an LDA and an RF unit test for the impact calculation; changed the approach of loading ProblemData (no more strings); renamed the PerformanceTests for Regression/Classification so they can be found more easily

Location:
branches/2904_CalculateImpacts
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/2904_CalculateImpacts/HeuristicLab.Problems.DataAnalysis.Views/3.4/Classification/ClassificationSolutionVariableImpactsView.cs

    r16397 r16416  
    8080    protected override void OnContentChanged() {
    8181      base.OnContentChanged();
     82      rawVariableImpacts.Clear();
     83
    8284      if (Content == null) {
    8385        variableImpactsArrayView.Content = null;
     
    142144        if (impacts == null) { return; }
    143145
    144         rawVariableImpacts.Clear();
    145146        rawVariableImpacts.AddRange(impacts);
    146147        UpdateOrdering();
  • branches/2904_CalculateImpacts/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionVariableImpactsView.cs

    r16051 r16416  
    7878    protected override void OnContentChanged() {
    7979      base.OnContentChanged();
     80      rawVariableImpacts.Clear();
     81
    8082      if (Content == null) {
    8183        variableImpactsArrayView.Content = null;
     
    140142        if (impacts == null) { return; }
    141143
    142         rawVariableImpacts.Clear();
    143144        rawVariableImpacts.AddRange(impacts);
    144145        UpdateOrdering();
  • branches/2904_CalculateImpacts/HeuristicLab.Tests/HeuristicLab.Problems.DataAnalysis-3.4/ClassificationVariableImpactCalculationTest.cs

    r16067 r16416  
    2626    }
    2727
    28     private static readonly double epsilon = 0.00001;
    2928
    3029    [TestMethod]
     
    4847      ClassificationSolutionVariableImpactsCalculator.CalculateImpacts(solution);
    4948      Dictionary<string, double> expectedImpacts = GetExpectedValuesForIrisKNNModel();
     49
     50      CheckDefaultAsserts(solution, expectedImpacts);
     51    }
     52
     53
     54    [TestMethod]
     55    [TestCategory("Problems.DataAnalysis")]
     56    [TestProperty("Time", "short")]
     57    public void LDAIrisVariableImpactTest() {
     58      IClassificationProblemData problemData = LoadIrisProblem();
     59      IClassificationSolution solution = LinearDiscriminantAnalysis.CreateLinearDiscriminantAnalysisSolution(problemData);
     60      ClassificationSolutionVariableImpactsCalculator.CalculateImpacts(solution);
     61      Dictionary<string, double> expectedImpacts = GetExpectedValuesForIrisLDAModel();
    5062
    5163      CheckDefaultAsserts(solution, expectedImpacts);
     
    8597    [TestProperty("Time", "short")]
    8698    [ExpectedException(typeof(ArgumentException))]
    87     public void WrongDataSetTest() {
     99    public void WrongDataSetVariableImpactClassificationTest() {
    88100      IClassificationProblemData problemData = LoadIrisProblem();
    89101      IClassificationSolution solution = NearestNeighbourClassification.CreateNearestNeighbourClassificationSolution(problemData, 3);
     
    99111    [TestCategory("Problems.DataAnalysis")]
    100112    [TestProperty("Time", "medium")]
    101     public void PerformanceTest() {
     113    public void PerformanceVariableImpactClassificationTest() {
    102114      int rows = 1500;
    103115      int columns = 77;
     
    229241      return expectedImpacts;
    230242    }
     243    private Dictionary<string, double> GetExpectedValuesForIrisLDAModel() {
     244      Dictionary<string, double> expectedImpacts = new Dictionary<string, double>();
     245      expectedImpacts.Add("sepal_width", 0.01);
     246      expectedImpacts.Add("sepal_length", 0.03);
     247      expectedImpacts.Add("petal_width", 0.2);
     248      expectedImpacts.Add("petal_length", 0.5);
     249
     250      return expectedImpacts;
     251    }
    231252    #endregion
    232253
     
    243264      //Check if impacts are as expected
    244265      Assert.AreEqual(modelImpacts.Count(), expectedImpacts.Count);
    245       Assert.IsTrue(modelImpacts.All(v => Math.Abs(expectedImpacts[v.Item1] - v.Item2) < epsilon));
     266      Assert.IsTrue(modelImpacts.All(v => v.Item2.IsAlmost(expectedImpacts[v.Item1])));
    246267    }
    247268  }
  • branches/2904_CalculateImpacts/HeuristicLab.Tests/HeuristicLab.Problems.DataAnalysis-3.4/RegressionVariableImpactCalculationTest.cs

    r16061 r16416  
    33using System.Diagnostics;
    44using System.Linq;
     5using HeuristicLab.Algorithms.DataAnalysis;
    56using HeuristicLab.Common;
    67using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
     
    2526    }
    2627
    27     private static readonly double epsilon = 0.00001;
    2828
    2929    [TestMethod]
     
    4444    public void LinearRegressionModelVariableImpactTowerTest() {
    4545      IRegressionProblemData problemData = LoadDefaultTowerProblem();
    46       ISymbolicExpressionTree tree = CreateLRExpressionTree(problemData);
    47       IRegressionModel model = new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter());
    48       IRegressionSolution solution = new RegressionSolution(model, (IRegressionProblemData)problemData.Clone());
     46      double rmsError;
     47      double cvRmsError;
     48      var solution = LinearRegression.CreateSolution(problemData, out rmsError, out cvRmsError);
    4949      Dictionary<string, double> expectedImpacts = GetExpectedValuesForLRTower();
    5050
     
    5757    public void LinearRegressionModelVariableImpactMibaTest() {
    5858      IRegressionProblemData problemData = LoadDefaultMibaProblem();
    59       ISymbolicExpressionTree tree = CreateLRExpressionTree(problemData);
    60       IRegressionModel model = new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter());
    61       IRegressionSolution solution = new RegressionSolution(model, (IRegressionProblemData)problemData.Clone());
     59      double rmsError;
     60      double cvRmsError;
     61      var solution = LinearRegression.CreateSolution(problemData, out rmsError, out cvRmsError);
    6262      Dictionary<string, double> expectedImpacts = GetExpectedValuesForLRMiba();
     63
     64      CheckDefaultAsserts(solution, expectedImpacts);
     65    }
     66
     67    [TestMethod]
     68    [TestCategory("Problems.DataAnalysis")]
     69    [TestProperty("Time", "short")]
     70    public void RandomForestModelVariableImpactTowerTest() {
     71      IRegressionProblemData problemData = LoadDefaultTowerProblem();
     72      double rmsError;
     73      double avgRelError;
     74      double outOfBagRmsError;
     75      double outofBagAvgRelError;
     76      var solution = RandomForestRegression.CreateRandomForestRegressionSolution(problemData, 50, 0.2, 0.5, 1234, out rmsError, out avgRelError, out outOfBagRmsError, out outofBagAvgRelError);
     77      Dictionary<string, double> expectedImpacts = GetExpectedValuesForRFTower();
    6378
    6479      CheckDefaultAsserts(solution, expectedImpacts);
     
    95110    [TestProperty("Time", "short")]
    96111    [ExpectedException(typeof(ArgumentException))]
    97     public void WrongDataSetTest() {
     112    public void WrongDataSetVariableImpactRegressionTest() {
    98113      IRegressionProblemData problemData = LoadDefaultTowerProblem();
    99       ISymbolicExpressionTree tree = CreateLRExpressionTree(problemData);
    100       IRegressionModel model = new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter());
    101       IRegressionSolution solution = new RegressionSolution(model, (IRegressionProblemData)problemData.Clone());
    102 
     114      double rmsError;
     115      double cvRmsError;
     116      var solution = LinearRegression.CreateSolution(problemData, out rmsError, out cvRmsError);
    103117      solution.ProblemData = LoadDefaultMibaProblem();
    104118      RegressionSolutionVariableImpactsCalculator.CalculateImpacts(solution);
     
    109123    [TestCategory("Problems.DataAnalysis")]
    110124    [TestProperty("Time", "medium")]
    111     public void PerformanceTest() {
     125    public void PerformanceVariableImpactRegressionTest() {
    112126      int rows = 20000;
    113127      int columns = 77;
    114128      var dataSet = OnlineCalculatorPerformanceTest.CreateRandomDataset(new MersenneTwister(1234), rows, columns);
    115129      IRegressionProblemData problemData = new RegressionProblemData(dataSet, dataSet.VariableNames.Except("y".ToEnumerable()), "y");
    116       ISymbolicExpressionTree tree = CreateLRExpressionTree(problemData);
    117       IRegressionModel model = new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter());
    118       IRegressionSolution solution = new RegressionSolution(model, (IRegressionProblemData)problemData.Clone());
     130      double rmsError;
     131      double cvRmsError;
     132      var solution = LinearRegression.CreateSolution(problemData, out rmsError, out cvRmsError);
    119133
    120134      Stopwatch watch = new Stopwatch();
     
    131145    private IRegressionProblemData LoadDefaultTowerProblem() {
    132146      RegressionRealWorldInstanceProvider provider = new RegressionRealWorldInstanceProvider();
    133       var instance = provider.GetDataDescriptors().Where(x => x.Name.Equals("Tower")).Single();
    134       return provider.LoadData(instance);
     147      var tower = new HeuristicLab.Problems.Instances.DataAnalysis.Tower();
     148      return provider.LoadData(tower);
    135149    }
    136150    private IRegressionProblemData LoadDefaultMibaProblem() {
    137151      MibaFrictionRegressionInstanceProvider provider = new MibaFrictionRegressionInstanceProvider();
    138       var instance = provider.GetDataDescriptors().Where(x => x.Name.Equals("CF1")).Single();
    139       return provider.LoadData(instance);
     152      var cf1 = new HeuristicLab.Problems.Instances.DataAnalysis.CF1();
     153      return provider.LoadData(cf1);
    140154    }
    141155    private IRegressionProblemData CreateDefaultProblem() {
     
    158172
    159173    #region Create SymbolicExpressionTree
    160     private ISymbolicExpressionTree CreateLRExpressionTree(IRegressionProblemData problemData) {
    161       IEnumerable<int> rows = problemData.TrainingIndices;
    162       var doubleVariables = problemData.AllowedInputVariables.Where(problemData.Dataset.VariableHasType<double>);
    163       var factorVariableNames = problemData.AllowedInputVariables.Where(problemData.Dataset.VariableHasType<string>);
    164       var factorVariables = problemData.Dataset.GetFactorVariableValues(factorVariableNames, rows);
    165       double[,] binaryMatrix = problemData.Dataset.ToArray(factorVariables, rows);
    166       double[,] doubleVarMatrix = problemData.Dataset.ToArray(doubleVariables.Concat(new string[] { problemData.TargetVariable }), rows);
    167       var inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix);
    168 
    169       alglib.linearmodel lm = new alglib.linearmodel();
    170       alglib.lrreport ar = new alglib.lrreport();
    171       int nRows = inputMatrix.GetLength(0);
    172       int nFeatures = inputMatrix.GetLength(1) - 1;
    173       double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant
    174 
    175       int retVal = 1;
    176       alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
    177       if (retVal != 1) throw new ArgumentException("Error in calculation of linear regression solution");
    178 
    179       alglib.lrunpack(lm, out coefficients, out nFeatures);
    180 
    181       int nFactorCoeff = binaryMatrix.GetLength(1);
    182       int nVarCoeff = doubleVariables.Count();
    183       return LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(),
    184         doubleVariables.ToArray(), coefficients.Skip(nFactorCoeff).Take(nVarCoeff).ToArray(),
    185         @const: coefficients[nFeatures]);
    186     }
     174
    187175    private ISymbolicExpressionTree CreateCustomExpressionTree() {
    188176      return new InfixExpressionParser().Parse("x1*x2 - x2*x2 + x3*x3 + x4*x4 - x5*x5 + 14/12");
     
    281269      return expectedImpacts;
    282270    }
     271    private Dictionary<string, double> GetExpectedValuesForRFTower() {
     272      Dictionary<string, double> expectedImpacts = new Dictionary<string, double>();
     273      expectedImpacts.Add("x5", 0.00138095702433039);
     274      expectedImpacts.Add("x19", 0.00220739387855795);
     275      expectedImpacts.Add("x14", 0.00225120540266954);
     276      expectedImpacts.Add("x18", 0.00311857736968479);
     277      expectedImpacts.Add("x9", 0.00313474690023097);
     278      expectedImpacts.Add("x20", 0.00321781251408282);
     279      expectedImpacts.Add("x21", 0.00397483365571383);
     280      expectedImpacts.Add("x16", 0.00433280262892111);
     281      expectedImpacts.Add("x15", 0.00529918809786456);
     282      expectedImpacts.Add("x3", 0.00658791244929757);
     283      expectedImpacts.Add("x24", 0.0078645281886035);
     284      expectedImpacts.Add("x4", 0.00907314110749047);
     285      expectedImpacts.Add("x13", 0.0102943761648944);
     286      expectedImpacts.Add("x22", 0.0107132858548163);
     287      expectedImpacts.Add("x12", 0.0157078677788507);
     288      expectedImpacts.Add("x23", 0.0235857534562318);
     289      expectedImpacts.Add("x7", 0.0304143401617055);
     290      expectedImpacts.Add("x11", 0.0310773441767309);
     291      expectedImpacts.Add("x25", 0.0328308945873665);
     292      expectedImpacts.Add("x17", 0.0428771226844575);
     293      expectedImpacts.Add("x10", 0.0456335367972532);
     294      expectedImpacts.Add("x8", 0.049849257881126);
     295      expectedImpacts.Add("x1", 0.0663686086323108);
     296      expectedImpacts.Add("x2", 0.0799083890750926);
     297      expectedImpacts.Add("x6", 0.196557814244287);
     298
     299      return expectedImpacts;
     300    }
    283301    private Dictionary<string, double> GetExpectedValuesForCustomProblem() {
    284302      Dictionary<string, double> expectedImpacts = new Dictionary<string, double>();
     
    315333      //Check if impacts are as expected
    316334      Assert.AreEqual(modelImpacts.Count(), expectedImpacts.Count);
    317       Assert.IsTrue(modelImpacts.All(v => Math.Abs(expectedImpacts[v.Item1] - v.Item2) < epsilon));
     335      Assert.IsTrue(modelImpacts.All(v => v.Item2.IsAlmost(expectedImpacts[v.Item1])));
    318336    }
    319337  }
Note: See TracChangeset for help on using the changeset viewer.