Changeset 17602


Ignore:
Timestamp:
06/16/20 11:21:34 (4 weeks ago)
Author:
pfleck
Message:

#3040

  • Changed standard deviation, variance, etc. to their population variants
  • Added multiplicative simplifications for stdev and variance symbols
Location:
branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Converters/VectorTreeSimplifier.cs

    r17597 r17602  
    6262    private static readonly Mean meanSymbol = new Mean();
    6363    private static readonly Length lengthSymbol = new Length();
     64    private static readonly StandardDeviation standardDeviationSymbol = new StandardDeviation();
     65    private static readonly Variance varianceSymbol = new Variance();
    6466
    6567    private readonly SymbolicDataAnalysisExpressionTreeVectorInterpreter interpreter;
     
    271273    private static bool IsLength(ISymbolicExpressionTreeNode node) {
                // true when the node's symbol is a Length symbol
    272274      return node.Symbol is Length;
     275    }
     276
     277    private static bool IsStandardDeviation(ISymbolicExpressionTreeNode node) {
                // true when the node's symbol is a StandardDeviation symbol
     278      return node.Symbol is StandardDeviation;
     279    }
     280
     281    private static bool IsVariance(ISymbolicExpressionTreeNode node) {
                // true when the node's symbol is a Variance symbol
     282      return node.Symbol is Variance;
    273283    }
    274284    #endregion
     
    351361      } else if (IsLength(original)) {
    352362        return SimplifyLengthAggregation(original);
     363      } else if (IsStandardDeviation(original)) {
     364        return SimplifyStandardDeviationAggregation(original);
     365      } else if (IsVariance(original)) {
     366        return SimplifyVarianceAggregation(original);
    353367      } else {
    354368        return SimplifyAny(original);
     
    554568    private ISymbolicExpressionTreeNode SimplifyLengthAggregation(ISymbolicExpressionTreeNode original) {
                // simplify the first subtree, then build the length aggregation over the result
    555569      return MakeLengthAggregation(GetSimplifiedTree(original.GetSubtree(0)));
     570    }
     571
     572    private ISymbolicExpressionTreeNode SimplifyStandardDeviationAggregation(ISymbolicExpressionTreeNode original) {
                // simplify the first subtree, then build the standard deviation aggregation over the result
     573      return MakeStandardDeviationAggregation(GetSimplifiedTree(original.GetSubtree(0)));
     574    }
     575
     576    private ISymbolicExpressionTreeNode SimplifyVarianceAggregation(ISymbolicExpressionTreeNode original) {
                // simplify the first subtree, then build the variance aggregation over the result
     577      return MakeVarianceAggregation(GetSimplifiedTree(original.GetSubtree(0)));
    556578    }
    557579    #endregion
     
    15611583    }
    15621584
     1585    private ISymbolicExpressionTreeNode MakeStandardDeviationAggregation(ISymbolicExpressionTreeNode node) {
     1586      if (IsConstant(node)) {
     1587        return MakeConstant(0.0);
     1588      } else if (IsMultiplication(node) || IsDivision(node)) {
     1589        var factors = node.Subtrees;
     1590        if (IsDivision(node)) factors = InvertNodes(factors, Invert);
     1591
     1592        var scalarFactors = factors.Where(IsScalarNode).ToList();
     1593        var remainingFactors = factors.Except(scalarFactors).ToList();
     1594
     1595        if (scalarFactors.Any() && remainingFactors.Any()) {
     1596          var scalarNode = scalarFactors.Aggregate(MakeProduct);
     1597          var vectorNode = remainingFactors.Aggregate(MakeProduct);
     1598
     1599          var stdevNode = MakeStandardDeviationAggregation(vectorNode);
     1600
     1601          return MakeProduct(scalarNode, stdevNode);
     1602        } else if (scalarFactors.Any()) {
     1603          var scalarNode = scalarFactors.Aggregate(MakeProduct);
     1604          return scalarNode;
     1605        } else if (remainingFactors.Any()) {
     1606          var vectorNode = remainingFactors.Aggregate(MakeProduct);
     1607          var stdevNode = standardDeviationSymbol.CreateTreeNode();
     1608          stdevNode.AddSubtree(vectorNode);
     1609          return stdevNode;
     1610        } else
     1611          throw new InvalidOperationException("Multiplication does not contain any terms to simplify.");
     1612      } else if (IsVariableBase(node)) { // weight is like multiplication
     1613        var variableNode = (VariableTreeNodeBase)node;
     1614        var weight = variableNode.Weight;
     1615        variableNode.Weight = 1.0;
     1616        var stdevNode = standardDeviationSymbol.CreateTreeNode();
     1617        stdevNode.AddSubtree(node);
     1618        return MakeProduct(MakeConstant(weight), stdevNode);
     1619      } else {
     1620        var stdevNode = standardDeviationSymbol.CreateTreeNode();
     1621        stdevNode.AddSubtree(node);
     1622        return stdevNode;
     1623      }
     1624    }
     1625
     1626    private ISymbolicExpressionTreeNode MakeVarianceAggregation(ISymbolicExpressionTreeNode node) {
     1627      if (IsConstant(node)) {
     1628        return MakeConstant(0.0);
     1629      } else if (IsMultiplication(node) || IsDivision(node)) {
     1630        var factors = node.Subtrees;
     1631        if (IsDivision(node)) factors = InvertNodes(factors, Invert);
     1632
     1633        var scalarFactors = factors.Where(IsScalarNode).ToList();
     1634        var remainingFactors = factors.Except(scalarFactors).ToList();
     1635
     1636        if (scalarFactors.Any() && remainingFactors.Any()) {
     1637          var scalarNode = scalarFactors.Aggregate(MakeProduct);
     1638          var vectorNode = remainingFactors.Aggregate(MakeProduct);
     1639
     1640          var varNode = MakeVarianceAggregation(vectorNode);
     1641
     1642          return MakeProduct(MakeSquare(scalarNode), varNode);
     1643        } else if (scalarFactors.Any()) {
     1644          var scalarNode = scalarFactors.Aggregate(MakeProduct);
     1645          return MakeSquare(scalarNode);
     1646        } else if (remainingFactors.Any()) {
     1647          var vectorNode = remainingFactors.Aggregate(MakeProduct);
     1648          var varNode = varianceSymbol.CreateTreeNode();
     1649          varNode.AddSubtree(vectorNode);
     1650          return varNode;
     1651        } else
     1652          throw new InvalidOperationException("Multiplication does not contain any terms to simplify.");
     1653      } else if (IsVariableBase(node)) { // weight is like multiplication
     1654        var variableNode = (VariableTreeNodeBase)node;
     1655        var weight = variableNode.Weight;
     1656        variableNode.Weight = 1.0;
     1657        var varNode = varianceSymbol.CreateTreeNode();
     1658        varNode.AddSubtree(node);
     1659        return MakeProduct(MakeSquare(MakeConstant(weight)), varNode);
     1660      } else {
     1661        var varNode = varianceSymbol.CreateTreeNode();
     1662        varNode.AddSubtree(node);
     1663        return varNode;
     1664      }
     1665    }
    15631666    #endregion
    15641667
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interpreter/SymbolicDataAnalysisExpressionTreeVectorInterpreter.cs

    r17593 r17602  
    415415            var cur = Evaluate(dataset, ref row, state);
    416416            return AggregateApply(cur,
    417               s => double.NaN,
    418               v => Statistics.StandardDeviation(v));
     417              s => 0,
     418              v => Statistics.PopulationStandardDeviation(v));
    419419          }
    420420        case OpCodes.Length: {
     
    439439            var cur = Evaluate(dataset, ref row, state);
    440440            return AggregateApply(cur,
     441              s => 0,
     442              v => Statistics.PopulationVariance(v));
     443          }
     444        case OpCodes.Skewness: {
     445            var cur = Evaluate(dataset, ref row, state);
     446            return AggregateApply(cur,
    441447              s => double.NaN,
    442               v => Statistics.Variance(v));
    443           }
    444         case OpCodes.Skewness: {
     448              v => Statistics.PopulationSkewness(v));
     449          }
     450        case OpCodes.Kurtosis: {
    445451            var cur = Evaluate(dataset, ref row, state);
    446452            return AggregateApply(cur,
    447453              s => double.NaN,
    448               v => Statistics.Skewness(v));
    449           }
    450         case OpCodes.Kurtosis: {
    451             var cur = Evaluate(dataset, ref row, state);
    452             return AggregateApply(cur,
    453               s => double.NaN,
    454               v => Statistics.Kurtosis(v));
     454              v => Statistics.PopulationKurtosis(v));
    455455          }
    456456        case OpCodes.EuclideanDistance: {
     
    470470              //(s1, v2) => 0,
    471471              //(v1, s2) => 0,
    472               vvFunc: (v1, v2) => v1.Count == v2.Count ? Statistics.Covariance(v1, v2) : double.NaN);
     472              vvFunc: (v1, v2) => v1.Count == v2.Count ? Statistics.PopulationCovariance(v1, v2) : double.NaN);
    473473          }
    474474        case OpCodes.Variable: {
Note: See TracChangeset for help on using the changeset viewer.