#region License Information
/* HeuristicLab
* Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
using DiffSharp.Interop.Float64;
using System.Diagnostics;
namespace HeuristicLab.Algorithms.DataAnalysis.Experimental {
[Item("Constant Optimization Evaluator with Constraints", "")]
[StorableClass]
public class SymbolicRegressionConstantOptimizationEvaluator : SymbolicRegressionSingleObjectiveEvaluator {
private const string ConstantOptimizationIterationsParameterName = "ConstantOptimizationIterations";
private const string ConstantOptimizationImprovementParameterName = "ConstantOptimizationImprovement";
private const string ConstantOptimizationProbabilityParameterName = "ConstantOptimizationProbability";
private const string ConstantOptimizationRowsPercentageParameterName = "ConstantOptimizationRowsPercentage";
private const string UpdateConstantsInTreeParameterName = "UpdateConstantsInSymbolicExpressionTree";
private const string UpdateVariableWeightsParameterName = "Update Variable Weights";
public IFixedValueParameter<IntValue> ConstantOptimizationIterationsParameter {
get { return (IFixedValueParameter<IntValue>)Parameters[ConstantOptimizationIterationsParameterName]; }
}
public IFixedValueParameter<DoubleValue> ConstantOptimizationImprovementParameter {
get { return (IFixedValueParameter<DoubleValue>)Parameters[ConstantOptimizationImprovementParameterName]; }
}
public IFixedValueParameter<PercentValue> ConstantOptimizationProbabilityParameter {
get { return (IFixedValueParameter<PercentValue>)Parameters[ConstantOptimizationProbabilityParameterName]; }
}
public IFixedValueParameter<PercentValue> ConstantOptimizationRowsPercentageParameter {
get { return (IFixedValueParameter<PercentValue>)Parameters[ConstantOptimizationRowsPercentageParameterName]; }
}
public IFixedValueParameter<BoolValue> UpdateConstantsInTreeParameter {
get { return (IFixedValueParameter<BoolValue>)Parameters[UpdateConstantsInTreeParameterName]; }
}
public IFixedValueParameter<BoolValue> UpdateVariableWeightsParameter {
get { return (IFixedValueParameter<BoolValue>)Parameters[UpdateVariableWeightsParameterName]; }
}
public IntValue ConstantOptimizationIterations {
get { return ConstantOptimizationIterationsParameter.Value; }
}
public DoubleValue ConstantOptimizationImprovement {
get { return ConstantOptimizationImprovementParameter.Value; }
}
public PercentValue ConstantOptimizationProbability {
get { return ConstantOptimizationProbabilityParameter.Value; }
}
public PercentValue ConstantOptimizationRowsPercentage {
get { return ConstantOptimizationRowsPercentageParameter.Value; }
}
public bool UpdateConstantsInTree {
get { return UpdateConstantsInTreeParameter.Value.Value; }
set { UpdateConstantsInTreeParameter.Value.Value = value; }
}
public bool UpdateVariableWeights {
get { return UpdateVariableWeightsParameter.Value.Value; }
set { UpdateVariableWeightsParameter.Value.Value = value; }
}
public override bool Maximization {
get { return true; }
}
[StorableConstructor]
protected SymbolicRegressionConstantOptimizationEvaluator(bool deserializing) : base(deserializing) { }
protected SymbolicRegressionConstantOptimizationEvaluator(SymbolicRegressionConstantOptimizationEvaluator original, Cloner cloner)
: base(original, cloner) {
}
public SymbolicRegressionConstantOptimizationEvaluator()
: base() {
Parameters.Add(new FixedValueParameter<IntValue>(ConstantOptimizationIterationsParameterName, "Determines how many iterations should be calculated while optimizing the constants of a symbolic expression tree (0 indicates other or default stopping criterion).", new IntValue(10), true));
Parameters.Add(new FixedValueParameter<DoubleValue>(ConstantOptimizationImprovementParameterName, "Determines the relative improvement which must be achieved in the constant optimization to continue with it (0 indicates other or default stopping criterion).", new DoubleValue(0), true) { Hidden = true });
Parameters.Add(new FixedValueParameter<PercentValue>(ConstantOptimizationProbabilityParameterName, "Determines the probability that the constants are optimized.", new PercentValue(1), true));
Parameters.Add(new FixedValueParameter<PercentValue>(ConstantOptimizationRowsPercentageParameterName, "Determines the percentage of the rows which should be used for constant optimization.", new PercentValue(1), true));
Parameters.Add(new FixedValueParameter<BoolValue>(UpdateConstantsInTreeParameterName, "Determines if the constants in the tree should be overwritten by the optimized constants.", new BoolValue(true)) { Hidden = true });
Parameters.Add(new FixedValueParameter<BoolValue>(UpdateVariableWeightsParameterName, "Determines if the variable weights in the tree should be optimized.", new BoolValue(true)) { Hidden = true });
}
public override IDeepCloneable Clone(Cloner cloner) {
return new SymbolicRegressionConstantOptimizationEvaluator(this, cloner);
}
[StorableHook(HookType.AfterDeserialization)]
private void AfterDeserialization() {
if (!Parameters.ContainsKey(UpdateConstantsInTreeParameterName))
Parameters.Add(new FixedValueParameter<BoolValue>(UpdateConstantsInTreeParameterName, "Determines if the constants in the tree should be overwritten by the optimized constants.", new BoolValue(true)));
if (!Parameters.ContainsKey(UpdateVariableWeightsParameterName))
Parameters.Add(new FixedValueParameter<BoolValue>(UpdateVariableWeightsParameterName, "Determines if the variable weights in the tree should be optimized.", new BoolValue(true)));
}
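// Optimizes the constants with probability ConstantOptimizationProbability, using only
// ConstantOptimizationRowsPercentage of the training rows. If the row percentage differs
// from the regular relative number of evaluated samples, the tree is re-evaluated on the
// regular evaluation rows so that the reported quality stays comparable across individuals.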
public override IOperation InstrumentedApply() {
var solution = SymbolicExpressionTreeParameter.ActualValue;
double quality;
if (RandomParameter.ActualValue.NextDouble() < ConstantOptimizationProbability.Value) {
IEnumerable<int> constantOptimizationRows = GenerateRowsToEvaluate(ConstantOptimizationRowsPercentage.Value);
quality = OptimizeConstants(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, ProblemDataParameter.ActualValue,
constantOptimizationRows, ApplyLinearScalingParameter.ActualValue.Value, ConstantOptimizationIterations.Value, updateVariableWeights: UpdateVariableWeights, lowerEstimationLimit: EstimationLimitsParameter.ActualValue.Lower, upperEstimationLimit: EstimationLimitsParameter.ActualValue.Upper, updateConstantsInTree: UpdateConstantsInTree);
if (ConstantOptimizationRowsPercentage.Value != RelativeNumberOfEvaluatedSamplesParameter.ActualValue.Value) {
var evaluationRows = GenerateRowsToEvaluate();
quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, evaluationRows, ApplyLinearScalingParameter.ActualValue.Value);
}
} else {
var evaluationRows = GenerateRowsToEvaluate();
quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, evaluationRows, ApplyLinearScalingParameter.ActualValue.Value);
}
QualityParameter.ActualValue = new DoubleValue(quality);
return base.InstrumentedApply();
}
public override double Evaluate(IExecutionContext context, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows) {
SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context;
EstimationLimitsParameter.ExecutionContext = context;
ApplyLinearScalingParameter.ExecutionContext = context;
// The Pearson R² evaluator is used on purpose instead of the const-opt evaluator,
// because Evaluate() is used to get the quality of evolved models on
// different partitions of the dataset (e.g., best validation model)
double r2 = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, tree, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, problemData, rows, ApplyLinearScalingParameter.ActualValue.Value);
SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null;
EstimationLimitsParameter.ExecutionContext = null;
ApplyLinearScalingParameter.ExecutionContext = null;
return r2;
}
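// Optimizes the numeric constants of the tree via nonlinearly constrained least squares:
// the tree is converted into a differentiable function (DiffSharp), the sum of squared
// errors is minimized with ALGLIB minnlc (augmented Lagrangian), and constraints are read
// from dataset columns named "df/d(<variable>)" and "f(x)" together with the matching
// "... constraint-type" columns (LEQ, GEQ, EQ). Returns the Pearson R² of the tree with
// the optimized constants (or the original quality if the optimization fails).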
public static double OptimizeConstants(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling,
int maxIterations, bool updateVariableWeights = true,
double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue,
bool updateConstantsInTree = true) {
// numeric constants in the tree become variables for constant opt
// variables in the tree become parameters (fixed values) for constant opt
// for each parameter (variable in the original tree) we store the
// variable name, variable value (for factor vars) and lag as a DataForVariable object.
// A dictionary is used to find parameters
double[] initialConstants;
var parameters = new List<TreeToDiffSharpConverter.DataForVariable>();
Func<DV, D> func;
if (!TreeToDiffSharpConverter.TryConvertToDiffSharp(tree, updateVariableWeights, out parameters, out initialConstants,
out func))
throw new NotSupportedException("Could not optimize constants of symbolic expression tree due to not supported symbols used in the tree.");
if (parameters.Count == 0) return 0.0; // gkronber: constant expressions always have an R² of 0.0
var parameterEntries = parameters.ToArray(); // order of entries must be the same for x
// extract initial constants
double[] c = new double[initialConstants.Length];
double[] s = new double[c.Length];
{
Array.Copy(initialConstants, 0, c, 0, initialConstants.Length);
// c[0] = 1.0;
// c[1] = 0.0;
// Array.Copy(initialConstants, 0, c, 2, initialConstants.Length);
// s[0] = 1.0;
// s[1] = 1.0;
// Array.Copy(initialConstants.Select(ci=>Math.Abs(ci)).ToArray()
// , 0, s, 2, initialConstants.Length);
}
s = Enumerable.Repeat(0.01, c.Length).ToArray();
double[] originalConstants = (double[])c.Clone();
double originalQuality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);
alglib.minnlcstate state;
alglib.minnlcreport rep;
IDataset ds = problemData.Dataset;
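// build the matrix of variable values: one row per training row, one column per parameter entry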
double[,] x = new double[rows.Count(), parameters.Count];
int col = 0;
int row = 0;
foreach (var info in parameterEntries) {
row = 0;
// copy training rows
foreach (var r in rows) {
if (ds.VariableHasType<double>(info.variableName)) {
x[row, col] = ds.GetDoubleValue(info.variableName, r + info.lag);
} else if (ds.VariableHasType<string>(info.variableName)) {
x[row, col] = ds.GetStringValue(info.variableName, r) == info.variableValue ? 1 : 0;
} else throw new InvalidProgramException("found a variable of unknown type");
row++;
}
col++;
}
var target = problemData.TargetVariable;
// determine rows with constraints by checking if any constraint column contains a value
var constraintColNames = parameterEntries
.Select(e => e.variableName)
.Select(name => string.Format("df/d({0})", name))
.ToArray();
var constraintRows = Enumerable.Range(0, problemData.Dataset.Rows)
.Where(rIdx => constraintColNames.Any(name => !double.IsNaN(ds.GetDoubleValue(name, rIdx))))
.ToArray(); // materialize: the sequence is enumerated several times below
double[,] constraintX = new double[constraintRows.Count(), parameters.Count]; // inputs for constraint values
double[,] constraints = new double[constraintRows.Count(), parameters.Count + 1]; // constraint values; +1 column for the f(x) constraint
string[,] comp = new string[constraintRows.Count(), parameters.Count + 1]; // comparison types (LEQ, EQ, GEQ)
int eqConstraints = 0;
int ieqConstraints = 0;
col = 0;
foreach (var info in parameterEntries) {
row = 0;
// find the matching df/dx column
var colName = string.Format("df/d({0})", info.variableName);
var compColName = string.Format("df/d({0}) constraint-type", info.variableName);
foreach (var r in constraintRows) {
constraintX[row, col] = ds.GetDoubleValue(info.variableName, r);
if (ds.VariableNames.Contains(colName)) {
constraints[row, col] = ds.GetDoubleValue(colName, r);
comp[row, col] = ds.GetStringValue(compColName, r);
if (comp[row, col] == "EQ") eqConstraints++;
else if (comp[row, col] == "LEQ" || comp[row, col] == "GEQ") ieqConstraints++;
}
row++;
}
col++;
}
// f(x) constraint
row = 0;
col = constraints.GetLength(1) - 1;
foreach (var r in constraintRows) {
constraints[row, col] = ds.GetDoubleValue("f(x)", r);
comp[row, col] = ds.GetStringValue("f(x) constraint-type", r);
if (comp[row, col] == "EQ") eqConstraints++;
else if (comp[row, col] == "LEQ" || comp[row, col] == "GEQ") ieqConstraints++;
row++;
}
double[] y = ds.GetDoubleValues(target, rows).ToArray();
alglib.ndimensional_jac jac = CreateJac(x, y, constraintX, constraints, comp, func);
double rho = 10000;
int outeriters = 3;
int updateFreq = 10;
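// Augmented Lagrangian settings: rho is the penalty coefficient, outeriters the number of
// outer iterations (larger values enforce the constraints more exactly but make the
// subproblems harder); updateFreq controls how often the low-rank preconditioner is updated.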
try {
alglib.minnlccreate(c, out state);
alglib.minnlcsetalgoaul(state, rho, outeriters);
alglib.minnlcsetcond(state, 0.0, 0.0, 0.0, maxIterations);
alglib.minnlcsetscale(state, s);
alglib.minnlcsetprecexactlowrank(state, updateFreq);
alglib.minnlcsetnlc(state, eqConstraints, ieqConstraints);
alglib.minnlcoptimize(state, jac, null, null);
alglib.minnlcresults(state, out c, out rep);
} catch (ArithmeticException) {
return originalQuality;
} catch (alglib.alglibexception) {
return originalQuality;
}
// -7 => constant optimization failed due to a wrong gradient
// -8 => integrity check failed (e.g., the gradient is NaN)
if (rep.terminationtype != -7 && rep.terminationtype != -8)
UpdateConstants(tree, c.ToArray(), updateVariableWeights);
else {
UpdateConstants(tree, Enumerable.Repeat(0.0, c.Length).ToArray(), updateVariableWeights);
}
var quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);
Console.WriteLine("{0:N4} {1:N4} {2} {3}", originalQuality, quality, state.fi.Skip(1).Count(fii => fii > 0), rep.terminationtype); // debug output: original quality, optimized quality, number of violated constraints, termination type
if (!updateConstantsInTree) UpdateConstants(tree, originalConstants.ToArray(), updateVariableWeights);
// revert the optimized constants if the result is invalid
// (a stricter criterion would also revert when originalQuality - quality > 0.001)
if (double.IsNaN(quality)) {
UpdateConstants(tree, originalConstants.ToArray(), updateVariableWeights);
return originalQuality;
}
return quality;
}
private static void UpdateConstants(ISymbolicExpressionTree tree, double[] constants, bool updateVariableWeights) {
int i = 0;
foreach (var node in tree.Root.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>()) {
ConstantTreeNode constantTreeNode = node as ConstantTreeNode;
VariableTreeNodeBase variableTreeNodeBase = node as VariableTreeNodeBase;
FactorVariableTreeNode factorVarTreeNode = node as FactorVariableTreeNode;
if (constantTreeNode != null)
constantTreeNode.Value = constants[i++];
else if (updateVariableWeights && variableTreeNodeBase != null)
variableTreeNodeBase.Weight = constants[i++];
else if (factorVarTreeNode != null) {
for (int j = 0; j < factorVarTreeNode.Weights.Length; j++)
factorVarTreeNode.Weights[j] = constants[i++];
}
}
}
private static alglib.ndimensional_jac CreateJac(
double[,] x, // variable values (one row per training row, one column per parameter)
double[] y, // target values (same number of rows as x)
double[,] constraintX, // variable values for the constraint rows
double[,] constraints, // df/d(xi) values; same number of rows as constraintX, one additional column for the f(x) constraint
string[,] comparison, // comparison types {LEQ, GEQ, EQ}, same size as constraints
Func<DV, D> func) {
Trace.Assert(x.GetLength(0) == y.Length);
Trace.Assert(x.GetLength(1) == constraintX.GetLength(1));
Trace.Assert(constraintX.GetLength(0) == constraints.GetLength(0));
Trace.Assert(constraintX.GetLength(1) == constraints.GetLength(1) - 1);
Trace.Assert(constraints.GetLength(0) == comparison.GetLength(0));
Trace.Assert(constraints.GetLength(1) == comparison.GetLength(1));
return (double[] c, double[] fi, double[,] jac, object o) => {
// objective function is sum of squared errors
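// ALGLIB minnlc callback convention: fi[0] holds the objective, followed by the equality
// constraints (= 0) and then the inequality constraints (<= 0); jac holds the corresponding
// gradients w.r.t. the constants c, one row per entry of fi.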
int nRows = y.Length;
int nParams = x.GetLength(1);
// zero fi and jac
Array.Clear(fi, 0, fi.Length);
Array.Clear(jac, 0, jac.Length);
var p = new double[nParams + c.Length];
Array.Copy(c, 0, p, nParams, c.Length); // copy c to the end of the function parameters vector
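// layout of the parameter vector p: [variable values (nParams) | constants (c.Length)],
// so derivatives w.r.t. the constants start at index nParams of the returned gradient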
for (int rowIdx = 0; rowIdx < nRows; rowIdx++) {
// copy x_i to the beginning of the function parameters vector
for (int cIdx = 0; cIdx < nParams; cIdx++)
p[cIdx] = x[rowIdx, cIdx];
double f = (double)func(p);
double[] g = (double[])AD.Grad(func, p);
var e = y[rowIdx] - f;
fi[0] += e * e;
// update gradient
for (int colIdx = 0; colIdx < c.Length; colIdx++) {
jac[0, colIdx] += -2 * e * g[nParams + colIdx]; // skip the elements for the variable values
}
}
int fidx = 1;
// equality constraints (not supported yet)
for (int rowIdx = 0; rowIdx < constraintX.GetLength(0); rowIdx++) {
for (var colIdx = 0; colIdx < constraintX.GetLength(1); colIdx++) {
if (comparison[rowIdx, colIdx] == "EQ") {
throw new NotSupportedException();
}
}
}
// ineq constraints
for (int rowIdx = 0; rowIdx < constraintX.GetLength(0); rowIdx++) {
for (int colIdx = 0; colIdx < constraints.GetLength(1); colIdx++) {
// there is a constraint value
if (!double.IsNaN(constraints[rowIdx, colIdx]) && !string.IsNullOrEmpty(comparison[rowIdx, colIdx])) {
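// minnlc expects inequality constraints in the form fi <= 0: LEQ constraints keep their
// sign (factor 1), GEQ constraints are negated (factor -1)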
var factor = (comparison[rowIdx, colIdx] == "LEQ") ? 1.0
: comparison[rowIdx, colIdx] == "GEQ" ? -1.0 : 0.0;
// copy x_i to the beginning of the function parameters vector
for (int cIdx = 0; cIdx < nParams; cIdx++)
p[cIdx] = constraintX[rowIdx, cIdx];
// f(x) constraint
if (colIdx == constraints.GetLength(1) - 1) {
fi[fidx] = factor * ((double)(func(p)) - constraints[rowIdx, colIdx]);
var g = (double[])AD.Grad(func, p);
for (int jacIdx = 0; jacIdx < c.Length; jacIdx++) {
jac[fidx, jacIdx] = factor * g[nParams + jacIdx]; // skip the elements for the variable values
}
fidx++;
} else {
// df / dxi constraint
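// the constraint value is the partial derivative df/dx_i; its gradient w.r.t. a constant
// c_j is the mixed second derivative d²f/(dx_i dc_j), i.e. an entry of the Hessian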
var g = (double[])AD.Grad(func, p);
fi[fidx] = factor * g[colIdx];
var hess = AD.Hessian(func, p);
for (int jacIdx = 0; jacIdx < c.Length; jacIdx++) {
jac[fidx, jacIdx] = factor * (double)hess[colIdx, nParams + jacIdx]; // skip the elements for the variable values
}
fidx++;
}
}
}
}
};
}
public static bool CanOptimizeConstants(ISymbolicExpressionTree tree) {
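// note: compatibility is checked with the AutoDiff term converter although OptimizeConstants
// uses the DiffSharp converter; this assumes both converters support the same symbol set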
return TreeToAutoDiffTermConverter.IsCompatible(tree);
}
}
}