#region License Information
/* HeuristicLab
* Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Operators;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Random;
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
[StorableClass]
public abstract class SymbolicDataAnalysisEvaluator : InstrumentedOperator,
ISymbolicDataAnalysisEvaluator, ISymbolicDataAnalysisInterpreterOperator, ISymbolicDataAnalysisBoundedOperator, IStochasticOperator
where T : class, IDataAnalysisProblemData {
// Keys under which this operator's parameters are stored in (and looked up from) the Parameters collection.
private const string RandomParameterName = "Random";
private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
// Note: the constant is named "...TreeInterpreter..." but the stored key is "SymbolicExpressionTreeInterpreter".
private const string SymbolicDataAnalysisTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
private const string ProblemDataParameterName = "ProblemData";
private const string EstimationLimitsParameterName = "EstimationLimits";
private const string EvaluationPartitionParameterName = "EvaluationPartition";
private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";
private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
private const string ValidRowIndicatorParameterName = "ValidRowIndicator";
/// <summary>
/// Always <c>false</c>: this operator reports that its name cannot be changed.
/// </summary>
public override bool CanChangeName { get { return false; } }
#region parameter properties
/// <summary>
/// Explicit <c>IStochasticOperator</c> implementation; hands out the same object as <see cref="RandomParameter"/>.
/// </summary>
ILookupParameter IStochasticOperator.RandomParameter {
  get {
    var randomParameter = RandomParameter;
    return randomParameter;
  }
}
/// <summary>
/// Parameter for the random generator to use (registered under "Random").
/// </summary>
public IValueLookupParameter RandomParameter {
  get {
    var parameter = Parameters[RandomParameterName];
    return (IValueLookupParameter)parameter;
  }
}
/// <summary>
/// Parameter for the symbolic data analysis solution encoded as a symbolic expression tree.
/// </summary>
public ILookupParameter SymbolicExpressionTreeParameter {
  get {
    var parameter = Parameters[SymbolicExpressionTreeParameterName];
    return (ILookupParameter)parameter;
  }
}
/// <summary>
/// Parameter for the interpreter used to calculate the output values of the symbolic data analysis tree.
/// </summary>
public ILookupParameter SymbolicDataAnalysisTreeInterpreterParameter {
  get {
    var parameter = Parameters[SymbolicDataAnalysisTreeInterpreterParameterName];
    return (ILookupParameter)parameter;
  }
}
/// <summary>
/// Parameter for the problem data on which the solution is evaluated.
/// </summary>
public IValueLookupParameter ProblemDataParameter {
  get {
    var parameter = Parameters[ProblemDataParameterName];
    return (IValueLookupParameter)parameter;
  }
}
/// <summary>
/// Parameter for the dataset partition on which the solution is evaluated.
/// </summary>
public IValueLookupParameter EvaluationPartitionParameter {
  get {
    var parameter = Parameters[EvaluationPartitionParameterName];
    return (IValueLookupParameter)parameter;
  }
}
/// <summary>
/// Parameter for the lower and upper limit used as cut-off for the output values of a tree.
/// </summary>
public IValueLookupParameter EstimationLimitsParameter {
  get {
    var parameter = Parameters[EstimationLimitsParameterName];
    return (IValueLookupParameter)parameter;
  }
}
/// <summary>
/// Parameter for the relative number of samples of the partition that are randomly chosen for evaluation.
/// </summary>
public IValueLookupParameter RelativeNumberOfEvaluatedSamplesParameter {
  get {
    var parameter = Parameters[RelativeNumberOfEvaluatedSamplesParameterName];
    return (IValueLookupParameter)parameter;
  }
}
/// <summary>
/// Parameter for the flag that indicates whether the individual is linearly scaled before evaluating.
/// </summary>
public ILookupParameter ApplyLinearScalingParameter {
  get {
    var parameter = Parameters[ApplyLinearScalingParameterName];
    return (ILookupParameter)parameter;
  }
}
/// <summary>
/// Optional parameter naming an indicator variable of the data set; only rows for which the
/// indicator is not (almost) zero are evaluated.
/// </summary>
public IValueLookupParameter ValidRowIndicatorParameter {
  get {
    var parameter = Parameters[ValidRowIndicatorParameterName];
    return (IValueLookupParameter)parameter;
  }
}
#endregion
/// <summary>
/// Constructor marked with <c>[StorableConstructor]</c>; it only forwards the
/// <paramref name="deserializing"/> flag to the base class and initializes nothing itself.
/// </summary>
[StorableConstructor]
protected SymbolicDataAnalysisEvaluator(bool deserializing) : base(deserializing) { }
/// <summary>
/// Copy constructor; passes <paramref name="original"/> and <paramref name="cloner"/> on to the
/// base class. This class copies no additional state of its own.
/// </summary>
protected SymbolicDataAnalysisEvaluator(SymbolicDataAnalysisEvaluator original, Cloner cloner)
: base(original, cloner) {
}
/// <summary>
/// Creates the evaluator and registers its nine parameters. The registration order is
/// kept exactly as listed below.
/// </summary>
public SymbolicDataAnalysisEvaluator() {
  var random = new ValueLookupParameter(RandomParameterName, "The random generator to use.");
  Parameters.Add(random);

  var interpreter = new LookupParameter(SymbolicDataAnalysisTreeInterpreterParameterName, "The interpreter that should be used to calculate the output values of the symbolic data analysis tree.");
  Parameters.Add(interpreter);

  var tree = new LookupParameter(SymbolicExpressionTreeParameterName, "The symbolic data analysis solution encoded as a symbolic expression tree.");
  Parameters.Add(tree);

  var problemData = new ValueLookupParameter(ProblemDataParameterName, "The problem data on which the symbolic data analysis solution should be evaluated.");
  Parameters.Add(problemData);

  var evaluationPartition = new ValueLookupParameter(EvaluationPartitionParameterName, "The start index of the dataset partition on which the symbolic data analysis solution should be evaluated.");
  Parameters.Add(evaluationPartition);

  var estimationLimits = new ValueLookupParameter(EstimationLimitsParameterName, "The upper and lower limit that should be used as cut off value for the output values of symbolic data analysis trees.");
  Parameters.Add(estimationLimits);

  var relativeSamples = new ValueLookupParameter(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.");
  Parameters.Add(relativeSamples);

  var applyLinearScaling = new LookupParameter(ApplyLinearScalingParameterName, "Flag that indicates if the individual should be linearly scaled before evaluating.");
  Parameters.Add(applyLinearScaling);

  var validRowIndicator = new ValueLookupParameter(ValidRowIndicatorParameterName, "An indicator variable in the data set that specifies which rows should be evaluated (those for which the indicator <> 0) (optional).");
  Parameters.Add(validRowIndicator);
}
/// <summary>
/// Post-deserialization fix-up of the parameter collection: makes sure the
/// "ApplyLinearScaling" entry is a <c>LookupParameter</c> and that the "ValidRowIndicator"
/// entry exists (presumably to stay compatible with instances persisted by older versions).
/// </summary>
[StorableHook(HookType.AfterDeserialization)]
private void AfterDeserialization() {
  // An ApplyLinearScaling entry of any other parameter type is discarded ...
  if (Parameters.ContainsKey(ApplyLinearScalingParameterName)) {
    var storedScalingParameter = Parameters[ApplyLinearScalingParameterName];
    if (!(storedScalingParameter is LookupParameter)) {
      Parameters.Remove(ApplyLinearScalingParameterName);
    }
  }

  // ... and (re-)created as a lookup parameter when it is absent.
  bool scalingParameterMissing = !Parameters.ContainsKey(ApplyLinearScalingParameterName);
  if (scalingParameterMissing) {
    var scalingParameter = new LookupParameter(ApplyLinearScalingParameterName, "Flag that indicates if the individual should be linearly scaled before evaluating.");
    Parameters.Add(scalingParameter);
  }

  // The optional row indicator parameter is added when the collection does not contain it.
  bool indicatorParameterMissing = !Parameters.ContainsKey(ValidRowIndicatorParameterName);
  if (indicatorParameterMissing) {
    var indicatorParameter = new ValueLookupParameter(ValidRowIndicatorParameterName, "An indicator variable in the data set that specifies which rows should be evaluated (those for which the indicator <> 0) (optional).");
    Parameters.Add(indicatorParameter);
  }
}
/// <summary>
/// Generates the rows to evaluate, using the value of the
/// <see cref="RelativeNumberOfEvaluatedSamplesParameter"/> as the fraction of rows.
/// </summary>
/// <returns>The row indices produced by the overload that takes the fraction explicitly.</returns>
protected IEnumerable GenerateRowsToEvaluate() {
  double relativeSampleCount = RelativeNumberOfEvaluatedSamplesParameter.ActualValue.Value;
  return GenerateRowsToEvaluate(relativeSampleCount);
}
/// <summary>
/// Builds the sequence of row indices on which a solution is evaluated.
/// Rows are taken from the evaluation partition (start inclusive, end exclusive), either
/// completely or as a random sample; rows inside the test partition of the problem data are
/// filtered out, and, if a valid-row indicator variable is configured, so are rows whose
/// indicator value is almost zero.
/// </summary>
/// <param name="percentageOfRows">
/// Fraction of the evaluation partition to use. A value that is almost 1.0 selects every row;
/// any other value draws a seed from the random parameter and samples that share of rows
/// (at least one row is requested when the computed count is zero).
/// </param>
/// <returns>The row indices; the test-partition and indicator filters are applied lazily on enumeration.</returns>
/// <exception cref="ArgumentException">The end of the evaluation partition lies before its start.</exception>
protected IEnumerable GenerateRowsToEvaluate(double percentageOfRows) {
  var evaluationPartition = EvaluationPartitionParameter.ActualValue;
  int firstRow = evaluationPartition.Start;
  int endRow = evaluationPartition.End;
  var testPartition = ProblemDataParameter.ActualValue.TestPartition;
  int testStart = testPartition.Start;
  int testEnd = testPartition.End;

  if (firstRow > endRow) {
    throw new ArgumentException("Start value is larger than end value.");
  }

  IEnumerable candidateRows;
  if (!percentageOfRows.IsAlmost(1.0)) {
    // The seed is drawn right away, i.e. the random generator advances even if the
    // returned sequence is never enumerated.
    int seed = RandomParameter.ActualValue.Next();
    int sampleSize = (int)((endRow - firstRow) * percentageOfRows);
    if (sampleSize == 0) {
      sampleSize = 1;
    }
    candidateRows = RandomEnumerable.SampleRandomNumbers(seed, firstRow, endRow, sampleSize);
  } else {
    candidateRows = Enumerable.Range(firstRow, endRow - firstRow);
  }

  // Never evaluate on rows that belong to the test partition [testStart, testEnd).
  candidateRows = candidateRows.Where(row => !(testStart <= row && row < testEnd));

  var validRowIndicator = ValidRowIndicatorParameter.ActualValue;
  if (validRowIndicator == null) {
    return candidateRows;
  }

  // Keep only the rows whose indicator variable is not (almost) zero.
  string indicatorName = validRowIndicator.Value;
  var indicatorValues = ProblemDataParameter.ActualValue.Dataset.GetReadOnlyDoubleValues(indicatorName);
  return candidateRows.Where(row => !indicatorValues[row].IsAlmost(0.0));
}
// Scratch buffer that keeps the raw estimated values between the two passes of
// CalculateWithScaling. [ThreadStatic] gives every thread its own field, which starts out null.
[ThreadStatic]
private static double[] cache;
/// <summary>
/// Feeds linearly scaled and range-limited estimated values, paired with the target values,
/// into <paramref name="calculator"/>.
/// The first pass buffers the estimated values and determines the scaling parameters
/// (NaN and infinite estimates are buffered but excluded from the scaling calculation).
/// The second pass computes <c>estimated * beta + alpha</c>, passes the result through
/// <c>LimitToRange</c> with the given limits and adds each (target, estimate) pair to the calculator.
/// If the scaling calculator reports an error, alpha = 0 and beta = 1 are used, i.e. the
/// values are left unscaled.
/// </summary>
/// <param name="targetValues">The target values. Enumerated twice, so re-enumeration must yield the same sequence.</param>
/// <param name="estimatedValues">The estimated values. Enumerated once.</param>
/// <param name="lowerEstimationLimit">Lower limit passed to <c>LimitToRange</c>.</param>
/// <param name="upperEstimationLimit">Upper limit passed to <c>LimitToRange</c>.</param>
/// <param name="calculator">Receives the target value and the scaled, limited estimate for every row.</param>
/// <param name="maxRows">
/// Minimum size of the scratch buffer; it must be at least the number of enumerated value
/// pairs, because every estimated value is stored in the buffer by position.
/// </param>
/// <exception cref="ArgumentException">
/// The two enumerations have a different number of elements and the scaling calculation
/// itself reported no error.
/// </exception>
protected static void CalculateWithScaling(IEnumerable targetValues, IEnumerable estimatedValues,
double lowerEstimationLimit, double upperEstimationLimit,
IOnlineCalculator calculator, int maxRows) {
  if (cache == null || cache.Length < maxRows) {
    cache = new double[maxRows];
  }
  // Local reference: both passes (including the deferred projection below) use the same array.
  var buffer = cache;

  // calculate linear scaling
  int count = 0;
  bool lengthMismatch;
  var linearScalingCalculator = new OnlineLinearScalingParameterCalculator();
  using (var targetValuesEnumerator = targetValues.GetEnumerator())
  using (var estimatedValuesEnumerator = estimatedValues.GetEnumerator()) {
    bool hasTarget = targetValuesEnumerator.MoveNext();
    bool hasEstimated = estimatedValuesEnumerator.MoveNext();
    while (hasTarget && hasEstimated) {
      double target = targetValuesEnumerator.Current;
      double estimated = estimatedValuesEnumerator.Current;
      buffer[count] = estimated;
      if (!double.IsNaN(estimated) && !double.IsInfinity(estimated))
        linearScalingCalculator.Add(estimated, target);
      count++;
      hasTarget = targetValuesEnumerator.MoveNext();
      hasEstimated = estimatedValuesEnumerator.MoveNext();
    }
    // Both enumerators have been advanced the same number of times, so the lengths differ
    // exactly when one of them still has an element. (Calling MoveNext() once more, as was
    // done before, misses the case where one sequence is longer by a single element.)
    lengthMismatch = hasTarget != hasEstimated;
  }

  // The mismatch is only reported when the scaling calculation itself is error free.
  if (linearScalingCalculator.ErrorState == OnlineCalculatorError.None && lengthMismatch)
    throw new ArgumentException("Number of elements in target and estimated values enumeration do not match.");

  double alpha = linearScalingCalculator.Alpha;
  double beta = linearScalingCalculator.Beta;
  if (linearScalingCalculator.ErrorState != OnlineCalculatorError.None) {
    // fall back to the identity transformation
    alpha = 0.0;
    beta = 1.0;
  }

  //calculate the quality by using the passed online calculator
  using (var targetValuesEnumerator = targetValues.GetEnumerator())
  using (var scaledBoundedEstimatedValuesEnumerator = Enumerable.Range(0, count).Select(x => buffer[x] * beta + alpha)
    .LimitToRange(lowerEstimationLimit, upperEstimationLimit).GetEnumerator()) {
    // non-short-circuit '&': both enumerators are advanced in every step
    while (targetValuesEnumerator.MoveNext() & scaledBoundedEstimatedValuesEnumerator.MoveNext()) {
      calculator.Add(targetValuesEnumerator.Current, scaledBoundedEstimatedValuesEnumerator.Current);
    }
  }
}
}
}