#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Operators;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Random;

namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  /// <summary>
  /// Abstract base class for operators that evaluate a symbolic expression tree on problem data.
  /// It declares the parameters shared by all such evaluators and offers two helpers:
  /// <see cref="GenerateRowsToEvaluate()"/> to select the rows to evaluate and
  /// <see cref="CalculateWithScaling"/> to feed linearly scaled, bounded estimates to an online calculator.
  /// </summary>
  /// <typeparam name="T">The type of problem data the solution is evaluated on.</typeparam>
  // NOTE(review): the generic type arguments in this file were reconstructed from how the values are used
  // (e.g. ActualValue.Start/End, ActualValue.Value, ActualValue.Next()). DoubleLimit,
  // ISymbolicDataAnalysisExpressionTreeInterpreter and ISymbolicDataAnalysisEvaluator<T> are not exercised here;
  // confirm them against the interface declarations.
  [StorableClass]
  public abstract class SymbolicDataAnalysisEvaluator<T> : InstrumentedOperator,
    ISymbolicDataAnalysisEvaluator<T>, ISymbolicDataAnalysisInterpreterOperator, ISymbolicDataAnalysisBoundedOperator, IStochasticOperator
    where T : class, IDataAnalysisProblemData {
    private const string RandomParameterName = "Random";
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string SymbolicDataAnalysisTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string ProblemDataParameterName = "ProblemData";
    private const string EstimationLimitsParameterName = "EstimationLimits";
    private const string EvaluationPartitionParameterName = "EvaluationPartition";
    private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";
    private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
    private const string ValidRowIndicatorParameterName = "ValidRowIndicator";

    /// <summary>Always <c>false</c>.</summary>
    public override bool CanChangeName { get { return false; } }

    #region parameter properties
    /// <summary>Explicit <see cref="IStochasticOperator"/> implementation; forwards to <see cref="RandomParameter"/>.</summary>
    ILookupParameter<IRandom> IStochasticOperator.RandomParameter {
      get { return RandomParameter; }
    }

    /// <summary>The random generator to use (consumed when only a subset of the rows is sampled).</summary>
    public IValueLookupParameter<IRandom> RandomParameter {
      get { return (IValueLookupParameter<IRandom>)Parameters[RandomParameterName]; }
    }

    /// <summary>The symbolic data analysis solution encoded as a symbolic expression tree.</summary>
    public ILookupParameter<ISymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (ILookupParameter<ISymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    }

    /// <summary>The interpreter used to calculate the output values of the tree.</summary>
    public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> SymbolicDataAnalysisTreeInterpreterParameter {
      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[SymbolicDataAnalysisTreeInterpreterParameterName]; }
    }

    /// <summary>The problem data on which the solution is evaluated.</summary>
    public IValueLookupParameter<T> ProblemDataParameter {
      get { return (IValueLookupParameter<T>)Parameters[ProblemDataParameterName]; }
    }

    /// <summary>The dataset partition (start and end index) on which the solution is evaluated.</summary>
    public IValueLookupParameter<IntRange> EvaluationPartitionParameter {
      get { return (IValueLookupParameter<IntRange>)Parameters[EvaluationPartitionParameterName]; }
    }

    /// <summary>The lower and upper cut-off limits for the output values of the tree.</summary>
    public IValueLookupParameter<DoubleLimit> EstimationLimitsParameter {
      get { return (IValueLookupParameter<DoubleLimit>)Parameters[EstimationLimitsParameterName]; }
    }

    /// <summary>The fraction of the evaluation partition that is randomly chosen for evaluation.</summary>
    public IValueLookupParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter {
      get { return (IValueLookupParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
    }

    /// <summary>Flag that indicates whether the individual is linearly scaled before evaluating.</summary>
    public ILookupParameter<BoolValue> ApplyLinearScalingParameter {
      get { return (ILookupParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
    }

    /// <summary>Optional name of a dataset variable; only rows where it is not (almost) zero are evaluated.</summary>
    public IValueLookupParameter<StringValue> ValidRowIndicatorParameter {
      get { return (IValueLookupParameter<StringValue>)Parameters[ValidRowIndicatorParameterName]; }
    }
    #endregion

    /// <summary>Constructor used by the persistence framework.</summary>
    [StorableConstructor]
    protected SymbolicDataAnalysisEvaluator(bool deserializing) : base(deserializing) { }

    /// <summary>Cloning constructor.</summary>
    protected SymbolicDataAnalysisEvaluator(SymbolicDataAnalysisEvaluator<T> original, Cloner cloner)
      : base(original, cloner) {
    }

    /// <summary>Creates the evaluator and registers all of its parameters.</summary>
    public SymbolicDataAnalysisEvaluator()
      : base() {
      Parameters.Add(new ValueLookupParameter<IRandom>(RandomParameterName, "The random generator to use."));
      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(SymbolicDataAnalysisTreeInterpreterParameterName, "The interpreter that should be used to calculate the output values of the symbolic data analysis tree."));
      Parameters.Add(new LookupParameter<ISymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic data analysis solution encoded as a symbolic expression tree."));
      Parameters.Add(new ValueLookupParameter<T>(ProblemDataParameterName, "The problem data on which the symbolic data analysis solution should be evaluated."));
      Parameters.Add(new ValueLookupParameter<IntRange>(EvaluationPartitionParameterName, "The start index of the dataset partition on which the symbolic data analysis solution should be evaluated."));
      Parameters.Add(new ValueLookupParameter<DoubleLimit>(EstimationLimitsParameterName, "The upper and lower limit that should be used as cut off value for the output values of symbolic data analysis trees."));
      Parameters.Add(new ValueLookupParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index."));
      Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName, "Flag that indicates if the individual should be linearly scaled before evaluating."));
      Parameters.Add(new ValueLookupParameter<StringValue>(ValidRowIndicatorParameterName, "An indicator variable in the data set that specifies which rows should be evaluated (those for which the indicator <> 0) (optional)."));
    }

    /// <summary>
    /// Repairs the parameter collection after deserialization: an ApplyLinearScaling parameter that is not a
    /// <c>LookupParameter&lt;BoolValue&gt;</c> is replaced by one, and the ApplyLinearScaling and
    /// ValidRowIndicator parameters are added when they are missing.
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      if (Parameters.ContainsKey(ApplyLinearScalingParameterName) && !(Parameters[ApplyLinearScalingParameterName] is LookupParameter<BoolValue>))
        Parameters.Remove(ApplyLinearScalingParameterName);
      if (!Parameters.ContainsKey(ApplyLinearScalingParameterName))
        Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName, "Flag that indicates if the individual should be linearly scaled before evaluating."));
      if (!Parameters.ContainsKey(ValidRowIndicatorParameterName))
        Parameters.Add(new ValueLookupParameter<StringValue>(ValidRowIndicatorParameterName, "An indicator variable in the data set that specifies which rows should be evaluated (those for which the indicator <> 0) (optional)."));
    }

    /// <summary>
    /// Selects the rows to evaluate, using the fraction stored in
    /// <see cref="RelativeNumberOfEvaluatedSamplesParameter"/>.
    /// </summary>
    /// <returns>A lazily evaluated sequence of row indices.</returns>
    protected IEnumerable<int> GenerateRowsToEvaluate() {
      return GenerateRowsToEvaluate(RelativeNumberOfEvaluatedSamplesParameter.ActualValue.Value);
    }

    /// <summary>
    /// Selects the rows of the evaluation partition that should be evaluated.
    /// </summary>
    /// <remarks>
    /// If <paramref name="percentageOfRows"/> is (almost) 1.0, every row in [start, end) of the evaluation
    /// partition is a candidate; otherwise a random sample of the partition is drawn, sized
    /// <c>(end - start) * percentageOfRows</c> and raised to 1 if that truncates to 0, using a seed taken from
    /// <see cref="RandomParameter"/>. Candidates that lie inside the test partition of the problem data are
    /// removed, as are rows whose valid-row indicator value is (almost) zero when an indicator is configured.
    /// </remarks>
    /// <param name="percentageOfRows">The fraction of the evaluation partition to evaluate.</param>
    /// <returns>A lazily evaluated sequence of row indices.</returns>
    /// <exception cref="ArgumentException">The end of the evaluation partition lies before its start.</exception>
    protected IEnumerable<int> GenerateRowsToEvaluate(double percentageOfRows) {
      IEnumerable<int> rows;
      int samplesStart = EvaluationPartitionParameter.ActualValue.Start;
      int samplesEnd = EvaluationPartitionParameter.ActualValue.End;
      int testPartitionStart = ProblemDataParameter.ActualValue.TestPartition.Start;
      int testPartitionEnd = ProblemDataParameter.ActualValue.TestPartition.End;
      if (samplesEnd < samplesStart) throw new ArgumentException("Start value is larger than end value.");

      if (percentageOfRows.IsAlmost(1.0)) {
        rows = Enumerable.Range(samplesStart, samplesEnd - samplesStart);
      } else {
        int seed = RandomParameter.ActualValue.Next();
        int count = (int)((samplesEnd - samplesStart) * percentageOfRows);
        if (count == 0) count = 1; // always request at least one sample
        rows = RandomEnumerable.SampleRandomNumbers(seed, samplesStart, samplesEnd, count);
      }

      // never evaluate on rows that belong to the test partition
      rows = rows.Where(i => i < testPartitionStart || testPartitionEnd <= i);

      if (ValidRowIndicatorParameter.ActualValue != null) {
        string indicatorVar = ValidRowIndicatorParameter.ActualValue.Value;
        var problemData = ProblemDataParameter.ActualValue;
        var indicatorRow = problemData.Dataset.GetReadOnlyDoubleValues(indicatorVar);
        rows = rows.Where(r => !indicatorRow[r].IsAlmost(0.0));
      }
      return rows;
    }

    // Per-thread buffer holding the raw estimated values between the two passes of CalculateWithScaling.
    [ThreadStatic]
    private static double[] cache;

    /// <summary>
    /// Linearly scales the estimated values to the target values, limits the scaled values to the given
    /// range and adds each (target, scaled estimate) pair to <paramref name="calculator"/>.
    /// </summary>
    /// <remarks>
    /// The method works in two passes. The first pass buffers the estimated values and determines the scaling
    /// parameters; estimates that are NaN or infinite are buffered but not used for the scaling. If the scaling
    /// calculator reports an error, the estimates are passed on unscaled (alpha = 0, beta = 1).
    /// <paramref name="targetValues"/> is enumerated twice, <paramref name="estimatedValues"/> once.
    /// </remarks>
    /// <param name="targetValues">The target values.</param>
    /// <param name="estimatedValues">The estimated values, in the same order as the target values.</param>
    /// <param name="lowerEstimationLimit">The lower limit applied to the scaled estimates.</param>
    /// <param name="upperEstimationLimit">The upper limit applied to the scaled estimates.</param>
    /// <param name="calculator">The online calculator that receives the value pairs.</param>
    /// <param name="maxRows">The minimum size of the buffer; must be at least the number of value pairs.</param>
    /// <exception cref="ArgumentException">
    /// The two enumerations have different lengths (only detected when the scaling calculator is not in an error state).
    /// </exception>
    protected static void CalculateWithScaling(IEnumerable<double> targetValues, IEnumerable<double> estimatedValues,
      double lowerEstimationLimit, double upperEstimationLimit,
      IOnlineCalculator calculator, int maxRows) {
      if (cache == null || cache.Length < maxRows) {
        cache = new double[maxRows];
      }

      // calculate linear scaling
      int i = 0;
      var linearScalingCalculator = new OnlineLinearScalingParameterCalculator();
      using (var targetValuesEnumerator = targetValues.GetEnumerator())
      using (var estimatedValuesEnumerator = estimatedValues.GetEnumerator()) {
        // non-short-circuit '&' on purpose: both enumerators must advance in lockstep
        while (targetValuesEnumerator.MoveNext() & estimatedValuesEnumerator.MoveNext()) {
          double target = targetValuesEnumerator.Current;
          double estimated = estimatedValuesEnumerator.Current;
          cache[i] = estimated;
          if (!double.IsNaN(estimated) && !double.IsInfinity(estimated))
            linearScalingCalculator.Add(estimated, target);
          i++;
        }
        if (linearScalingCalculator.ErrorState == OnlineCalculatorError.None && (targetValuesEnumerator.MoveNext() || estimatedValuesEnumerator.MoveNext()))
          throw new ArgumentException("Number of elements in target and estimated values enumeration do not match.");
      }

      double alpha = linearScalingCalculator.Alpha;
      double beta = linearScalingCalculator.Beta;
      if (linearScalingCalculator.ErrorState != OnlineCalculatorError.None) {
        // scaling could not be determined; fall back to the identity transformation
        alpha = 0.0;
        beta = 1.0;
      }

      //calculate the quality by using the passed online calculator
      var scaledBoundedEstimatedValues = Enumerable.Range(0, i).Select(x => cache[x] * beta + alpha)
        .LimitToRange(lowerEstimationLimit, upperEstimationLimit);
      using (var targetValuesEnumerator = targetValues.GetEnumerator())
      using (var scaledBoundedEstimatedValuesEnumerator = scaledBoundedEstimatedValues.GetEnumerator()) {
        while (targetValuesEnumerator.MoveNext() & scaledBoundedEstimatedValuesEnumerator.MoveNext()) {
          calculator.Add(targetValuesEnumerator.Current, scaledBoundedEstimatedValuesEnumerator.Current);
        }
      }
    }
  }
}