Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
11/15/12 16:47:25 (12 years ago)
Author:
mkommend
Message:

#1763: merged changes from trunk into the tree simplifier branch.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.TreeSimplifier/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Evaluators/SymbolicDataAnalysisEvaluator.cs

    r7259 r8915  
    4444    private const string EvaluationPartitionParameterName = "EvaluationPartition";
    4545    private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";
     46    private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
     47    private const string ValidRowIndicatorParameterName = "ValidRowIndicator";
    4648
    /// <summary>
    /// Always <c>false</c>.
    /// </summary>
    public override bool CanChangeName {
      get {
        return false;
      }
    }
     
    7072      get { return (IValueLookupParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
    7173    }
     74    public ILookupParameter<BoolValue> ApplyLinearScalingParameter {
     75      get { return (ILookupParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
     76    }
     77    public IValueLookupParameter<StringValue> ValidRowIndicatorParameter {
     78      get { return (IValueLookupParameter<StringValue>)Parameters[ValidRowIndicatorParameterName]; }
     79    }
    7280    #endregion
    7381
     
    8795      Parameters.Add(new ValueLookupParameter<DoubleLimit>(EstimationLimitsParameterName, "The upper and lower limit that should be used as cut off value for the output values of symbolic data analysis trees."));
    8896      Parameters.Add(new ValueLookupParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index."));
     97      Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName, "Flag that indicates if the individual should be linearly scaled before evaluating."));
     98      Parameters.Add(new ValueLookupParameter<StringValue>(ValidRowIndicatorParameterName, "An indicator variable in the data set that specifies which rows should be evaluated (those for which the indicator <> 0) (optional)."));
     99    }
     100
     101    [StorableHook(HookType.AfterDeserialization)]
     102    private void AfterDeserialization() {
     103      if (Parameters.ContainsKey(ApplyLinearScalingParameterName) && !(Parameters[ApplyLinearScalingParameterName] is LookupParameter<BoolValue>))
     104        Parameters.Remove(ApplyLinearScalingParameterName);
     105      if (!Parameters.ContainsKey(ApplyLinearScalingParameterName))
     106        Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName, "Flag that indicates if the individual should be linearly scaled before evaluating."));
     107      if (!Parameters.ContainsKey(ValidRowIndicatorParameterName))
     108        Parameters.Add(new ValueLookupParameter<StringValue>(ValidRowIndicatorParameterName, "An indicator variable in the data set that specifies which rows should be evaluated (those for which the indicator <> 0) (optional)."));
    89109    }
    90110
     
    94114
    95115    protected IEnumerable<int> GenerateRowsToEvaluate(double percentageOfRows) {
    96 
    97 
    98116      IEnumerable<int> rows;
    99117      int samplesStart = EvaluationPartitionParameter.ActualValue.Start;
     
    101119      int testPartitionStart = ProblemDataParameter.ActualValue.TestPartition.Start;
    102120      int testPartitionEnd = ProblemDataParameter.ActualValue.TestPartition.End;
    103 
    104121      if (samplesEnd < samplesStart) throw new ArgumentException("Start value is larger than end value.");
    105122
     
    113130      }
    114131
    115       return rows.Where(i => i < testPartitionStart || testPartitionEnd <= i);
     132      rows = rows.Where(i => i < testPartitionStart || testPartitionEnd <= i);
     133      if (ValidRowIndicatorParameter.ActualValue != null) {
     134        string indicatorVar = ValidRowIndicatorParameter.ActualValue.Value;
     135        var problemData = ProblemDataParameter.ActualValue;
     136        var indicatorRow = problemData.Dataset.GetReadOnlyDoubleValues(indicatorVar);
     137        rows = rows.Where(r => !indicatorRow[r].IsAlmost(0.0));
     138      }
     139      return rows;
     140    }
     141
     142    [ThreadStatic]
     143    private static double[] cache;
     144    protected static void CalculateWithScaling(IEnumerable<double> targetValues, IEnumerable<double> estimatedValues,
     145      double lowerEstimationLimit, double upperEstimationLimit,
     146      IOnlineCalculator calculator, int maxRows) {
     147      if (cache == null || cache.GetLength(0) < maxRows) {
     148        cache = new double[maxRows];
     149      }
     150
     151      // calculate linear scaling
     152      // the static methods of the calculator are not used because the evaluated values should be cached for performance reasons
     153      int i = 0;
     154      var linearScalingCalculator = new OnlineLinearScalingParameterCalculator();
     155      var targetValuesEnumerator = targetValues.GetEnumerator();
     156      var estimatedValuesEnumerator = estimatedValues.GetEnumerator();
     157      while (targetValuesEnumerator.MoveNext() & estimatedValuesEnumerator.MoveNext()) {
     158        double target = targetValuesEnumerator.Current;
     159        double estimated = estimatedValuesEnumerator.Current;
     160        cache[i] = estimated;
     161        if (!double.IsNaN(estimated) && !double.IsInfinity(estimated))
     162          linearScalingCalculator.Add(estimated, target);
     163        i++;
     164      }
     165      if (linearScalingCalculator.ErrorState == OnlineCalculatorError.None && (targetValuesEnumerator.MoveNext() || estimatedValuesEnumerator.MoveNext()))
     166        throw new ArgumentException("Number of elements in target and estimated values enumeration do not match.");
     167
     168      double alpha = linearScalingCalculator.Alpha;
     169      double beta = linearScalingCalculator.Beta;
     170      if (linearScalingCalculator.ErrorState != OnlineCalculatorError.None) {
     171        alpha = 0.0;
     172        beta = 1.0;
     173      }
     174
     175      //calculate the quality by using the passed online calculator
     176      targetValuesEnumerator = targetValues.GetEnumerator();
     177      var scaledBoundedEstimatedValuesEnumerator = Enumerable.Range(0, i).Select(x => cache[x] * beta + alpha)
     178        .LimitToRange(lowerEstimationLimit, upperEstimationLimit).GetEnumerator();
     179
     180      while (targetValuesEnumerator.MoveNext() & scaledBoundedEstimatedValuesEnumerator.MoveNext()) {
     181        calculator.Add(targetValuesEnumerator.Current, scaledBoundedEstimatedValuesEnumerator.Current);
     182      }
    116183    }
    117184  }
Note: See TracChangeset for help on using the changeset viewer.