Free cookie consent management tool by TermsFeed Policy Generator

Changeset 4034 for trunk


Ignore:
Timestamp:
07/14/10 10:45:41 (14 years ago)
Author:
mkommend
Message:

Implemented a first version of partial evaluation of samples (ticket #1082)

Location:
trunk/sources
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs

    r4022 r4034  
    260260          lowerEstimationLimit, upperEstimationLimit,
    261261          ProblemData.Dataset, targetVariable,
    262           validationStart, validationEnd);
     262         Enumerable.Range(validationStart, validationEnd - validationStart));
    263263
    264264        if (validationMse < bestValidationMse) {
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/SymbolicRegressionTournamentPruning.cs

    r4028 r4034  
    216216      int originalSize = tree.Size;
    217217      double originalMse = SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(interpreter, tree,
    218         lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, samplesStart, samplesEnd);
     218        lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, Enumerable.Range(samplesStart, samplesEnd - samplesStart));
    219219
    220220      int minPrunedSize = (int)(originalSize * (1 - maxPruningRatio));
     
    252252
    253253            double prunedMse = SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(interpreter, clonedTree,
    254         lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, samplesStart, samplesEnd);
     254        lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, Enumerable.Range(samplesStart, samplesEnd - samplesStart));
    255255            double prunedSize = clonedTree.Size;
    256256            // MSE of the pruned tree is larger than the original tree in most cases
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionEvaluator.cs

    r3982 r4034  
    4040  [StorableClass]
    4141  public abstract class SymbolicRegressionEvaluator : SingleSuccessorOperator, ISymbolicRegressionEvaluator {
     42    private const string RandomParameterName = "Random";
    4243    private const string QualityParameterName = "Quality";
    4344    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
     
    4647    private const string SamplesStartParameterName = "SamplesStart";
    4748    private const string SamplesEndParameterName = "SamplesEnd";
     49    private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";
    4850    #region ISymbolicRegressionEvaluator Members
    4951
     
    7274    }
    7375
     76    public IValueParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter {
     77      get { return (IValueParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
     78    }
     79
     80    public ILookupParameter<IRandom> RandomParameter {
     81      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
     82    }
     83
    7484    #endregion
    7585    #region properties
     86    public IRandom Random {
     87      get { return RandomParameter.ActualValue; }
     88    }
    7689    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
    7790      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
     
    89102      get { return SamplesEndParameter.ActualValue; }
    90103    }
     104
     105    public PercentValue RelativeNumberOfEvaluatedSamples {
     106      get { return RelativeNumberOfEvaluatedSamplesParameter.Value; }
     107    }
    91108    #endregion
    92109
    93110    public SymbolicRegressionEvaluator()
    94111      : base() {
     112      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "The random generator to use."));
    95113      Parameters.Add(new LookupParameter<DoubleValue>(QualityParameterName, "The quality of the evaluated symbolic regression solution."));
    96114      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used to calculate the output values of the symbolic expression tree."));
     
    99117      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesStartParameterName, "The start index of the dataset partition on which the symbolic regression solution should be evaluated."));
    100118      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesEndParameterName, "The end index of the dataset partition on which the symbolic regression solution should be evaluated."));
     119      Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.", new PercentValue(1)));
     120    }
     121
     122    [StorableConstructor]
     123    protected SymbolicRegressionEvaluator(bool deserializing) : base(deserializing) { }
     124    [StorableHook(Persistence.Default.CompositeSerializers.Storable.HookType.AfterDeserialization)]
     125    private void AfterDeserialization() {
     126      if (!Parameters.ContainsKey(RelativeNumberOfEvaluatedSamplesParameterName))
     127        Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.", new PercentValue(1)));
     128      if (!Parameters.ContainsKey(RandomParameterName))
     129        Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "The random generator to use."));
    101130    }
    102131
    103132    public override IOperation Apply() {
    104       QualityParameter.ActualValue = new DoubleValue(Evaluate(SymbolicExpressionTreeInterpreter, SymbolicExpressionTree, RegressionProblemData.Dataset,
    105         RegressionProblemData.TargetVariable, SamplesStart, SamplesEnd));
     133      IEnumerable<int> rows = GenerateRowsToEvaluate(RelativeNumberOfEvaluatedSamples.Value, SamplesStart.Value, SamplesEnd.Value);
     134      double quality = Evaluate(SymbolicExpressionTreeInterpreter, SymbolicExpressionTree, RegressionProblemData.Dataset,
     135        RegressionProblemData.TargetVariable, rows);
     136      QualityParameter.ActualValue = new DoubleValue(quality);
    106137      return base.Apply();
     138    }
     139
     140
     141    //algorithm taken from Programming Pearls, page 127
     142    private IEnumerable<int> GenerateRowsToEvaluate(double relativeAmount, int start, int end) {
     143      int count = (int)((end - start) * relativeAmount);
     144      if (count == 0) count = 1;
     145
     146      int remaining = end - start;
     147      for (int i = start; i < end && count > 0; i++) {
     148        double probabilty = Random.NextDouble();
     149        if (probabilty < ((double)count) / remaining) {
     150          count--;
     151          yield return i;
     152        }
     153        remaining--;
     154      }
    107155    }
    108156
     
    111159      Dataset dataset,
    112160      StringValue targetVariable,
    113       IntValue samplesStart, IntValue samplesEnd);
     161      IEnumerable<int> rows);
    114162  }
    115163}
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionMeanSquaredErrorEvaluator.cs

    r3996 r4034  
    6666    }
    6767
    68     protected override double Evaluate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, Dataset dataset, StringValue targetVariable, IntValue samplesStart, IntValue samplesEnd) {
    69       double mse = Calculate(interpreter, solution, LowerEstimationLimit.Value, UpperEstimationLimit.Value, dataset, targetVariable.Value, samplesStart.Value, samplesEnd.Value);
     68    protected override double Evaluate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, Dataset dataset, StringValue targetVariable, IEnumerable<int> rows) {
     69      double mse = Calculate(interpreter, solution, LowerEstimationLimit.Value, UpperEstimationLimit.Value, dataset, targetVariable.Value, rows);
    7070      return mse;
    7171    }
    7272
    73     public static double Calculate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, int start, int end) {
    74       IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start));
    75       IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, start, end);
     73    public static double Calculate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows) {
     74      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, rows);
     75      IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, rows);
    7676      IEnumerator<double> originalEnumerator = originalValues.GetEnumerator();
    7777      IEnumerator<double> estimatedEnumerator = estimatedValues.GetEnumerator();
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionScaledMeanSquaredErrorEvaluator.cs

    r4027 r4034  
    6666    }
    6767
    68     protected override double Evaluate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, Dataset dataset, StringValue targetVariable, IntValue samplesStart, IntValue samplesEnd) {
     68    protected override double Evaluate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, Dataset dataset, StringValue targetVariable, IEnumerable<int> rows) {
    6969      double alpha, beta;
    70       double mse = Calculate(interpreter, solution, LowerEstimationLimit.Value, UpperEstimationLimit.Value, dataset, targetVariable.Value, samplesStart.Value, samplesEnd.Value, out beta, out alpha);
     70      double mse = Calculate(interpreter, solution, LowerEstimationLimit.Value, UpperEstimationLimit.Value, dataset, targetVariable.Value, rows, out beta, out alpha);
    7171      AlphaParameter.ActualValue = new DoubleValue(alpha);
    7272      BetaParameter.ActualValue = new DoubleValue(beta);
     
    7474    }
    7575
    76     public static double Calculate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, int start, int end, out double beta, out double alpha) {
    77       IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, start, end);
    78       IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start));
     76    public static double Calculate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows, out double beta, out double alpha) {
     77      IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable,rows);
     78      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, rows);
    7979      CalculateScalingParameters(originalValues, estimatedValues, out beta, out alpha);
    8080
    81       return CalculateWithScaling(interpreter, solution, lowerEstimationLimit, upperEstimationLimit, dataset, targetVariable, start, end, beta, alpha);
     81      return CalculateWithScaling(interpreter, solution, lowerEstimationLimit, upperEstimationLimit, dataset, targetVariable, rows, beta, alpha);
    8282    }
    8383
    84     public static double CalculateWithScaling(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, int start, int end, double beta, double alpha) {
    85       IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start));
    86       IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, start, end);
     84    public static double CalculateWithScaling(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, IEnumerable<int> rows, double beta, double alpha) {
     85      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, rows);
     86      IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, rows);
    8787      IEnumerator<double> originalEnumerator = originalValues.GetEnumerator();
    8888      IEnumerator<double> estimatedEnumerator = estimatedValues.GetEnumerator();
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Views/3.3/Symbolic/InteractiveSymbolicRegressionSolutionSimplifierView.cs

    r3985 r4034  
    9191        root.AddSubTree(start);
    9292        start.AddSubTree(simplifiedExpressionTree.Root);
     93        int samplesStart = Content.ProblemData.TrainingSamplesStart.Value;
     94        int samplesEnd = Content.ProblemData.TrainingSamplesEnd.Value;
    9395        double originalTrainingMeanSquaredError = SymbolicRegressionMeanSquaredErrorEvaluator.Calculate(
    9496            Content.Model.Interpreter, new SymbolicExpressionTree(root), Content.LowerEstimationLimit, Content.UpperEstimationLimit,
    9597            Content.ProblemData.Dataset, Content.ProblemData.TargetVariable.Value,
    96             Content.ProblemData.TrainingSamplesStart.Value, Content.ProblemData.TrainingSamplesEnd.Value);
     98            Enumerable.Range(samplesStart, samplesEnd - samplesStart));
    9799
    98100        this.CalculateReplacementNodes();
     
    112114      SymbolicExpressionTree tree = new SymbolicExpressionTree(root);
    113115      foreach (SymbolicExpressionTreeNode node in this.simplifiedExpressionTree.IterateNodesPrefix()) {
    114         while(start.SubTrees.Count > 0) start.RemoveSubTree(0);
     116        while (start.SubTrees.Count > 0) start.RemoveSubTree(0);
    115117        start.AddSubTree(node);
    116118        double constantTreeNodeValue = interpreter.GetSymbolicExpressionTreeValues(tree, Content.ProblemData.Dataset, trainingSamples).Median();
     
    123125      foreach (SymbolicExpressionTreeNode childNode in currentTreeNode.SubTrees.ToList()) {
    124126        SwitchNode(currentTreeNode, childNode, replacementNodes[childNode]);
     127        int samplesStart = Content.ProblemData.TrainingSamplesStart.Value;
     128        int samplesEnd = Content.ProblemData.TrainingSamplesEnd.Value;
    125129        double newTrainingMeanSquaredError = SymbolicRegressionMeanSquaredErrorEvaluator.Calculate(
    126130          Content.Model.Interpreter, tree,
    127131          Content.LowerEstimationLimit, Content.UpperEstimationLimit,
    128132          Content.ProblemData.Dataset, Content.ProblemData.TargetVariable.Value,
    129           Content.ProblemData.TrainingSamplesStart.Value, Content.ProblemData.TrainingSamplesEnd.Value);
     133          Enumerable.Range(samplesStart, samplesEnd - samplesStart));
    130134        nodeImpacts[childNode] = newTrainingMeanSquaredError / originalTrainingMeanSquaredError;
    131135        SwitchNode(currentTreeNode, replacementNodes[childNode], childNode);
Note: See TracChangeset for help on using the changeset viewer.