Changeset 3531


Ignore:
Timestamp:
04/26/10 11:05:28 (12 years ago)
Author:
gkronber
Message:

Added operator for calculation of relative variable frequencies. #938 (Data types and operators for regression problems)

Location:
trunk/sources
Files:
3 edited
1 copied

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/HeuristicLab.Problems.DataAnalysis.Regression-3.3.csproj

    r3442 r3531  
    103103  </ItemGroup>
    104104  <ItemGroup>
     105    <ProjectReference Include="..\..\HeuristicLab.Analysis\3.3\HeuristicLab.Analysis-3.3.csproj">
     106      <Project>{887425B4-4348-49ED-A457-B7D2C26DDBF9}</Project>
     107      <Name>HeuristicLab.Analysis-3.3</Name>
     108    </ProjectReference>
    105109    <ProjectReference Include="..\..\HeuristicLab.Collections\3.3\HeuristicLab.Collections-3.3.csproj">
    106110      <Project>{958B43BC-CC5C-4FA2-8628-2B3B01D890B6}</Project>
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/BestValidationSymbolicRegressionSolutionVisualizer.cs

    r3513 r3531  
    3131using HeuristicLab.Problems.DataAnalysis.Evaluators;
    3232using HeuristicLab.Problems.DataAnalysis.Symbolic;
     33using System.Collections.Generic;
     34using HeuristicLab.Analysis;
    3335
    3436namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
     
    4951    private const string QualityParameterName = "Quality";
    5052    private const string ResultsParameterName = "Results";
     53    private const string VariableFrequenciesParameterName = "VariableFrequencies";
    5154
    5255    #region parameter properties
     
    8790      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    8891    }
     /// <summary>
     /// Lookup parameter registered under the name "VariableFrequencies"; it holds the data table with the
     /// relative variable reference frequencies aggregated over the whole population.
     /// </summary>
     92    public ILookupParameter<DataTable> VariableFrequenciesParameter {
     93      get { return (ILookupParameter<DataTable>)Parameters[VariableFrequenciesParameterName]; }
     94    }
     95
    8996    #endregion
    9097
     
    /// <summary>
    /// Gets the actual value of the validation-samples-end parameter, i.e. the end index of the validation partition.
    /// </summary>
    104111    public IntValue ValidationSamplesEnd {
    105112      get { return ValidationSamplesEndParameter.ActualValue; }
     113    }
     /// <summary>
     /// Gets or sets the actual value of <see cref="VariableFrequenciesParameter"/>.
     /// The getter returns null as long as no table has been stored yet; Apply() checks for that and creates the table.
     /// </summary>
     114    public DataTable VariableFrequencies {
     115      get { return VariableFrequenciesParameter.ActualValue; }
     116      set { VariableFrequenciesParameter.ActualValue = value; }
    106117    }
    107118    #endregion
     
    118129      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The end index of the validation partition (part of the training partition)."));
    119130      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestValidationSolutionParameterName, "The best symbolic expression tree based on the validation data for the symbolic regression problem."));
     131      Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The relative variable reference frequencies aggregated over the whole population."));
    120132      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection of the algorithm."));
    121133    }
     
    124136      ItemArray<SymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
    125137      DataAnalysisProblemData problemData = DataAnalysisProblemDataParameter.ActualValue;
    126 
     138      #region update variable frequencies
     139      var inputVariables = problemData.InputVariables.Select(x => x.Value);
     140      if (VariableFrequencies == null) {
     141        VariableFrequencies = new DataTable("Variable Frequencies", "Relative frequency of variable references aggregated over the whole population.");
     142        AddResult("VariableFrequencies", VariableFrequencies);
     143        // add a data row for each input variable
     144        foreach (var inputVariable in inputVariables)
     145          VariableFrequencies.Rows.Add(new DataRow(inputVariable));
     146      }
     147      foreach (var pair in VariableFrequencyAnalyser.CalculateVariableFrequencies(expressions, inputVariables)) {
     148        VariableFrequencies.Rows[pair.Key].Values.Add(pair.Value);
     149      }
     150      #endregion
     151
     152      #region determination of validation-best solution
    127153      int validationSamplesStart = ValidationSamplesStart.Value;
    128154      int validationSamplesEnd = ValidationSamplesEnd.Value;
     
    142168
    143169      SymbolicRegressionSolution bestOfRunSolution = BestValidationSolutionParameter.ActualValue;
     170      #endregion
     171      #region update of validation-best solution
    144172      if (bestOfRunSolution == null) {
    145173        // no best of run solution yet -> make a solution from the currentBestExpression
     
    153181        }
    154182      }
    155 
    156 
     183      #endregion
    157184      return base.Apply();
    158185    }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/HeuristicLab.Problems.DataAnalysis-3.3.csproj

    r3462 r3531  
    112112    <Compile Include="Symbolic\Symbols\Variable.cs" />
    113113    <Compile Include="Symbolic\Symbols\VariableTreeNode.cs" />
     114    <Compile Include="Symbolic\VariableFrequencyAnalyser.cs" />
    114115  </ItemGroup>
    115116  <ItemGroup>
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Symbolic/VariableFrequencyAnalyser.cs

    r3529 r3531  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2424using HeuristicLab.Core;
    2525using HeuristicLab.Data;
    26 using HeuristicLab.GP.Interfaces;
    2726using System;
     27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     28using HeuristicLab.Operators;
     29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
     30using HeuristicLab.Parameters;
     31using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
    2832
    29 namespace HeuristicLab.GP.StructureIdentification {
    30   /// <summary>
    31   /// Creates accumulated frequencies of variable-symbols over the whole population.
    32   /// </summary>
    33   public class VariableFrequencyAnalyser : OperatorBase {
    34     public override string Description {
    35       get {
    36         return @"Creates accumulated frequencies of variable-symbols over the whole population.";
    37       }
     33namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
     34  [Item("VariableFrequencyAnalyser", "Calculates the accumulated frequencies of variable-symbols over the whole population.")]
     35  [StorableClass]
     36  public abstract class VariableFrequencyAnalyser : SingleSuccessorOperator {
     37    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
     38    private const string DataAnalysisProblemDataParameterName = "DataAnalysisProblemData";
     39    private const string VariableFrequenciesParameterName = "VariableFrequencies";
     40
     41    #region parameter properties
     /// <summary>
     /// Lookup parameter registered under the name "DataAnalysisProblemData"; it supplies the problem data
     /// from which the input variable names are read.
     /// </summary>
     42    public ILookupParameter<DataAnalysisProblemData> DataAnalysisProblemDataParameter {
     43      get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[DataAnalysisProblemDataParameterName]; }
    3844    }
     /// <summary>
     /// Lookup parameter registered under the name "SymbolicExpressionTree"; it is added in the constructor as a
     /// sub-scopes lookup parameter and yields the symbolic expression trees that should be analyzed.
     /// </summary>
     45    public ILookupParameter<ItemArray<SymbolicExpressionTree>> SymbolicExpressionTreeParameter {
     46      get { return (ILookupParameter<ItemArray<SymbolicExpressionTree>>)Parameters[SymbolicExpressionTreeParameterName]; }
     47    }
     /// <summary>
     /// Lookup parameter registered under the name "VariableFrequencies"; it holds the matrix of relative variable
     /// reference frequencies aggregated over the whole population.
     /// </summary>
     48    public ILookupParameter<DoubleMatrix> VariableFrequenciesParameter {
     49      get { return (ILookupParameter<DoubleMatrix>)Parameters[VariableFrequenciesParameterName]; }
     50    }
     51    #endregion
     52    #region properties
     /// <summary>
     /// Gets the actual value of <see cref="DataAnalysisProblemDataParameter"/>.
     /// </summary>
     53    public DataAnalysisProblemData DataAnalysisProblemData {
     54      get { return DataAnalysisProblemDataParameter.ActualValue; }
     55    }
     /// <summary>
     /// Gets the actual value of <see cref="SymbolicExpressionTreeParameter"/>, i.e. the trees to analyze.
     /// </summary>
     56    public ItemArray<SymbolicExpressionTree> SymbolicExpressionTrees {
     57      get { return SymbolicExpressionTreeParameter.ActualValue; }
     58    }
     /// <summary>
     /// Gets or sets the actual value of <see cref="VariableFrequenciesParameter"/>.
     /// Apply() treats a null value as "no matrix created yet" and creates one in that case.
     /// </summary>
     59    public DoubleMatrix VariableFrequencies {
     60      get { return VariableFrequenciesParameter.ActualValue; }
     61      set { VariableFrequenciesParameter.ActualValue = value; }
     62    }
     63    #endregion
    3964    public VariableFrequencyAnalyser()
    4065      : base() {
    41       AddVariableInfo(new VariableInfo("InputVariables", "The input variables", typeof(ItemList), VariableKind.In));
    42       AddVariableInfo(new VariableInfo("FunctionTree", "The tree to analyse", typeof(IGeneticProgrammingModel), VariableKind.In));
    43       AddVariableInfo(new VariableInfo("VariableFrequency", "The accumulated variable-frequencies over the whole population.", typeof(ItemList<ItemList>), VariableKind.New | VariableKind.Out));
     66      Parameters.Add(new SubScopesLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees that should be analyzed."));
     67      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The problem data on which the for which the symbolic expression tree is a solution."));
     68      Parameters.Add(new LookupParameter<DoubleMatrix>(VariableFrequenciesParameterName, "The relative variable reference frequencies aggregated over the whole population."));
    4469    }
    4570
    46     public override IOperation Apply(IScope scope) {
    47       ItemList<ItemList> frequenciesList = GetVariableValue<ItemList<ItemList>>("VariableFrequency", scope, false, false);
    48       ItemList inputVariables = GetVariableValue<ItemList>("InputVariables", scope, true);
    49       if (frequenciesList == null) {
    50         frequenciesList = new ItemList<ItemList>();
    51         // first line should contain a list of variables
    52         ItemList varList = new ItemList();
    53         foreach (var inputVariable in inputVariables) {
    54           varList.Add(inputVariable);
    55         }
    56         frequenciesList.Add(varList);
    57         IVariableInfo info = GetVariableInfo("VariableFrequency");
    58         if (info.Local)
    59           AddVariable(new HeuristicLab.Core.Variable(info.ActualName, frequenciesList));
    60         else
    61           scope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName(info.FormalName), frequenciesList));
     71    public override IOperation Apply() {
     72      var inputVariables = DataAnalysisProblemData.InputVariables.Select(x => x.Value);
     73      if (VariableFrequencies == null) {
     74        VariableFrequencies = new DoubleMatrix(0, 1, inputVariables);
    6275      }
    63       double[] frequencySum = new double[inputVariables.Count()];
    64       int variableNodesSum = 0;
    65       foreach (var subScope in scope.SubScopes) {
    66         IGeneticProgrammingModel gpModel = GetVariableValue<IGeneticProgrammingModel>("FunctionTree", subScope, false);
    67         var subScopeFrequencies = GetFrequencies(gpModel.FunctionTree, inputVariables);
    68         if (subScopeFrequencies.Count() != frequencySum.Length) throw new InvalidProgramException();
    69         int i = 0;
    70         foreach (var freq in subScopeFrequencies) {
    71           frequencySum[i++] += freq;
    72         }
    73         variableNodesSum += CountVariableNodes(gpModel.FunctionTree);
     76      ((IStringConvertibleMatrix)VariableFrequencies).Rows = VariableFrequencies.Rows + 1;
     77      int lastRowIndex = VariableFrequencies.Rows - 1;
     78      var columnNames = VariableFrequencies.ColumnNames.ToList();
     79      foreach (var pair in CalculateVariableFrequencies(SymbolicExpressionTrees, inputVariables)) {
     80        int columnIndex = columnNames.IndexOf(pair.Key);
     81        VariableFrequencies[lastRowIndex, columnIndex] = pair.Value;
    7482      }
    75       ItemList freqList = new ItemList();
    76       for (int i = 0; i < frequencySum.Length; i++) {
    77         freqList.Add(new DoubleData(frequencySum[i] / variableNodesSum));
    78       }
    79       frequenciesList.Add(freqList);
    8083      return null;
    8184    }
    8285
    83     private int CountVariableNodes(IFunctionTree tree) {
    84       return (from x in FunctionTreeIterator.IteratePostfix(tree)
    85               where x is VariableFunctionTree
    86               select 1).Sum();
     86    public static IEnumerable<KeyValuePair<string, double>> CalculateVariableFrequencies(IEnumerable<SymbolicExpressionTree> trees, IEnumerable<string> inputVariables) {
     87      int totalVariableReferences = 0;
     88      Dictionary<string, double> variableReferencesSum = new Dictionary<string, double>();
     89      foreach (var inputVariable in inputVariables)
     90        variableReferencesSum[inputVariable] = 0.0;
     91      foreach (var tree in trees) {
     92        var variableReferences = GetVariableReferenceCount(tree, inputVariables);
     93        foreach (var pair in variableReferences) {
     94          variableReferencesSum[pair.Key] += pair.Value;
     95        }
     96        totalVariableReferences += GetTotalVariableReferencesCount(tree);
     97      }
     98      foreach (string inputVariable in inputVariables) {
     99        double relFreq = variableReferencesSum[inputVariable] / (double)totalVariableReferences;
     100        yield return new KeyValuePair<string, double>(inputVariable, relFreq);
     101      }
    87102    }
    88103
    89     private static IEnumerable<double> GetFrequencies(IFunctionTree tree, ItemList inputVariables) {
    90       var groupedFuns = (from node in FunctionTreeIterator.IteratePostfix(tree)
    91                          let varNode = node as VariableFunctionTree
    92                          where varNode != null
    93                          select varNode.VariableName).GroupBy(x => x);
     104    private static int GetTotalVariableReferencesCount(SymbolicExpressionTree tree) {
     105      return tree.IterateNodesPrefix().OfType<VariableTreeNode>().Count();
     106    }
    94107
    95       foreach (var inputVariable in inputVariables.Cast<StringData>()) {
     108    private static IEnumerable<KeyValuePair<string, int>> GetVariableReferenceCount(SymbolicExpressionTree tree, IEnumerable<string> inputVariables) {
     109      var groupedFuns = (from node in tree.IterateNodesPrefix().OfType<VariableTreeNode>()
     110                         select node.VariableName).GroupBy(x => x);
     111
     112      foreach (var inputVariable in inputVariables) {
    96113        var matchingFuns = from g in groupedFuns
    97                            where g.Key == inputVariable.Data
     114                           where g.Key == inputVariable
    98115                           select g.Count();
    99         if (matchingFuns.Count() == 0) yield return 0.0;
     116        if (matchingFuns.Count() == 0) yield return new KeyValuePair<string, int>(inputVariable, 0);
    100117        else {
    101           yield return matchingFuns.Single(); // / (double)gpModel.Size;
     118          yield return new KeyValuePair<string, int>(inputVariable, matchingFuns.Single());
    102119        }
    103120      }
Note: See TracChangeset for help on using the changeset viewer.