Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
04/26/10 11:05:28 (15 years ago)
Author:
gkronber
Message:

Added operator for calculation of relative variable frequencies. #938 (Data types and operators for regression problems)

File:
1 copied

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Symbolic/VariableFrequencyAnalyser.cs

    r3529 r3531  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2424using HeuristicLab.Core;
    2525using HeuristicLab.Data;
    26 using HeuristicLab.GP.Interfaces;
    2726using System;
     27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     28using HeuristicLab.Operators;
     29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
     30using HeuristicLab.Parameters;
     31using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
    2832
    29 namespace HeuristicLab.GP.StructureIdentification {
    30   /// <summary>
    31   /// Creates accumulated frequencies of variable-symbols over the whole population.
    32   /// </summary>
    33   public class VariableFrequencyAnalyser : OperatorBase {
    34     public override string Description {
    35       get {
    36         return @"Creates accumulated frequencies of variable-symbols over the whole population.";
    37       }
     33namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
     34  [Item("VariableFrequencyAnalyser", "Calculates the accumulated frequencies of variable-symbols over the whole population.")]
     35  [StorableClass]
     36  public abstract class VariableFrequencyAnalyser : SingleSuccessorOperator {
     37    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
     38    private const string DataAnalysisProblemDataParameterName = "DataAnalysisProblemData";
     39    private const string VariableFrequenciesParameterName = "VariableFrequencies";
     40
     41    #region parameter properties
     42    public ILookupParameter<DataAnalysisProblemData> DataAnalysisProblemDataParameter {
     43      get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[DataAnalysisProblemDataParameterName]; }
    3844    }
     45    public ILookupParameter<ItemArray<SymbolicExpressionTree>> SymbolicExpressionTreeParameter {
     46      get { return (ILookupParameter<ItemArray<SymbolicExpressionTree>>)Parameters[SymbolicExpressionTreeParameterName]; }
     47    }
     48    public ILookupParameter<DoubleMatrix> VariableFrequenciesParameter {
     49      get { return (ILookupParameter<DoubleMatrix>)Parameters[VariableFrequenciesParameterName]; }
     50    }
     51    #endregion
     52    #region properties
     53    public DataAnalysisProblemData DataAnalysisProblemData {
     54      get { return DataAnalysisProblemDataParameter.ActualValue; }
     55    }
     56    public ItemArray<SymbolicExpressionTree> SymbolicExpressionTrees {
     57      get { return SymbolicExpressionTreeParameter.ActualValue; }
     58    }
     59    public DoubleMatrix VariableFrequencies {
     60      get { return VariableFrequenciesParameter.ActualValue; }
     61      set { VariableFrequenciesParameter.ActualValue = value; }
     62    }
     63    #endregion
    3964    public VariableFrequencyAnalyser()
    4065      : base() {
    41       AddVariableInfo(new VariableInfo("InputVariables", "The input variables", typeof(ItemList), VariableKind.In));
    42       AddVariableInfo(new VariableInfo("FunctionTree", "The tree to analyse", typeof(IGeneticProgrammingModel), VariableKind.In));
    43       AddVariableInfo(new VariableInfo("VariableFrequency", "The accumulated variable-frequencies over the whole population.", typeof(ItemList<ItemList>), VariableKind.New | VariableKind.Out));
     66      Parameters.Add(new SubScopesLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees that should be analyzed."));
     67      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The problem data on which the for which the symbolic expression tree is a solution."));
     68      Parameters.Add(new LookupParameter<DoubleMatrix>(VariableFrequenciesParameterName, "The relative variable reference frequencies aggregated over the whole population."));
    4469    }
    4570
    46     public override IOperation Apply(IScope scope) {
    47       ItemList<ItemList> frequenciesList = GetVariableValue<ItemList<ItemList>>("VariableFrequency", scope, false, false);
    48       ItemList inputVariables = GetVariableValue<ItemList>("InputVariables", scope, true);
    49       if (frequenciesList == null) {
    50         frequenciesList = new ItemList<ItemList>();
    51         // first line should contain a list of variables
    52         ItemList varList = new ItemList();
    53         foreach (var inputVariable in inputVariables) {
    54           varList.Add(inputVariable);
    55         }
    56         frequenciesList.Add(varList);
    57         IVariableInfo info = GetVariableInfo("VariableFrequency");
    58         if (info.Local)
    59           AddVariable(new HeuristicLab.Core.Variable(info.ActualName, frequenciesList));
    60         else
    61           scope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName(info.FormalName), frequenciesList));
     71    public override IOperation Apply() {
     72      var inputVariables = DataAnalysisProblemData.InputVariables.Select(x => x.Value);
     73      if (VariableFrequencies == null) {
     74        VariableFrequencies = new DoubleMatrix(0, 1, inputVariables);
    6275      }
    63       double[] frequencySum = new double[inputVariables.Count()];
    64       int variableNodesSum = 0;
    65       foreach (var subScope in scope.SubScopes) {
    66         IGeneticProgrammingModel gpModel = GetVariableValue<IGeneticProgrammingModel>("FunctionTree", subScope, false);
    67         var subScopeFrequencies = GetFrequencies(gpModel.FunctionTree, inputVariables);
    68         if (subScopeFrequencies.Count() != frequencySum.Length) throw new InvalidProgramException();
    69         int i = 0;
    70         foreach (var freq in subScopeFrequencies) {
    71           frequencySum[i++] += freq;
    72         }
    73         variableNodesSum += CountVariableNodes(gpModel.FunctionTree);
     76      ((IStringConvertibleMatrix)VariableFrequencies).Rows = VariableFrequencies.Rows + 1;
     77      int lastRowIndex = VariableFrequencies.Rows - 1;
     78      var columnNames = VariableFrequencies.ColumnNames.ToList();
     79      foreach (var pair in CalculateVariableFrequencies(SymbolicExpressionTrees, inputVariables)) {
     80        int columnIndex = columnNames.IndexOf(pair.Key);
     81        VariableFrequencies[lastRowIndex, columnIndex] = pair.Value;
    7482      }
    75       ItemList freqList = new ItemList();
    76       for (int i = 0; i < frequencySum.Length; i++) {
    77         freqList.Add(new DoubleData(frequencySum[i] / variableNodesSum));
    78       }
    79       frequenciesList.Add(freqList);
    8083      return null;
    8184    }
    8285
    83     private int CountVariableNodes(IFunctionTree tree) {
    84       return (from x in FunctionTreeIterator.IteratePostfix(tree)
    85               where x is VariableFunctionTree
    86               select 1).Sum();
     86    public static IEnumerable<KeyValuePair<string, double>> CalculateVariableFrequencies(IEnumerable<SymbolicExpressionTree> trees, IEnumerable<string> inputVariables) {
     87      int totalVariableReferences = 0;
     88      Dictionary<string, double> variableReferencesSum = new Dictionary<string, double>();
     89      foreach (var inputVariable in inputVariables)
     90        variableReferencesSum[inputVariable] = 0.0;
     91      foreach (var tree in trees) {
     92        var variableReferences = GetVariableReferenceCount(tree, inputVariables);
     93        foreach (var pair in variableReferences) {
     94          variableReferencesSum[pair.Key] += pair.Value;
     95        }
     96        totalVariableReferences += GetTotalVariableReferencesCount(tree);
     97      }
     98      foreach (string inputVariable in inputVariables) {
     99        double relFreq = variableReferencesSum[inputVariable] / (double)totalVariableReferences;
     100        yield return new KeyValuePair<string, double>(inputVariable, relFreq);
     101      }
    87102    }
    88103
    89     private static IEnumerable<double> GetFrequencies(IFunctionTree tree, ItemList inputVariables) {
    90       var groupedFuns = (from node in FunctionTreeIterator.IteratePostfix(tree)
    91                          let varNode = node as VariableFunctionTree
    92                          where varNode != null
    93                          select varNode.VariableName).GroupBy(x => x);
     104    private static int GetTotalVariableReferencesCount(SymbolicExpressionTree tree) {
     105      return tree.IterateNodesPrefix().OfType<VariableTreeNode>().Count();
     106    }
    94107
    95       foreach (var inputVariable in inputVariables.Cast<StringData>()) {
     108    private static IEnumerable<KeyValuePair<string, int>> GetVariableReferenceCount(SymbolicExpressionTree tree, IEnumerable<string> inputVariables) {
     109      var groupedFuns = (from node in tree.IterateNodesPrefix().OfType<VariableTreeNode>()
     110                         select node.VariableName).GroupBy(x => x);
     111
     112      foreach (var inputVariable in inputVariables) {
    96113        var matchingFuns = from g in groupedFuns
    97                            where g.Key == inputVariable.Data
     114                           where g.Key == inputVariable
    98115                           select g.Count();
    99         if (matchingFuns.Count() == 0) yield return 0.0;
     116        if (matchingFuns.Count() == 0) yield return new KeyValuePair<string, int>(inputVariable, 0);
    100117        else {
    101           yield return matchingFuns.Single(); // / (double)gpModel.Size;
     118          yield return new KeyValuePair<string, int>(inputVariable, matchingFuns.Single());
    102119        }
    103120      }
Note: See TracChangeset for help on using the changeset viewer.