Free cookie consent management tool by TermsFeed Policy Generator

Changeset 5971


Ignore:
Timestamp:
04/07/11 07:49:15 (13 years ago)
Author:
gkronber
Message:

#1418: Added a parameter to the symbol frequency analyzer in the symbolic expression tree encoding that indicates whether the frequencies of symbols with the same name but a different number of subtrees should be aggregated.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Encodings.SymbolicExpressionTreeEncoding/3.4/Analyzers/SymbolicExpressionSymbolFrequencyAnalyzer.cs

    r5809 r5971  
    2929using HeuristicLab.Parameters;
    3030using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     31using HeuristicLab.Data;
    3132
    3233namespace HeuristicLab.Encodings.SymbolicExpressionTreeEncoding {
     
    4041    private const string ResultsParameterName = "Results";
    4142    private const string SymbolFrequenciesParameterName = "SymbolFrequencies";
     43    private const string AggregateSymbolsWithDifferentSubtreeCountParameterName = "AggregateSymbolsWithDifferentSubtreeCount";
    4244
    4345    #region parameter properties
     
    5153      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    5254    }
     55    public IValueParameter<BoolValue> AggregateSymbolsWithDifferentSubtreeCountParameter {
     56      get { return (IValueParameter<BoolValue>)Parameters[AggregateSymbolsWithDifferentSubtreeCountParameterName]; }
     57    }
    5358    #endregion
    5459    #region properties
    55     public DataTable SymbolFrequencies {
    56       get { return SymbolFrequenciesParameter.ActualValue; }
    57       set { SymbolFrequenciesParameter.ActualValue = value; }
     60    public BoolValue AggregrateSymbolsWithDifferentSubtreeCount {
     61      get { return AggregateSymbolsWithDifferentSubtreeCountParameter.Value; }
     62      set { AggregateSymbolsWithDifferentSubtreeCountParameter.Value = value; }
    5863    }
    5964    #endregion
     
    6570      : base() {
    6671      Parameters.Add(new ScopeTreeLookupParameter<ISymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
    67       Parameters.Add(new ValueLookupParameter<DataTable>(SymbolFrequenciesParameterName, "The data table to store the symbol frequencies."));
     72      Parameters.Add(new LookupParameter<DataTable>(SymbolFrequenciesParameterName, "The data table to store the symbol frequencies."));
    6873      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the symbol frequencies should be stored."));
     74      Parameters.Add(new ValueParameter<BoolValue>(AggregateSymbolsWithDifferentSubtreeCountParameterName, "Flag that indicates if the frequencies of symbols with the same name but different number of sub-trees should be aggregated.", new BoolValue(true)));
    6975    }
    7076    public override IDeepCloneable Clone(Cloner cloner) {
     
    7581      ItemArray<ISymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
    7682      ResultCollection results = ResultsParameter.ActualValue;
     83      DataTable symbolFrequencies = SymbolFrequenciesParameter.ActualValue;
     84      if (symbolFrequencies == null) {
     85        symbolFrequencies = new DataTable("Symbol frequencies", "Relative frequency of symbols aggregated over the whole population.");
     86        symbolFrequencies.VisualProperties.YAxisTitle = "Relative Symbol Frequency";
    7787
    78       if (SymbolFrequencies == null) {
    79         SymbolFrequencies = new DataTable("Symbol frequencies", "Relative frequency of symbols aggregated over the whole population.");
    80         SymbolFrequencies.VisualProperties.YAxisTitle = "Relative Symbol Frequency";
    81         results.Add(new Result("Symbol frequencies", SymbolFrequencies));
     88        SymbolFrequenciesParameter.ActualValue = symbolFrequencies;
     89        results.Add(new Result("Symbol frequencies", symbolFrequencies));
    8290      }
    8391
    8492      // all rows must have the same number of values so we can just take the first
    85       int numberOfValues = SymbolFrequencies.Rows.Select(r => r.Values.Count).DefaultIfEmpty().First();
     93      int numberOfValues = symbolFrequencies.Rows.Select(r => r.Values.Count).DefaultIfEmpty().First();
    8694
    87       foreach (var pair in SymbolicExpressionSymbolFrequencyAnalyzer.CalculateSymbolFrequencies(expressions)) {
    88         if (!SymbolFrequencies.Rows.ContainsKey(pair.Key)) {
     95      foreach (var pair in SymbolicExpressionSymbolFrequencyAnalyzer.CalculateSymbolFrequencies(expressions, AggregrateSymbolsWithDifferentSubtreeCount.Value)) {
     96        if (!symbolFrequencies.Rows.ContainsKey(pair.Key)) {
    8997          // initialize a new row for the symbol and pad with zeros
    9098          DataRow row = new DataRow(pair.Key, "", Enumerable.Repeat(0.0, numberOfValues));
    9199          row.VisualProperties.StartIndexZero = true;
    92           SymbolFrequencies.Rows.Add(row);
     100          symbolFrequencies.Rows.Add(row);
    93101        }
    94         SymbolFrequencies.Rows[pair.Key].Values.Add(pair.Value);
     102        symbolFrequencies.Rows[pair.Key].Values.Add(pair.Value);
    95103      }
    96104
    97105      // add a zero for each data row that was not modified in the previous loop
    98       foreach (var row in SymbolFrequencies.Rows.Where(r => r.Values.Count != numberOfValues + 1))
     106      foreach (var row in symbolFrequencies.Rows.Where(r => r.Values.Count != numberOfValues + 1))
    99107        row.Values.Add(0.0);
    100108
     
    102110    }
    103111
    104     public static IEnumerable<KeyValuePair<string, double>> CalculateSymbolFrequencies(IEnumerable<ISymbolicExpressionTree> trees) {
     112    public static IEnumerable<KeyValuePair<string, double>> CalculateSymbolFrequencies(IEnumerable<ISymbolicExpressionTree> trees, bool aggregateDifferentNumberOfSubtrees = true) {
    105113      Dictionary<string, double> symbolFrequencies = new Dictionary<string, double>();
    106114      int totalNumberOfSymbols = 0;
     
    108116      foreach (var tree in trees) {
    109117        foreach (var node in tree.IterateNodesPrefix()) {
    110           if (symbolFrequencies.ContainsKey(node.Symbol.Name)) symbolFrequencies[node.Symbol.Name] += 1;
    111           else symbolFrequencies.Add(node.Symbol.Name, 1);
     118          string symbolName;
     119          if (aggregateDifferentNumberOfSubtrees) symbolName = node.Symbol.Name;
     120          else symbolName = node.Symbol.Name + "-" + node.SubtreesCount;
     121          if (symbolFrequencies.ContainsKey(symbolName)) symbolFrequencies[symbolName] += 1;
     122          else symbolFrequencies.Add(symbolName, 1);
    112123          totalNumberOfSymbols++;
    113124        }
Note: See TracChangeset for help on using the changeset viewer.