#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Core;
using HeuristicLab.Data;
using System;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Operators;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Parameters;
using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;

namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  /// <summary>
  /// Operator that appends one row per invocation to a frequency matrix. The row holds, for each
  /// input variable of the problem data, the share of all variable nodes found in the analyzed
  /// symbolic expression trees that reference that variable.
  /// </summary>
  [Item("VariableFrequencyAnalyser", "Calculates the accumulated frequencies of variable-symbols over the whole population.")]
  [StorableClass]
  public abstract class VariableFrequencyAnalyser : SingleSuccessorOperator {
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string DataAnalysisProblemDataParameterName = "DataAnalysisProblemData";
    private const string VariableFrequenciesParameterName = "VariableFrequencies";

    #region parameter properties
    /// <summary>Lookup parameter for the problem data that supplies the input variable names.</summary>
    public ILookupParameter<DataAnalysisProblemData> DataAnalysisProblemDataParameter {
      get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[DataAnalysisProblemDataParameterName]; }
    }
    /// <summary>Lookup parameter for the symbolic expression trees to analyze.</summary>
    public ILookupParameter<ItemArray<SymbolicExpressionTree>> SymbolicExpressionTreeParameter {
      get { return (ILookupParameter<ItemArray<SymbolicExpressionTree>>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    /// <summary>Lookup parameter for the matrix that collects one row of frequencies per invocation.</summary>
    public ILookupParameter<DoubleMatrix> VariableFrequenciesParameter {
      get { return (ILookupParameter<DoubleMatrix>)Parameters[VariableFrequenciesParameterName]; }
    }
    #endregion

    #region properties
    /// <summary>Actual value of <see cref="DataAnalysisProblemDataParameter"/>.</summary>
    public DataAnalysisProblemData DataAnalysisProblemData {
      get { return DataAnalysisProblemDataParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="SymbolicExpressionTreeParameter"/>.</summary>
    public ItemArray<SymbolicExpressionTree> SymbolicExpressionTrees {
      get { return SymbolicExpressionTreeParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="VariableFrequenciesParameter"/>; created on first use by <see cref="Apply"/>.</summary>
    public DoubleMatrix VariableFrequencies {
      get { return VariableFrequenciesParameter.ActualValue; }
      set { VariableFrequenciesParameter.ActualValue = value; }
    }
    #endregion

    /// <summary>Registers the three lookup parameters used by this operator.</summary>
    public VariableFrequencyAnalyser()
      : base() {
      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees that should be analyzed."));
      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The problem data on which the for which the symbolic expression tree is a solution."));
      Parameters.Add(new LookupParameter<DoubleMatrix>(VariableFrequenciesParameterName, "The relative variable reference frequencies aggregated over the whole population."));
    }

    /// <summary>
    /// Grows the frequency matrix by one row and fills that row with the relative variable
    /// frequencies computed from the current trees.
    /// </summary>
    /// <returns>Always <c>null</c>.</returns>
    public override IOperation Apply() {
      // Materialize once: the projection is consumed several times below and inside the helpers.
      var inputVariables = DataAnalysisProblemData.InputVariables.Select(x => x.Value).ToList();
      if (VariableFrequencies == null) {
        // NOTE(review): the matrix is created with a column count of 1 although one column name per
        // input variable is supplied and addressed below - confirm against the DoubleMatrix constructor.
        VariableFrequencies = new DoubleMatrix(0, 1, inputVariables);
      }
      ((IStringConvertibleMatrix)VariableFrequencies).Rows = VariableFrequencies.Rows + 1;
      int lastRowIndex = VariableFrequencies.Rows - 1;
      var columnNames = VariableFrequencies.ColumnNames.ToList();
      foreach (var pair in CalculateVariableFrequencies(SymbolicExpressionTrees, inputVariables)) {
        int columnIndex = columnNames.IndexOf(pair.Key);
        VariableFrequencies[lastRowIndex, columnIndex] = pair.Value;
      }
      return null;
    }

    /// <summary>
    /// Computes, for every input variable, the number of variable nodes referencing it divided by
    /// the total number of variable nodes over all given trees.
    /// </summary>
    /// <param name="trees">The trees whose variable nodes are counted.</param>
    /// <param name="inputVariables">The variable names to report; each name must occur only once.</param>
    /// <returns>
    /// One (variable name, relative frequency) pair per input variable. All frequencies are 0 when
    /// the trees contain no variable nodes at all.
    /// </returns>
    public static IEnumerable<KeyValuePair<string, double>> CalculateVariableFrequencies(IEnumerable<SymbolicExpressionTree> trees, IEnumerable<string> inputVariables) {
      // The names are walked once per tree plus twice here; avoid re-running a deferred query each time.
      List<string> variableNames = inputVariables.ToList();

      int totalVariableReferences = 0;
      Dictionary<string, double> variableReferencesSum = new Dictionary<string, double>();
      Dictionary<string, double> variableFrequencies = new Dictionary<string, double>();
      foreach (var inputVariable in variableNames)
        variableReferencesSum[inputVariable] = 0.0;

      foreach (var tree in trees) {
        var variableReferences = GetVariableReferenceCount(tree, variableNames);
        foreach (var pair in variableReferences) {
          variableReferencesSum[pair.Key] += pair.Value;
        }
        totalVariableReferences += GetTotalVariableReferencesCount(tree);
      }

      foreach (string inputVariable in variableNames) {
        // Without any variable node the quotient would be 0/0 = NaN; report 0 instead.
        double relFreq = totalVariableReferences == 0
          ? 0.0
          : variableReferencesSum[inputVariable] / (double)totalVariableReferences;
        variableFrequencies.Add(inputVariable, relFreq);
      }
      return variableFrequencies;
    }

    /// <summary>Counts all variable nodes of the tree, regardless of which variable they reference.</summary>
    private static int GetTotalVariableReferencesCount(SymbolicExpressionTree tree) {
      return tree.IterateNodesPrefix().OfType<VariableTreeNode>().Count();
    }

    /// <summary>
    /// Counts, for each input variable, the variable nodes of the tree that carry its name.
    /// Variables that are not referenced are reported with a count of 0.
    /// </summary>
    private static IEnumerable<KeyValuePair<string, int>> GetVariableReferenceCount(SymbolicExpressionTree tree, IEnumerable<string> inputVariables) {
      Dictionary<string, int> references = new Dictionary<string, int>();
      // Group the referenced names once; indexing a lookup with an absent key yields an empty sequence.
      var referencedNames = tree.IterateNodesPrefix()
        .OfType<VariableTreeNode>()
        .ToLookup(node => node.VariableName);

      foreach (var inputVariable in inputVariables) {
        references.Add(inputVariable, referencedNames[inputVariable].Count());
      }
      return references;
    }
  }
}