#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using DataRow = HeuristicLab.Analysis.DataRow;
using DataTable = HeuristicLab.Analysis.DataTable;

namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Analyzers.BuidingBlocks {
  /// <summary>
  /// Analyzer that counts, per update, how many subtrees in the current trees are
  /// phenotypically similar (squared Pearson correlation of their evaluations at or
  /// above <see cref="PhenotypicSimilarityThreshold"/>) to a fixed set of Poly-10
  /// building blocks, and appends the counts to a result data table.
  /// </summary>
  /// <remarks>
  /// The three lookup dictionaries below are plain fields: they are neither marked
  /// storable nor copied by the cloning constructor, so <see cref="Apply"/> rebuilds
  /// them on demand whenever they are found empty.
  /// </remarks>
  [Item("Poly-10 building blocks analyzer", "An analyzer which attempts to identify parts of the Poly-10 formula")]
  [StorableClass]
  public class SymbolicDataAnalysisPoly10Analyzer : SymbolicDataAnalysisAnalyzer {
    private const string SymbolicDataAnalysisTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string ProblemDataParameterName = "ProblemData";
    private const string GenerationsParameterName = "Generations";
    private const string PhenotypicSimilarityThresholdParameterName = "PhenotypicSimilarityThreshold";
    private const string UpdateCounterParameterName = "UpdateCounter";
    private const string UpdateIntervalParameterName = "UpdateInterval";
    private const string BuildingBlocksFrequenciesTableName = "Building blocks frequencies";

    // store evaluations of building blocks for phenotypic matching
    // (all three maps are keyed by the building block's s-expression string)
    private readonly Dictionary<string, List<double>> evaluationMap = new Dictionary<string, List<double>>();
    private readonly Dictionary<string, ISymbolicExpressionTreeNode> fragmentMap = new Dictionary<string, ISymbolicExpressionTreeNode>();
    private readonly Dictionary<string, string> prettyLabels = new Dictionary<string, string>();
    private readonly SymbolicExpressionImporter importer = new SymbolicExpressionImporter();

    #region Parameters
    // NOTE(review): the generic type arguments of the interpreter and problem-data
    // parameters were reconstructed from how their values are used in this file
    // (GetSymbolicExpressionTreeValues / Dataset / TrainingIndices) - confirm against
    // the project's history.
    public IValueParameter<DoubleValue> PhenotypicSimilarityThresholdParameter {
      get { return (IValueParameter<DoubleValue>)Parameters[PhenotypicSimilarityThresholdParameterName]; }
    }
    public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> SymbolicDataAnalysisTreeInterpreterParameter {
      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[SymbolicDataAnalysisTreeInterpreterParameterName]; }
    }
    public ILookupParameter<IDataAnalysisProblemData> ProblemDataParameter {
      get { return (ILookupParameter<IDataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public ILookupParameter<IntValue> GenerationsParameter {
      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
    }
    public ValueParameter<IntValue> UpdateCounterParameter {
      get { return (ValueParameter<IntValue>)Parameters[UpdateCounterParameterName]; }
    }
    public ValueParameter<IntValue> UpdateIntervalParameter {
      get { return (ValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; }
    }
    #endregion

    #region Parameter properties
    /// <summary>Minimum squared correlation for a subtree to count as a match.</summary>
    public double PhenotypicSimilarityThreshold {
      get { return PhenotypicSimilarityThresholdParameter.Value.Value; }
      set { PhenotypicSimilarityThresholdParameter.Value.Value = value; }
    }
    /// <summary>Number of <see cref="Apply"/> calls since the last analysis run.</summary>
    public int UpdateCounter {
      get { return UpdateCounterParameter.Value.Value; }
      set { UpdateCounterParameter.Value.Value = value; }
    }
    /// <summary>The analysis runs when <see cref="UpdateCounter"/> reaches this value.</summary>
    public int UpdateInterval {
      get { return UpdateIntervalParameter.Value.Value; }
      set { UpdateIntervalParameter.Value.Value = value; }
    }
    #endregion

    public SymbolicDataAnalysisPoly10Analyzer() {
      #region Add parameters
      Parameters.Add(new LookupParameter<IDataAnalysisProblemData>(ProblemDataParameterName));
      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(SymbolicDataAnalysisTreeInterpreterParameterName));
      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName));
      Parameters.Add(new ValueParameter<DoubleValue>(PhenotypicSimilarityThresholdParameterName, "The phenotypic similarity threshold", new DoubleValue(0.9)));
      Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, new IntValue(0)));
      Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, new IntValue(1)));
      #endregion
    }

    [StorableConstructor]
    protected SymbolicDataAnalysisPoly10Analyzer(bool deserializing) : base(deserializing) { }

    protected SymbolicDataAnalysisPoly10Analyzer(SymbolicDataAnalysisPoly10Analyzer original, Cloner cloner)
      : base(original, cloner) {
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicDataAnalysisPoly10Analyzer(this, cloner);
    }

    // NOTE(review): 'new' hides the inherited member instead of overriding it, so code
    // that reads EnabledByDefault through a base-class or interface reference will not
    // see this value. If the base member is virtual, 'override' is probably intended.
    public new bool EnabledByDefault {
      get { return false; }
    }

    /// <summary>
    /// Every <see cref="UpdateInterval"/>-th call: counts, for each building block, the
    /// subtrees whose evaluation on the training rows has r^2 at or above the threshold
    /// with the building block's evaluation, and appends the counts to the frequency table.
    /// </summary>
    /// <returns>The result of the base class' <c>Apply</c>.</returns>
    public override IOperation Apply() {
      #region Update counter & update interval
      UpdateCounter++;
      if (UpdateCounter != UpdateInterval) {
        return base.Apply();
      }
      UpdateCounter = 0;
      #endregion

      var results = ResultCollectionParameter.ActualValue;
      int generations = GenerationsParameter.ActualValue.Value;

      // Generation 0 does not necessarily coincide with an update tick (UpdateInterval > 1),
      // and the lookup maps are not persisted or cloned. Initialize whenever the state this
      // method depends on is missing, not only at generation 0.
      if (generations == 0 || evaluationMap.Count == 0 || !results.ContainsKey(BuildingBlocksFrequenciesTableName)) {
        InitializeBuildingBlockCollection();
      }

      var trees = SymbolicExpressionTreeParameter.ActualValue;
      var interpreter = (SymbolicDataAnalysisExpressionTreeLinearInterpreter)SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
      var dataset = ProblemDataParameter.ActualValue.Dataset;
      var rows = ProblemDataParameter.ActualValue.TrainingIndices.ToList();
      double threshold = PhenotypicSimilarityThreshold;

      var bbFrequencies = evaluationMap.Keys.ToDictionary(x => x, x => 0);

      foreach (var entry in evaluationMap) {
        string key = entry.Key;
        List<double> target = entry.Value;
        int len = fragmentMap[key].GetLength();
        int matches = 0;

        foreach (var t in trees) {
          // Same two-level descent that is applied to the imported building-block trees.
          var root = t.Root.GetSubtree(0).GetSubtree(0);
          // NOTE(review): the strict '>' excludes subtrees that are exactly as long as the
          // building block itself - confirm this is intended.
          var nodes = root.IterateNodesPrefix().Where(x => x.GetLength() > len).ToList();

          for (int i = 0; i < nodes.Count; ++i) {
            var s = nodes[i];
            var values = interpreter.GetValues(s, dataset, rows);
            OnlineCalculatorError error;
            var r = OnlinePearsonsRCalculator.Calculate(values, target, out error);
            if (error != OnlineCalculatorError.None) continue;

            var r2 = r * r;
            if (double.IsNaN(r2) || r2 < threshold) continue;

            matches++;
            // Do not count nested matches inside the subtree that just matched. 'nodes' is a
            // filtered list, so the subtree length cannot be used as an index offset; skip
            // exactly the following entries that belong to the matched subtree.
            i = LastIndexInsideSubtree(nodes, i);
          }
        }

        bbFrequencies[key] = matches;
      }

      var table = (DataTable)results[BuildingBlocksFrequenciesTableName].Value;
      foreach (var pair in bbFrequencies) {
        var label = prettyLabels[pair.Key];
        if (table.Rows.ContainsKey(label)) {
          table.Rows[label].Values.Add(pair.Value);
        }
      }

      return base.Apply();
    }

    /// <summary>
    /// Returns the index of the last entry directly following <paramref name="index"/>
    /// in <paramref name="nodes"/> that is part of the subtree rooted at
    /// <c>nodes[index]</c>, or <paramref name="index"/> itself if there is none.
    /// </summary>
    private static int LastIndexInsideSubtree(IList<ISymbolicExpressionTreeNode> nodes, int index) {
      var subtreeNodes = new HashSet<ISymbolicExpressionTreeNode>(nodes[index].IterateNodesPrefix());
      int last = index;
      while (last + 1 < nodes.Count && subtreeNodes.Contains(nodes[last + 1])) {
        last++;
      }
      return last;
    }

    /// <summary>
    /// Builds the Poly-10 building-block expressions, evaluates each one that is not yet
    /// cached on the training rows, and (re)creates one empty row per building block in
    /// the frequency table of the result collection.
    /// </summary>
    private void InitializeBuildingBlockCollection() {
      #region Add building blocks
      // building blocks
      const string x1 = "(variable 1 X1)";
      const string x2 = "(variable 1 X2)";
      const string x3 = "(variable 1 X3)";
      const string x4 = "(variable 1 X4)";
      const string x5 = "(variable 1 X5)";
      const string x6 = "(variable 1 X6)";
      const string x7 = "(variable 1 X7)";
      // x8 is never used in the formula
      const string x9 = "(variable 1 X9)";
      const string x10 = "(variable 1 X10)";

      string s1 = String.Format("(* {0} {1})", x1, x2);
      string s2 = String.Format("(* {0} {1})", x3, x4);
      string s3 = String.Format("(* {0} {1})", x5, x6);
      string s4 = String.Format("(* (* {0} {1}) {2})", x1, x7, x9);
      string s5 = String.Format("(* (* {0} {1}) {2})", x3, x6, x10);
      string s6 = String.Format("(+ {0} {1})", s1, s2); // x1x2 + x3x4
      string s7 = String.Format("(+ {0} {1})", s1, s3); // x1x2 + x5x6
      string s8 = String.Format("(+ {0} {1})", s2, s3); // x3x4 + x5x6
      string s9 = String.Format("(+ (+ {0} {1}) {2})", s1, s2, s3); // x1x2 + x3x4 + x5x6
      string s10 = String.Format("(+ (+ {0} {1}) {2})", s4, s5, s9); // x1x2 + x3x4 + x5x6 + x1x7x9 + x3x6x10

      prettyLabels[s1] = "X1*X2";
      prettyLabels[s2] = "X3*X4";
      prettyLabels[s3] = "X5*X6";
      prettyLabels[s4] = "X1*X7*X9";
      prettyLabels[s5] = "X3*X6*X10";
      prettyLabels[s6] = prettyLabels[s1] + " + " + prettyLabels[s2];
      prettyLabels[s7] = prettyLabels[s1] + " + " + prettyLabels[s3];
      prettyLabels[s8] = prettyLabels[s2] + " + " + prettyLabels[s3];
      prettyLabels[s9] = prettyLabels[s1] + " + " + prettyLabels[s2] + " + " + prettyLabels[s3];
      prettyLabels[s10] = prettyLabels[s9] + " + " + prettyLabels[s4] + " + " + prettyLabels[s5];
      #endregion

      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
      var dataset = ProblemDataParameter.ActualValue.Dataset;
      var rows = ProblemDataParameter.ActualValue.TrainingIndices.ToList();

      foreach (var s in new[] { s1, s2, s3, s4, s5, s6, s7, s8, s9, s10 }) {
        // Evaluations that are already cached are kept as they are.
        if (evaluationMap.ContainsKey(s)) continue;
        var t = importer.Import(s);
        evaluationMap.Add(s, interpreter.GetSymbolicExpressionTreeValues(t, dataset, rows).ToList());
        fragmentMap.Add(s, t.Root.GetSubtree(0).GetSubtree(0));
      }

      var results = ResultCollectionParameter.ActualValue;
      DataTable table;
      if (!results.ContainsKey(BuildingBlocksFrequenciesTableName)) {
        table = new DataTable(BuildingBlocksFrequenciesTableName);
        results.Add(new Result(BuildingBlocksFrequenciesTableName, table));
      } else {
        table = (DataTable)results[BuildingBlocksFrequenciesTableName].Value;
      }

      // Start from a clean table: one row per building block, named by its pretty label.
      table.Rows.Clear();
      foreach (var key in evaluationMap.Keys) {
        table.Rows.Add(new DataRow(prettyLabels[key]) { VisualProperties = { StartIndexZero = true } });
      }
    }
  }
}