using System; using System.Collections.Generic; using System.Linq; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; using HeuristicLab.Operators; using HeuristicLab.Optimization; using HeuristicLab.Parameters; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; using HeuristicLab.Problems.DataAnalysis.Symbolic; using GeneticExchangeMapType = HeuristicLab.Core.IItemList; namespace HeuristicLab.EvolutionaryTracking.Operators { [Item("SymbolicExpressionTreeSymbolGraphBuilder", "An operator that builds a graph of pair-wise symbol relationships from the trees in the population.")] [StorableClass] class SymbolicExpressionTreeSymbolGraphBuilder : SingleSuccessorOperator { #region Parameter names private const string GlobalGeneticExchangeMapParameterName = "GlobalGeneticExchangeMap"; private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree"; private const string ResultsParameterName = "Results"; #endregion #region Parameter properties public ILookupParameter GlobalGeneticExchangeMapParameter { get { return (LookupParameter)Parameters[GlobalGeneticExchangeMapParameterName]; } } #endregion #region Properties public GeneticExchangeMapType GlobalGeneticExchangeMap { get { return GlobalGeneticExchangeMapParameter.ActualValue; } } #endregion [StorableConstructor] private SymbolicExpressionTreeSymbolGraphBuilder(bool deserializing) : base(deserializing) { } private SymbolicExpressionTreeSymbolGraphBuilder(SymbolicExpressionTreeSymbolGraphBuilder original, Cloner cloner) : base(original, cloner) { } public override IDeepCloneable Clone(Cloner cloner) { return new SymbolicExpressionTreeSymbolGraphBuilder(this, cloner); } public SymbolicExpressionTreeSymbolGraphBuilder() { Parameters.Add(new ScopeTreeLookupParameter(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze.")); Parameters.Add(new LookupParameter(GlobalGeneticExchangeMapParameterName, "Global map of genetic exchanges.")); Parameters.Add(new LookupParameter(ResultsParameterName, "The results collection where the analysis values should be stored.")); } #region Parameters public IScopeTreeLookupParameter SymbolicExpressionTreeParameter { get { return (IScopeTreeLookupParameter)Parameters[SymbolicExpressionTreeParameterName]; } } public ILookupParameter ResultsParameter { get { return (LookupParameter)Parameters[ResultsParameterName]; } } #endregion [StorableHook(HookType.AfterDeserialization)] private void AfterDeserialization() { if (!Parameters.ContainsKey(ResultsParameterName)) Parameters.Add(new LookupParameter(ResultsParameterName, "The results collection where the analysis values should be stored.")); if (!Parameters.ContainsKey(GlobalGeneticExchangeMapParameterName)) Parameters.Add(new LookupParameter(GlobalGeneticExchangeMapParameterName, "Global map of genetic exchanges.")); } public override IOperation Apply() { if (!Parameters.ContainsKey("Results")) return base.Apply(); var trees = SymbolicExpressionTreeParameter.ActualValue.ToList(); var results = ResultsParameter.ActualValue; // if (!results.ContainsKey("SymbolGraph")) { // var graph = CreateSymbolGraph(trees); // results.Add(new Result("SymbolGraph", graph)); // } else { // var graph = results["SymbolGraph"].Value as SymbolGraph; // var transactions = GlobalGeneticExchangeMap.Select(t => t as GeneticExchange); // UpdateGraph(graph, transactions); // } var graph = CreateSymbolGraph(trees); if (!results.ContainsKey("SymbolGraph")) { results.Add(new Result("SymbolGraph", graph)); } else { results["SymbolGraph"].Value = graph; } return base.Apply(); } private SymbolGraph CreateSymbolGraph(IEnumerable trees) { var list = trees as IList ?? trees.ToList(); var nodes = list.SelectMany(x => x.Root.GetSubtree(0).GetSubtree(0).IterateNodesBreadth()).ToList(); var graph = new SymbolGraph(); #region create symbol graph foreach (var node in nodes) { var nodeLabel = node.Label(); if (!graph.ContainsKey(nodeLabel)) { graph.AddNode(new SymbolNode { Label = nodeLabel, Symbol = node.Symbol, Weight = 1.0, AverageArity = node.SubtreeCount }); } else { double avgArity = graph.GetNode(nodeLabel).AverageArity; double weight = graph.GetNode(nodeLabel).Weight; graph.GetNode(nodeLabel).AverageArity = (avgArity * weight + node.SubtreeCount) / (weight + 1); graph.GetNode(nodeLabel).Weight += 1.0; } } foreach (var node in nodes) { var n = graph.GetNode(node.Symbol.Name); for (int i = 0; i != node.SubtreeCount; ++i) { var child = node.GetSubtree(i); var c = graph.GetNode(child.Label()); if (n == null || c == null) throw new Exception("The node couldn't be found in the graph."); // should never happen if (n.OutEdges == null) { n.AddForwardArc(c, 1.0); } else { var arc = n.OutEdges.Find(x => x.Target == c); if (arc == null) n.AddForwardArc(c, 1); else arc.Weight += 1.0; } if (c.Positions == null) { c.Positions = new Dictionary { { i, 1 } }; } else if (c.Positions.ContainsKey(i)) { c.Positions[i] += 1; } else { c.Positions.Add(i, 1); } } } #endregion return graph; } private static void UpdateGraph(SymbolGraph graph, IEnumerable exchanges) { // we have the graph, and we have the list of genetic exchanges which allows us to update graph vertex and arc weights exchanges = exchanges as IList ?? exchanges.ToList(); var inFragments = exchanges.Select(t => t.FragmentIn).ToList(); var outFragments = exchanges.Select(t => t.FragmentOut).ToList(); // the idea is to add to the symbol graph everything that goes in (inFragments) and // subtract everything that goes out (outFragments) // might not be 100% correct because the trees change as well foreach (var f in inFragments) { foreach (var node in f.Root.IterateNodesPrefix()) { var parentLabel = node.Label(); var parentVertex = graph.GetNode(parentLabel); if (parentVertex == null) { // highly unlikely but it is possible graph.AddNode(new SymbolNode { Label = parentLabel, Symbol = node.Symbol, Weight = 1.0, AverageArity = node.SubtreeCount }); parentVertex = graph.GetNode(parentLabel); } else { parentVertex.Weight++; } for (int i = 0; i != node.SubtreeCount; ++i) { var child = node.GetSubtree(i); var childLabel = child.Label(); var childVertex = graph.GetNode(childLabel); if (childVertex == null) { // highly unlikely but it is possible graph.AddNode(new SymbolNode { Label = childLabel, Symbol = child.Symbol, Weight = 1.0, AverageArity = node.SubtreeCount }); childVertex = graph.GetNode(childLabel); } if (parentVertex.OutEdges == null) parentVertex.AddForwardArc(childVertex, 1.0); else { var arc = parentVertex.OutEdges.Find(x => x.Target == childVertex); if (arc == null) childVertex.AddForwardArc(childVertex, 1.0); else arc.Weight += 1.0; } if (childVertex.Positions == null) { childVertex.Positions = new Dictionary { { i, 1 } }; } else if (childVertex.Positions.ContainsKey(i)) { childVertex.Positions[i] += 1; } else { childVertex.Positions.Add(i, 1); } } } } foreach (var f in outFragments) { foreach (var node in f.Root.IterateNodesPrefix()) { var parentLabel = node.Label(); var parentVertex = graph.GetNode(parentLabel); if (parentVertex == null) throw new ArgumentException("parentVertex for node in outFragment can (should) not be null!"); parentVertex.Weight--; for (int i = 0; i != node.SubtreeCount; ++i) { var child = node.GetSubtree(i); var childVertex = graph.GetNode(child.Label()); if (childVertex == null) throw new ArgumentException("childVertex for node in outFragment can (should) not be null!"); // childVertex.Weight--; var arc = parentVertex.OutEdges.Find(x => x.Target == childVertex); if (arc == null) throw new Exception("An arc should already exist between the parent and the child."); arc.Weight -= 1.0; if (arc.Weight < 0.0) throw new ArgumentException("Weight cannot be negative!"); } } } } private static void NormalizeGraph(SymbolGraph graph) { var nodes = graph.Nodes; var sum = nodes.Sum(n => n.Weight); foreach (var n in nodes) { n.Weight /= sum; if (n.OutEdges == null || n.OutEdges.Count <= 0) continue; var arcSum = n.OutEdges.Sum(arc => arc.Weight); foreach (var arc in n.OutEdges) arc.Weight /= arcSum; } } private static void PruneGraph(SymbolGraph graph) { var arcs = graph.Nodes.Where(node => node.OutEdges != null).SelectMany(node => node.OutEdges).ToList(); var arcWeights = arcs.Select(arc => arc.Weight).ToList(); var median = arcWeights.Median(); foreach (var node in graph.Nodes.Where(n => n.OutEdges != null)) { node.OutEdges.RemoveAll(arc => arc.Weight < median || ((SymbolNode)arc.Target).Symbol is Constant); node.OutEdges.RemoveAll(arc => arc.Weight < median); } } } }