Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2288_HeuristicLab.VariableInteractionNetworks/HeuristicLab.VariableInteractionNetworks/3.3/VariableInteractionNetwork.cs @ 17495

Last change on this file since 17495 was 16966, checked in by jzenisek, 6 years ago

#2288

  • splitted creation of networks with/without possible cycles
  • included additional check to prevent errors within the impact calculation
File size: 21.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections;
24using System.Collections.Generic;
25using System.Diagnostics.Eventing.Reader;
26using System.Linq;
27using System.Text;
28using HeuristicLab.Common;
29using HeuristicLab.Core;
30using HeuristicLab.Data;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using HEAL.Attic;
33
34namespace HeuristicLab.VariableInteractionNetworks {
35  [Item("VariableInteractionNetwork", "A graph representation of variables and their relationships.")]
36  [StorableType("CD0A2622-6872-4E61-BE01-E6AA9E41A60D")]
37  public class VariableInteractionNetwork : DirectedGraph {
38
39    public static VariableInteractionNetwork CreateSimpleNetwork(double[] nmse, DoubleMatrix variableImpacts,
40      double nmseThreshold = 0.2, double varImpactThreshold = 0.0) {
41      if (variableImpacts.Rows != variableImpacts.Columns) throw new ArgumentException();
42      var network = new VariableInteractionNetwork();
43      var targets = new Dictionary<string, double>();
44      string[] varNames = variableImpacts.RowNames.ToArray();
45      if (nmse.Length != varNames.Length) throw new ArgumentException();
46
47      for (int i = 0; i < varNames.Length; i++) {
48        var name = varNames[i];
49        var varVertex = new VariableNetworkNode() { Label = name, Weight = nmse[i] };
50        network.AddVertex(varVertex);
51        if (nmse[i] < nmseThreshold) {
52          targets.Add(name, nmse[i]);
53        }
54      }
55
56      // rel is updated (impacts which are represented in the network are set to zero)
57      var impacts = variableImpacts.CloneAsMatrix();
58      // make sure the diagonal is not considered
59      for (int i = 0; i < impacts.GetLength(0); i++) impacts[i, i] = double.NegativeInfinity;
60
61
62      for (int row = 0; row < impacts.GetLength(0); row++) {
63        if (!targets.ContainsKey(varNames[row])) continue;
64
65        var rowVector = Enumerable.Range(0, impacts.GetLength(0)).Select(col => impacts[row, col]).ToArray();
66        for (int col = 0; col < rowVector.Length; col++) {
67          double impact = rowVector[col];
68          if (impact > varImpactThreshold) {
69            var srcName = varNames[col];
70            var dstName = varNames[row];
71            var srcVertex = network.Vertices.Single(v => v.Label == srcName);
72            var dstVertex = network.Vertices.Single(v => v.Label == dstName);
73            var arc = network.AddArc(srcVertex, dstVertex);
74            arc.Weight = impact;
75          }
76        }
77      }
78
79      return network;
80    }
81
82    /// <summary>
83      /// Creates a simple network from a matrix of variable impacts (each row represents a target variable, each column represents an input variable)
84      /// For each target variable not more than one row can be defined (no junction nodes are build, cf. FromNmseAndVariableImpacts(..) for building more complex networks).
85      /// The network is acyclic. Values in the diagonal are ignored.
86      /// The algorithm starts with an empty network and incrementally adds next most relevant input variable for each target variable up to a given threshold
87      /// In each iteration cycles are broken by removing the weakest link.
88      /// </summary>
89      /// <param name="nmse">vector of NMSE values for each target variable</param>
90      /// <param name="variableImpacts">Variable impacts (smaller is lower impact). Row names and columns names should be set</param>
91      /// <param name="nmseThreshold">Threshold for NMSE values. Variables with a NMSE value larger than the threshold are considered as independent variables</param>
92      /// <param name="varImpactThreshold">Threshold for variable impact values. Impacts with a value smaller than the threshold are considered as independent</param>
93      /// <returns></returns>
94    public static VariableInteractionNetwork CreateSimpleAcyclicNetwork(double[] nmse, DoubleMatrix variableImpacts, double nmseThreshold = 0.2, double varImpactThreshold = 0.0) {
95      if (variableImpacts.Rows != variableImpacts.Columns) throw new ArgumentException();
96      var network = new VariableInteractionNetwork();
97      var targets = new Dictionary<string, double>();
98      string[] varNames = variableImpacts.RowNames.ToArray();
99      if (nmse.Length != varNames.Length) throw new ArgumentException();
100
101      for (int i = 0; i < varNames.Length; i++) {
102        var name = varNames[i];
103        var varVertex = new VariableNetworkNode() { Label = name, Weight = nmse[i] };
104        network.AddVertex(varVertex);
105        if (nmse[i] < nmseThreshold) {
106          targets.Add(name, nmse[i]);
107        }
108      }
109
110      // rel is updated (impacts which are represented in the network are set to zero)
111      var rel = variableImpacts.CloneAsMatrix();
112      // make sure the diagonal is not considered
113      for (int i = 0; i < rel.GetLength(0); i++) rel[i, i] = double.NegativeInfinity;
114
115      var addedArcs = AddArcs(network, rel, varNames, targets, varImpactThreshold);
116      while (addedArcs.Any()) {
117        var cycles = network.FindShortestCycles().ToList();
118        while (cycles.Any()) {
119          // delete weakest link
120          var weakestArc = cycles.SelectMany(cycle => network.ArcsForCycle(cycle)).OrderBy(a => a.Weight).First();
121          network.RemoveArc(weakestArc);
122
123          cycles = network.FindShortestCycles().ToList();
124        }
125
126        addedArcs = AddArcs(network, rel, varNames, targets, varImpactThreshold);
127      }
128
129      return network;
130    }
131
132    private static List<IArc> AddArcs(VariableInteractionNetwork network, double[,] impacts, string[] varNames, Dictionary<string, double> targets, double threshold = 0.0) {
133      var newArcs = new List<IArc>();
134      for (int row = 0; row < impacts.GetLength(0); row++) {
135        if (!targets.ContainsKey(varNames[row])) continue;
136
137        var rowVector = Enumerable.Range(0, impacts.GetLength(0)).Select(col => impacts[row, col]).ToArray();
138        var max = rowVector.Max();
139        if (max > threshold) {
140          var idxOfMax = Array.IndexOf<double>(rowVector, max);
141          impacts[row, idxOfMax] = double.NegativeInfinity;
142          var srcName = varNames[idxOfMax];
143          var dstName = varNames[row];
144          var srcVertex = network.Vertices.Single(v => v.Label == srcName);
145          var dstVertex = network.Vertices.Single(v => v.Label == dstName);
146          var arc = network.AddArc(srcVertex, dstVertex);
147          arc.Weight = max;
148          newArcs.Add(arc);
149        }
150      }
151      return newArcs;
152    }
153
154    /// <summary>
155    /// Creates a network from a matrix of variable impacts (each row represents a target variable, each column represents an input variable)
156    /// The network is acyclic. Values in the diagonal are ignored.
157    /// The algorithm starts with an empty network and incrementally adds next most relevant input variable for each target variable up to a given threshold
158    /// In each iteration cycles are broken by removing the weakest link.
159    /// </summary>
160    /// <param name="nmse">vector of NMSE values for each target variable</param>
161    /// <param name="variableImpacts">Variable impacts (smaller is lower impact). Row names and columns names should be set</param>
162    /// <param name="nmseThreshold">Threshold for NMSE values. Variables with a NMSE value larger than the threshold are considered as independent variables</param>
163    /// <param name="varImpactThreshold">Threshold for variable impact values. Impacts with a value smaller than the threshold are considered as independent</param>
164    /// <returns></returns>
165    public static VariableInteractionNetwork FromNmseAndVariableImpacts(double[] nmse, DoubleMatrix variableImpacts, double nmseThreshold = 0.2, double varImpactThreshold = 0.0) {
166      if (variableImpacts.Rows != variableImpacts.Columns) throw new ArgumentException();
167      var network = new VariableInteractionNetwork();
168
169      Dictionary<string, IVertex> name2funVertex = new Dictionary<string, IVertex>(); // store vertexes representing the function for each target so we can easily add incoming arcs later on
170      string[] varNames = variableImpacts.RowNames.ToArray();
171      if (nmse.Length != varNames.Length) throw new ArgumentException();
172
173      for (int i = 0; i < varNames.Length; i++) {
174        var name = varNames[i];
175        var varVertex = new VariableNetworkNode() { Label = name };
176        network.AddVertex(varVertex);
177        if (nmse[i] < nmseThreshold) {
178          var functionVertex = new JunctionNetworkNode() { Label = "f_" + name };
179          name2funVertex.Add(name, functionVertex);
180          network.AddVertex(functionVertex);
181          var predArc = network.AddArc(functionVertex, varVertex);
182          predArc.Weight = double.PositiveInfinity; // never delete arcs from f_x -> x (representing output of a function)
183        }
184      }
185
186      // rel is updated (impacts which are represented in the network are set to zero)
187      var rel = variableImpacts.CloneAsMatrix();
188      // make sure the diagonal is not considered
189      for (int i = 0; i < rel.GetLength(0); i++) rel[i, i] = double.NegativeInfinity;
190
191      var addedArcs = AddArcs(network, rel, varNames, name2funVertex, varImpactThreshold);
192      while (addedArcs.Any()) {
193        var cycles = network.FindShortestCycles().ToList();
194        while (cycles.Any()) {
195          // delete weakest link
196          var weakestArc = cycles.SelectMany(cycle => network.ArcsForCycle(cycle)).OrderBy(a => a.Weight).First();
197          network.RemoveArc(weakestArc);
198
199          cycles = network.FindShortestCycles().ToList();
200        }
201
202        addedArcs = AddArcs(network, rel, varNames, name2funVertex, varImpactThreshold);
203      }
204
205      return network;
206    }
207
208    /// <summary>
209    /// Produces a combined network from two networks.
210    /// The set of nodes of the new network is the union of the node sets of the two input networks.
211    /// The set of edges of the new network is the union of the edge sets of the two input networks.
212    /// Added and removed nodes and edges are marked so that it is possible to visualize the network difference using graphviz
213    /// </summary>
214    /// <returns></returns>
215    public static VariableInteractionNetwork CalculateNetworkDiff(
216      VariableInteractionNetwork from,
217      VariableInteractionNetwork to) {
218      var g = new VariableInteractionNetwork();
219
220      // add nodes which are in both networks
221      foreach (var node in from.Vertices.Intersect(to.Vertices, new VertexLabelComparer())) {
222        g.AddVertex((IVertex)node.Clone());
223      }
224      // add nodes only in from network
225      foreach (var node in from.Vertices.Except(to.Vertices, new VertexLabelComparer())) {
226        var fromVertex = (IVertex)node.Clone();
227        fromVertex.Label += " (removed)";
228        g.AddVertex(fromVertex);
229      }
230      // add nodes only in to network
231      foreach (var node in to.Vertices.Except(from.Vertices, new VertexLabelComparer())) {
232        var fromVertex = (IVertex)node.Clone();
233        fromVertex.Label += " (added)";
234        g.AddVertex(fromVertex);
235      }
236
237      // add edges which are in both networks
238      foreach (var arc in from.Arcs.Intersect(to.Arcs, new ArcComparer())) {
239        g.AddArc(
240          g.Vertices.Single(v => arc.Source.Label == v.Label),
241          g.Vertices.Single(v => arc.Target.Label == v.Label)
242        );
243      }
244      // add edges only in from network
245      foreach (var arc in from.Arcs.Except(to.Arcs, new ArcComparer())) {
246        var fromVertex =
247          g.Vertices.Single(v => v.Label == arc.Source.Label || v.Label == arc.Source.Label + " (removed)");
248        var toVertex =
249          g.Vertices.Single(v => v.Label == arc.Target.Label || v.Label == arc.Target.Label + " (removed)");
250        var newArc = g.AddArc(
251          fromVertex,
252          toVertex
253        );
254        newArc.Label += " (removed)";
255      }
256      // add arcs only in to network
257      foreach (var arc in to.Arcs.Except(from.Arcs, new ArcComparer())) {
258        var fromVertex =
259          g.Vertices.Single(v => v.Label == arc.Source.Label || v.Label == arc.Source.Label + " (added)");
260        var toVertex =
261          g.Vertices.Single(v => v.Label == arc.Target.Label || v.Label == arc.Target.Label + " (added)");
262        var newArc = g.AddArc(
263          fromVertex,
264          toVertex
265        );
266        newArc.Label += " (added)";
267      }
268      return g;
269    }
270
271
272    private static List<IArc> AddArcs(VariableInteractionNetwork network, double[,] impacts, string[] varNames, Dictionary<string, IVertex> name2funVertex, double threshold = 0.0) {
273      var newArcs = new List<IArc>();
274      for (int row = 0; row < impacts.GetLength(0); row++) {
275        if (!name2funVertex.ContainsKey(varNames[row])) continue; // this variable does not have an associated function (considered as independent)
276
277        var rowVector = Enumerable.Range(0, impacts.GetLength(0)).Select(col => impacts[row, col]).ToArray();
278        var max = rowVector.Max();
279        if (max > threshold) {
280          var idxOfMax = Array.IndexOf<double>(rowVector, max);
281          impacts[row, idxOfMax] = double.NegativeInfinity; // edge is not considered anymore
282          var srcName = varNames[idxOfMax];
283          var dstName = varNames[row];
284          var vertex = network.Vertices.Single(v => v.Label == srcName);
285          var arc = network.AddArc(vertex, name2funVertex[dstName]);
286          arc.Weight = max;
287          newArcs.Add(arc);
288        }
289      }
290      return newArcs;
291    }
292
293    [StorableConstructor]
294    public VariableInteractionNetwork(StorableConstructorFlag _) : base(_) { }
295
296    public VariableInteractionNetwork() { }
297
298    protected VariableInteractionNetwork(VariableInteractionNetwork original, Cloner cloner) : base(original, cloner) { }
299
300    public override IDeepCloneable Clone(Cloner cloner) {
301      return new VariableInteractionNetwork(this, cloner);
302    }
303    private IList<IArc> ArcsForCycle(IList<IVertex> cycle) {
304      var res = new List<IArc>();
305      foreach (var t in cycle.Zip(cycle.Skip(1), Tuple.Create)) {
306        var src = t.Item1;
307        var dst = t.Item2;
308        var arc = Arcs.Single(a => a.Source == src && a.Target == dst);
309        res.Add(arc);
310      }
311      return res;
312    }
313
314
315    // finds the shortest cycles in the graph and returns all sub-graphs containing only the nodes / edges within the cycle
316    public IEnumerable<IList<IVertex>> FindShortestCycles() {
317      foreach (var startVariable in base.Vertices.OfType<VariableNetworkNode>()) {
318        foreach (var cycle in FindShortestCycles(startVariable))
319          yield return cycle;
320      }
321    }
322
323    private IEnumerable<IList<IVertex>> FindShortestCycles(VariableNetworkNode startVariable) {
324      var q = new Queue<List<IVertex>>(); // queue of paths
325      var path = new List<IVertex>();
326      var cycles = new List<List<IVertex>>();
327      var maxPathLength = base.Vertices.Count();
328
329      path.Add(startVariable);
330      q.Enqueue(new List<IVertex>(path));
331
332      FindShortestCycles(q, maxPathLength, cycles);
333      return cycles;
334    }
335
336    // TODO efficiency
337    private void FindShortestCycles(Queue<List<IVertex>> queue, int maxPathLength, List<List<IVertex>> cycles) {
338      while (queue.Any()) {
339        var path = queue.Dequeue();
340        if (path.Count > 1 && path.First() == path.Last()) {
341          cycles.Add(new List<IVertex>(path)); // found a cycle
342        } else if (path.Count >= maxPathLength) {
343          continue;
344        } else {
345          var lastVert = path.Last();
346          var neighbours = base.Arcs.Where(a => a.Source == lastVert).Select(a => a.Target);
347          foreach (var neighbour in neighbours) {
348            queue.Enqueue(new List<IVertex>(path.Concat(new IVertex[] { neighbour })));
349          }
350        }
351      }
352    }
353
354    public DoubleMatrix GetWeightsMatrix() {
355      var names = Vertices.OfType<VariableNetworkNode>()
356        .Select(v => v.Label)
357        .OrderBy(s => s, new NaturalStringComparer()).ToArray();
358      var w = new double[names.Length, names.Length];
359
360      var name2idx = new Dictionary<string, int>();
361      for (int i = 0; i < names.Length; i++) {
362        name2idx.Add(names[i], i);
363      }
364
365      foreach (var arc in Arcs) {
366        // only consider arcs going into a junction node
367        var target = arc.Target as JunctionNetworkNode;
368        if (target != null) {
369          var srcVarName = arc.Source.Label;
370          // each function node must have exactly one outgoing arc
371          var dstVarName = arc.Target.OutArcs.Single().Target.Label;
372
373          w[name2idx[dstVarName], name2idx[srcVarName]] = arc.Weight;
374        }
375      }
376
377
378      return new DoubleMatrix(w, names, names);
379    }
380
381    public DoubleMatrix GetSimpleWeightsMatrix() {
382      var names = Vertices.OfType<VariableNetworkNode>()
383        .Select(v => v.Label)
384        .OrderBy(s => s, new NaturalStringComparer()).ToArray();
385      var w = new double[names.Length, names.Length];
386
387      var name2idx = new Dictionary<string, int>();
388      for (int i = 0; i < names.Length; i++) {
389        name2idx.Add(names[i], i);
390      }
391
392      foreach (var arc in Arcs) {
393        if (arc.Target != null) {
394          var srcVarName = arc.Source.Label;
395          var dstVarName = arc.Target.Label;
396          w[name2idx[dstVarName], name2idx[srcVarName]] = arc.Weight;
397        }
398      }
399
400      return new DoubleMatrix(w, names, names);
401    }
402
403    public string ToGraphVizString() {
404      Func<string, string> NodeAndEdgeColor = (str) => {
405        if (string.IsNullOrEmpty(str)) return "black";
406        else if (str.Contains("removed")) return "red";
407        else if (str.Contains("added")) return "green";
408        else return "black";
409      };
410
411      var sb = new StringBuilder();
412      sb.AppendLine("digraph {");
413      sb.AppendLine("rankdir=LR");
414      foreach (var v in Vertices.OfType<VariableNetworkNode>()) {
415        sb.AppendFormat("\"{0}\" [shape=oval, color={1}]", v.Label, NodeAndEdgeColor(v.Label)).AppendLine();
416      }
417      foreach (var v in Vertices.OfType<JunctionNetworkNode>()) {
418        sb.AppendFormat("\"{0}\" [shape=box, color={1}]", v.Label, NodeAndEdgeColor(v.Label)).AppendLine();
419      }
420      foreach (var arc in Arcs) {
421        sb.AppendFormat("\"{0}\"->\"{1}\" [color=\"{3}\"]", arc.Source.Label, arc.Target.Label, arc.Label, NodeAndEdgeColor(arc.Label)).AppendLine();
422      }
423      sb.AppendLine("}");
424      return sb.ToString();
425    }
426  }
427
428  public class VertexLabelComparer : IEqualityComparer<IVertex> {
429    public bool Equals(IVertex x, IVertex y) {
430      if (x == null && y == null) return true;
431      if (x != null && y != null) {
432        return x.Label == y.Label;
433      } else return false;
434    }
435
436    public int GetHashCode(IVertex obj) {
437      return obj.Label.GetHashCode();
438    }
439  }
440
441  public class ArcComparer : IEqualityComparer<IArc> {
442    public bool Equals(IArc x, IArc y) {
443      if (x == null && y == null) return true;
444      if (x != null && y != null) {
445        return x.Source.Label == y.Source.Label && x.Target.Label == y.Target.Label;
446      } else return false;
447    }
448
449    public int GetHashCode(IArc obj) {
450      return obj.Source.Label.GetHashCode() ^ obj.Target.Label.GetHashCode();
451    }
452  }
453
454  [Item("VariableNetworkNode", "A graph vertex which represents a symbolic regression variable.")]
455  [StorableType("95E27B45-DD4B-4C32-AC5E-40A4714EA6F7")]
456  public class VariableNetworkNode : Vertex<IDeepCloneable>, INetworkNode {
457    public VariableNetworkNode() {
458      Id = Guid.NewGuid().ToString();
459    }
460
461    public VariableNetworkNode(VariableNetworkNode original, Cloner cloner) : base(original, cloner) {
462      Id = original.Id;
463      Description = original.Description;
464    }
465
466    public override IDeepCloneable Clone(Cloner cloner) {
467      return new VariableNetworkNode(this, cloner);
468    }
469
470    public string Id { get; }
471    public string Description { get; set; }
472  }
473
474  [Item("FunctionNetworkNode", "A graph vertex representing a junction node.")]
475  [StorableType("8D4E55AC-EBF8-49BA-8361-FC51EF3BE990")]
476  public class JunctionNetworkNode : Vertex<IDeepCloneable>, INetworkNode {
477    public JunctionNetworkNode() {
478      Id = Guid.NewGuid().ToString();
479    }
480
481    public JunctionNetworkNode(JunctionNetworkNode original, Cloner cloner) : base(original, cloner) {
482      Id = original.Id;
483      Description = original.Description;
484    }
485
486    public override IDeepCloneable Clone(Cloner cloner) {
487      return new JunctionNetworkNode(this, cloner);
488    }
489
490    public string Id { get; }
491    public string Description { get; set; }
492  }
493}
Note: See TracBrowser for help on using the repository browser.