Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2695_dataset-ids/HeuristicLab.Problems.Instances.DIMACS/3.3/GcolParser.cs @ 16245

Last change on this file since 16245 was 15172, checked in by abeham, 7 years ago

#2736:

  • Implemented review comments
  • I had to restructure the parser to a greater extent because I found out that some instances defined nodes without edges (which I decided to filter)
  • I added a unit test that loads all instances
  • I also added export of instances
File size: 6.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2017 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Globalization;
25using System.IO;
26using System.Linq;
27using System.Text;
28
29namespace HeuristicLab.Problems.Instances.DIMACS {
30  public class GcolParser {
31    private enum LineType { Unknown, Comment, Problem, Edge }
32
33    public string Comments { get; private set; }
34    public int Nodes { get; private set; }
35    public int Edges { get; private set; }
36    public IEnumerable<Tuple<int, int>> AdjacencyList {
37      get { return edges.Select(x => Tuple.Create(x.First.Id, x.Second.Id)); }
38    }
39
40    public int UnconnectedNodes { get; private set; }
41
42    private Node[] nodes;
43    private HashSet<Edge> edges;
44   
45    public GcolParser() {
46      Reset();
47    }
48
49    public void Reset() {
50      Nodes = 0;
51      Edges = 0;
52      nodes = new Node[0];
53      edges = new HashSet<Edge>();
54    }
55
56    public void Parse(string file) {
57      using (Stream stream = new FileStream(file, FileMode.Open, FileAccess.Read)) {
58        Parse(stream);
59      }
60    }
61
62    /// <summary>
63    /// Reads from the given stream data which is expected to be in the GCOL format:
64    /// http://prolland.free.fr/works/research/dsat/dimacs.html
65    /// The number of nodes and edges are calculated from the actual edges defined.
66    /// The number of nodes stated must be a maximum number.
67    /// Nodes without edges are not considered.
68    /// </summary>
69    /// <remarks>
70    /// The stream is not closed or disposed. The caller has to take care of that.
71    /// </remarks>
72    /// <param name="stream">The stream to read data from.</param>
73    public void Parse(Stream stream) {
74      char[] delim = new char[] { ' ', '\t' };
75      var comments = new StringBuilder();
76      var reader = new StreamReader(stream);
77      while (!reader.EndOfStream) {
78        var line = reader.ReadLine().Trim();
79        switch (GetLineType(line)) {
80          case LineType.Comment:
81            comments.AppendLine(line);
82            break;
83          case LineType.Problem:
84            var pData = line.Split(delim, StringSplitOptions.RemoveEmptyEntries);
85            if (pData.Length < 3)
86              throw new InvalidOperationException("Invalid problem entry: " + line + Environment.NewLine + "Must be p FORMAT NODES [EDGES]");
87            if (!pData[1].Equals("edge", StringComparison.InvariantCultureIgnoreCase)
88              && !pData[1].Equals("edges", StringComparison.InvariantCultureIgnoreCase))
89              throw new InvalidOperationException("Unsupported problem format type " + pData[1]);
90            Nodes = int.Parse(pData[2], CultureInfo.InvariantCulture.NumberFormat);
91            nodes = new Node[Nodes];
92            break;
93          case LineType.Edge:
94            if (nodes == null) throw new InvalidOperationException("Missing \"p\" line before \"e\" lines.");
95            var eData = line.Split(delim, StringSplitOptions.RemoveEmptyEntries);
96            var src = int.Parse(eData[1], CultureInfo.InvariantCulture.NumberFormat);
97            var tgt = int.Parse(eData[2], CultureInfo.InvariantCulture.NumberFormat);
98            if (src > Nodes || tgt > Nodes)
99              throw new InvalidOperationException("An edge cannot be declared between " + src + " and " + tgt + " because only " + Nodes + " nodes were defined.");
100            src--; // node indices are given 1-based in the files
101            tgt--; // node indices are given 1-based in the files
102            if (src > tgt) {
103              var h = src;
104              src = tgt;
105              tgt = h;
106            }
107            if (nodes[src] == null) nodes[src] = new Node();
108            if (nodes[tgt] == null) nodes[tgt] = new Node();
109            if (edges.Add(new Edge(nodes[src], nodes[tgt]))) Edges++;
110            break;
111        }
112      }
113
114      if (nodes.Length == 0) throw new InvalidOperationException("There were no nodes defined.");
115
116      // Give identies to the nodes and filter nodes without edges
117      Nodes = 0; UnconnectedNodes = 0;
118      for (var n = 0; n < nodes.Length; n++) {
119        if (nodes[n] != null) {
120          nodes[n].Id = Nodes;
121          Nodes++;
122        } else UnconnectedNodes++;
123      }
124      if (UnconnectedNodes > 0) {
125        comments.AppendLine();
126        comments.Append("There were ");
127        comments.Append(UnconnectedNodes);
128        comments.AppendLine(" unconnected nodes that have been removed compared to the original file.");
129      }
130      Comments = comments.ToString();
131    }
132
133    private LineType GetLineType(string line) {
134      if (string.IsNullOrEmpty(line)) return LineType.Unknown;
135      var first = line[0];
136      switch (first) {
137        case 'c': return LineType.Comment;
138        case 'p': return LineType.Problem;
139        case 'e': return LineType.Edge;
140        default: throw new InvalidOperationException("This is not a valid .col file");
141      }
142    }
143
144    private class Node {
145      public int Id { get; set; }
146      public Node() { }
147    }
148
149    private class Edge {
150      public Node First { get; set; }
151      public Node Second { get; set; }
152      public Edge(Node first, Node second) {
153        First = first;
154        Second = second;
155      }
156
157      public override bool Equals(object obj) {
158        var other = (obj as Edge);
159        if (other == null) return false;
160        return First == other.First && Second == other.Second
161          || First == other.Second && Second == other.First;
162      }
163
164      public override int GetHashCode() {
165        return First.GetHashCode() ^ Second.GetHashCode();
166      }
167    }
168  }
169}
Note: See TracBrowser for help on using the repository browser.