Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/HeuristicLab.Problems.Instances.TSPLIB/3.3/TSPLIBParser.cs @ 16683

Last change on this file since 16683 was 16565, checked in by gkronber, 6 years ago

#2520: merged changes from PersistenceOverhaul branch (r16451:16564) into trunk

File size: 21.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Globalization;
25using System.IO;
26using System.Linq;
27
28namespace HeuristicLab.Problems.Instances.TSPLIB {
29
30  #region Public Enums
31  public enum TSPLIBTypes {
32    UNKNOWN = 0,
33    TSP = 1,
34    ATSP = 2,
35    SOP = 3,
36    HCP = 4,
37    CVRP = 5,
38    TOUR = 6
39  }
40
41  public enum TSPLIBEdgeWeightTypes {
42    UNKNOWN = 0,
43    EXPLICIT = 1,
44    EUC_2D = 2,
45    EUC_3D = 3,
46    MAX_2D = 4,
47    MAX_3D = 5,
48    MAN_2D = 6,
49    MAN_3D = 7,
50    CEIL_2D = 8,
51    GEO = 9,
52    ATT = 10,
53    XRAY1 = 11,
54    XRAY2 = 12,
55    SPECIAL = 13
56  }
57
58  public enum TSPLIBDisplayDataTypes {
59    UNKNOWN = 0,
60    COORD_DISPLAY = 1,
61    TWOD_DISPLAY = 2,
62    NO_DISPLAY = 3
63  }
64  #endregion
65
66  public class TSPLIBParser {
67
68    public string Name { get; private set; }
69    public TSPLIBTypes Type { get; private set; }
70    public string Comment { get; private set; }
71    public int Dimension { get; private set; }
72    public double? Capacity { get; private set; }
73    public TSPLIBEdgeWeightTypes EdgeWeightType { get; private set; }
74    public TSPLIBDisplayDataTypes DisplayDataType { get; private set; }
75    public double[,] Vertices { get; private set; }
76    public double[,] DisplayVertices { get; private set; }
77    public double[,] Distances { get; private set; }
78    public int[,] FixedEdges { get; private set; }
79    public int[] Depots { get; private set; }
80    public double[] Demands { get; private set; }
81    public int[][] Tour { get; private set; }
82
83    private static readonly char sectionSeparator = ':';
84    private static readonly char[] itemSeparator = new char[] { ' ', '\t' };
85
86    #region Private Enums
87    private enum TSPLIBSections {
88      UNKNOWN = 0,
89      EOF = 1,
90      NAME = 2,
91      TYPE = 3,
92      COMMENT = 4,
93      DIMENSION = 5,
94      CAPACITY = 6,
95      EDGE_WEIGHT_TYPE = 7,
96      EDGE_WEIGHT_FORMAT = 8,
97      EDGE_DATA_FORMAT = 9,
98      NODE_COORD_TYPE = 10,
99      DISPLAY_DATA_TYPE = 11,
100      NODE_COORD_SECTION = 12,
101      DEPOT_SECTION = 13,
102      DEMAND_SECTION = 14,
103      EDGE_DATA_SECTION = 15,
104      FIXED_EDGES_SECTION = 16,
105      DISPLAY_DATA_SECTION = 17,
106      TOUR_SECTION = 18,
107      EDGE_WEIGHT_SECTION = 19
108    }
109    private enum TSPLIBEdgeWeightFormats {
110      UNKNWON = 0,
111      FUNCTION = 1,
112      FULL_MATRIX = 2,
113      UPPER_ROW = 3,
114      LOWER_ROW = 4,
115      UPPER_DIAG_ROW = 5,
116      LOWER_DIAG_ROW = 6,
117      UPPER_COL = 7,
118      LOWER_COL = 8,
119      UPPER_DIAG_COL = 9,
120      LOWER_DIAG_COL = 10
121    }
122    private enum TSPLIBEdgeWeightDataFormats {
123      UNKNOWN = 0,
124      EDGE_LIST = 1,
125      ADJ_LIST = 2
126    }
127    private enum TSLPLIBNodeCoordTypes {
128      UNKNOWN = 0,
129      TWOD_COORDS = 1,
130      THREED_COORDS = 2,
131      NO_COORDS = 3
132    }
133    #endregion
134
135    private StreamReader source;
136    private int currentLineNumber;
137
138    private TSPLIBEdgeWeightFormats edgeWeightFormat;
139    private TSPLIBEdgeWeightDataFormats edgeWeightDataFormat;
140    private TSLPLIBNodeCoordTypes nodeCoordType;
141    private TSPLIBDisplayDataTypes displayDataType;
142
143    private TSPLIBParser() {
144      Name = String.Empty;
145      Comment = String.Empty;
146      Type = TSPLIBTypes.UNKNOWN;
147      EdgeWeightType = TSPLIBEdgeWeightTypes.UNKNOWN;
148
149      edgeWeightFormat = TSPLIBEdgeWeightFormats.UNKNWON;
150      edgeWeightDataFormat = TSPLIBEdgeWeightDataFormats.UNKNOWN;
151      nodeCoordType = TSLPLIBNodeCoordTypes.UNKNOWN;
152      displayDataType = TSPLIBDisplayDataTypes.UNKNOWN;
153    }
154
155    public TSPLIBParser(String path)
156      : this() {
157      source = new StreamReader(path);
158    }
159
160    public TSPLIBParser(Stream stream)
161      : this() {
162      source = new StreamReader(stream);
163    }
164
165    /// <summary>
166    /// Reads the TSPLIB file and parses the elements.
167    /// </summary>
168    /// <exception cref="InvalidDataException">Thrown if the file has an invalid format or contains invalid data.</exception>
169    public void Parse() {
170      TSPLIBSections section;
171      string line;
172      currentLineNumber = 0;
173
174      try {
175        do {
176          line = NextLine();
177          string value;
178          section = GetSection(line, out value);
179
180          switch (section) {
181            case TSPLIBSections.UNKNOWN:
182              break;
183            case TSPLIBSections.NAME:
184              ReadName(value);
185              break;
186            case TSPLIBSections.TYPE:
187              ReadType(value);
188              break;
189            case TSPLIBSections.COMMENT:
190              ReadComment(value);
191              break;
192            case TSPLIBSections.DIMENSION:
193              ReadDimension(value);
194              break;
195            case TSPLIBSections.CAPACITY:
196              ReadCapacity(value);
197              break;
198            case TSPLIBSections.EDGE_WEIGHT_TYPE:
199              ReadEdgeWeightType(value);
200              break;
201            case TSPLIBSections.EDGE_WEIGHT_FORMAT:
202              ReadEdgeWeightFormat(value);
203              break;
204            case TSPLIBSections.EDGE_DATA_FORMAT:
205              ReadEdgeWeightDataFormat(value);
206              break;
207            case TSPLIBSections.NODE_COORD_TYPE:
208              ReadNodeCoordType(value);
209              break;
210            case TSPLIBSections.DISPLAY_DATA_TYPE:
211              ReadDisplayDataType(value);
212              break;
213            case TSPLIBSections.NODE_COORD_SECTION:
214              ReadNodeCoordsSection();
215              break;
216            case TSPLIBSections.DEPOT_SECTION:
217              ReadDepotSection();
218              break;
219            case TSPLIBSections.DEMAND_SECTION:
220              ReadDemandSection();
221              break;
222            case TSPLIBSections.EDGE_DATA_SECTION:
223              ReadEdgeDataSection();
224              break;
225            case TSPLIBSections.FIXED_EDGES_SECTION:
226              ReadFixedEdgesSection();
227              break;
228            case TSPLIBSections.DISPLAY_DATA_SECTION:
229              ReadDisplayDataSection();
230              break;
231            case TSPLIBSections.TOUR_SECTION:
232              ReadTourSection();
233              break;
234            case TSPLIBSections.EDGE_WEIGHT_SECTION:
235              ReadEdgeWeightSection();
236              break;
237            case TSPLIBSections.EOF:
238              break;
239            default:
240              throw new InvalidDataException("Input file contains unknown or unsupported section (" + line + ") in line " + currentLineNumber.ToString());
241          }
242        } while (!(section == TSPLIBSections.EOF || source.EndOfStream));
243      } finally {
244        source.Close();
245      }
246    }
247
248    private string NextLine() {
249      currentLineNumber++;
250      return source.ReadLine();
251    }
252
253    private TSPLIBSections GetSection(string line, out string value) {
254      value = String.Empty;
255
256      if (line == null)
257        return TSPLIBSections.UNKNOWN;
258
259      int sectionNameEnd = line.IndexOf(sectionSeparator);
260      if (sectionNameEnd == -1) sectionNameEnd = line.Length;
261
262      string sectionName = line.Substring(0, sectionNameEnd).Trim();
263      if (sectionNameEnd + 1 < line.Length)
264        value = line.Substring(sectionNameEnd + 1);
265
266      TSPLIBSections section;
267      if (Enum.TryParse(sectionName, out section)) {
268        return section;
269      } else return TSPLIBSections.UNKNOWN;
270    }
271
272    private void ReadName(string value) {
273      Name += value.Trim();
274    }
275
276    private void ReadType(string value) {
277      TSPLIBTypes t;
278      if (Enum.TryParse(value.Trim().ToUpper(), out t))
279        Type = t;
280      else Type = TSPLIBTypes.UNKNOWN;
281    }
282
283    private void ReadComment(string value) {
284      Comment += value.Trim() + Environment.NewLine;
285    }
286
287    private void ReadCapacity(string value) {
288      double c;
289      if (double.TryParse(value.Trim(), NumberStyles.Any, CultureInfo.InvariantCulture.NumberFormat, out c))
290        Capacity = c;
291      else throw new InvalidDataException("Parsing the capacity in line " + currentLineNumber + " failed. It is not recognized as double value.");
292    }
293
294    private void ReadDimension(string value) {
295      int dimension;
296      if (int.TryParse(value.Trim(), out dimension))
297        Dimension = dimension;
298      else throw new InvalidDataException("Parsing the dimension in line " + currentLineNumber + " failed. It is not recognized as an integer number.");
299    }
300
301    private void ReadEdgeWeightType(string value) {
302      TSPLIBEdgeWeightTypes e;
303      if (Enum.TryParse(value.Trim().ToUpper(), out e))
304        EdgeWeightType = e;
305      else throw new InvalidDataException("Input file contains an unsupported edge weight type (" + value + ") in line " + currentLineNumber + ".");
306    }
307
308    private void ReadEdgeWeightFormat(string value) {
309      TSPLIBEdgeWeightFormats e;
310      if (Enum.TryParse(value.Trim().ToUpper(), out e))
311        edgeWeightFormat = e;
312      else throw new InvalidDataException("Input file contains an unsupported edge weight format (" + value + ") in line " + currentLineNumber + ".");
313    }
314
315    private void ReadEdgeWeightDataFormat(string value) {
316      TSPLIBEdgeWeightDataFormats e;
317      if (Enum.TryParse(value.Trim().ToUpper(), out e))
318        edgeWeightDataFormat = e;
319      else throw new InvalidDataException("Input file contains an unsupported edge weight data format (" + value + ") in line " + currentLineNumber + ".");
320    }
321
322    private void ReadNodeCoordType(string value) {
323      TSLPLIBNodeCoordTypes n;
324      if (Enum.TryParse(value.Trim().ToUpper(), out n))
325        nodeCoordType = n;
326      else throw new InvalidDataException("Input file contains an unsupported node coordinates type (" + value + ") in line " + currentLineNumber + ".");
327    }
328
329    private void ReadDisplayDataType(string value) {
330      TSPLIBDisplayDataTypes d;
331      if (Enum.TryParse(value.Trim().ToUpper(), out d))
332        displayDataType = d;
333      else throw new InvalidDataException("Input file contains an unsupported display data type (" + value + ") in line " + currentLineNumber + ".");
334    }
335
336    private void ReadNodeCoordsSection() {
337      if (Dimension == 0)
338        throw new InvalidDataException("Input file does not contain dimension information.");
339      switch (nodeCoordType) {
340        case TSLPLIBNodeCoordTypes.NO_COORDS:
341          return;
342        case TSLPLIBNodeCoordTypes.UNKNOWN: // It's a pity that there is a documented standard which is ignored in most files.
343        case TSLPLIBNodeCoordTypes.TWOD_COORDS:
344          Vertices = new double[Dimension, 2];
345          break;
346        case TSLPLIBNodeCoordTypes.THREED_COORDS:
347          Vertices = new double[Dimension, 3];
348          break;
349        default:
350          throw new InvalidDataException("Input files does not specify a valid node coord type.");
351      }
352
353      for (int i = 0; i < Dimension; i++) {
354        string line = NextLine();
355        string[] tokens = line.Split(itemSeparator, StringSplitOptions.RemoveEmptyEntries);
356
357        if (tokens.Length != Vertices.GetLength(1) + 1)
358          throw new InvalidDataException("Input file contains invalid number of node coordinates in line " + currentLineNumber + ".");
359
360        int node;
361        if (int.TryParse(tokens[0], out node)) {
362          for (int j = 0; j < Vertices.GetLength(1); j++)
363            Vertices[node - 1, j] = double.Parse(tokens[j + 1], CultureInfo.InvariantCulture.NumberFormat);
364        } else throw new InvalidDataException("Input file does not specify a valid node in line " + currentLineNumber + ".");
365      }
366    }
367
368    private void ReadDepotSection() {
369      var depots = new List<int>();
370      do {
371        string line = NextLine();
372        int node;
373        if (!int.TryParse(line, out node))
374          throw new InvalidDataException("Input file contains an unknown depot entry at line " + currentLineNumber + ".");
375        if (node == -1) break;
376        depots.Add(node);
377      } while (true);
378      Depots = depots.ToArray();
379    }
380
381    private void ReadDemandSection() {
382      if (Dimension == 0)
383        throw new InvalidDataException("Input file does not contain dimension information.");
384      Demands = new double[Dimension];
385      for (int i = 0; i < Dimension; i++) {
386        string[] tokens = NextLine().Split(itemSeparator, StringSplitOptions.RemoveEmptyEntries);
387        int node;
388        if (int.TryParse(tokens[0], out node)) {
389          double demand;
390          if (double.TryParse(tokens[1], NumberStyles.Any, CultureInfo.InvariantCulture.NumberFormat, out demand))
391            Demands[node - 1] = demand;
392          else throw new InvalidDataException("Input file contains invalid demand information in line " + currentLineNumber + ".");
393        } else throw new InvalidDataException("Input file contains invalid node information in line " + currentLineNumber + ".");
394      }
395    }
396
397    private void ReadEdgeDataSection() {
398      throw new NotSupportedException("Files with an edge data section are not supported.");
399    }
400
401    private void ReadFixedEdgesSection() {
402      var edges = new List<Tuple<int, int>>();
403      bool finished = false;
404      while (!finished) {
405        string line = NextLine();
406        string[] tokens = line.Split(itemSeparator, StringSplitOptions.RemoveEmptyEntries);
407        if (tokens.Length == 1) {
408          int number;
409          if (!int.TryParse(tokens[0], NumberStyles.Integer, CultureInfo.InvariantCulture.NumberFormat, out number))
410            throw new InvalidDataException("Input file does not end the fixed edges section with \"-1\" in line " + currentLineNumber + ".");
411          if (number != -1)
412            throw new InvalidDataException("Input file must end the fixed edges section with a -1 in line " + currentLineNumber + ".");
413          finished = true;
414        } else {
415          if (tokens.Length != 2)
416            throw new InvalidDataException("Input file contains an error in line " + currentLineNumber + ", exactly two nodes need to be given in each line.");
417          int node1 = int.Parse(tokens[0], CultureInfo.InvariantCulture.NumberFormat) - 1;
418          int node2 = int.Parse(tokens[1], CultureInfo.InvariantCulture.NumberFormat) - 1;
419          edges.Add(Tuple.Create(node1, node2));
420        }
421      }
422      FixedEdges = new int[edges.Count, 2];
423      for (int i = 0; i < edges.Count; i++) {
424        FixedEdges[i, 0] = edges[i].Item1;
425        FixedEdges[i, 1] = edges[i].Item2;
426      }
427    }
428
429    private void ReadDisplayDataSection() {
430      if (Dimension == 0)
431        throw new InvalidDataException("Input file does not contain dimension information.");
432
433      switch (displayDataType) {
434        case TSPLIBDisplayDataTypes.NO_DISPLAY:
435        case TSPLIBDisplayDataTypes.COORD_DISPLAY:
436          return;
437        case TSPLIBDisplayDataTypes.TWOD_DISPLAY:
438          DisplayVertices = new double[Dimension, 2];
439          break;
440        default:
441          throw new InvalidDataException("Input files does not specify a valid display data type.");
442      }
443
444      for (int i = 0; i < Dimension; i++) {
445        string line = NextLine();
446        string[] tokens = line.Split(itemSeparator, StringSplitOptions.RemoveEmptyEntries);
447
448        if (tokens.Length != DisplayVertices.GetLength(1) + 1)
449          throw new InvalidDataException("Input file contains invalid number of display data coordinates in line " + currentLineNumber + ".");
450
451        int node;
452        if (int.TryParse(tokens[0], out node)) {
453          for (int j = 0; j < DisplayVertices.GetLength(1); j++)
454            DisplayVertices[node - 1, j] = double.Parse(tokens[j + 1], CultureInfo.InvariantCulture.NumberFormat);
455        } else throw new InvalidDataException("Input file does not specify a valid node in line " + currentLineNumber + ".");
456      }
457    }
458
459    /// <summary>
460    /// Parses the tour section.
461    /// </summary>
462    /// <remarks>
463    /// Unfortunately, none of the given files for the TSP follow the description
464    /// in the TSPLIB documentation.
465    /// The faulty files use only one -1 to terminate the section as well as the tour
466    /// whereas the documentation says that one -1 terminates the tour and another -1
467    /// terminates the section.
468    /// So the parser peeks at the next character after a -1. If the next character
469    /// is an E as in EOF or the stream ends, the parser ends. Otherwise it will
470    /// continue to read tours until a -1 is followed by a -1.
471    /// </remarks>
472    private void ReadTourSection() {
473      if (Dimension == 0)
474        throw new InvalidDataException("Input file does not contain dimension information.");
475
476      var tours = new List<List<int>>();
477      do {
478        var line = NextLine();
479        if (String.IsNullOrEmpty(line)) break;
480        string[] nodes = line.Split(itemSeparator, StringSplitOptions.RemoveEmptyEntries);
481        if (!nodes.Any()) break;
482
483        bool finished = false;
484        foreach (var nodeString in nodes) {
485          int node = int.Parse(nodeString, CultureInfo.InvariantCulture.NumberFormat);
486          if (node == -1) {
487            finished = (tours.Any() && tours.Last().Count == 0) // -1 followed by -1
488              || (source.BaseStream.CanSeek && source.Peek() == -1)
489              || source.Peek() == (int)'E';
490            if (finished) break;
491            tours.Add(new List<int>());
492          } else {
493            if (!tours.Any()) tours.Add(new List<int>());
494            tours.Last().Add(node - 1);
495          }
496        }
497        if (finished) break;
498      } while (true);
499      if (tours.Last().Count == 0) tours.RemoveAt(tours.Count - 1);
500      Tour = tours.Select(x => x.ToArray()).ToArray();
501    }
502
503    private void ReadEdgeWeightSection() {
504      if (Dimension == 0)
505        throw new InvalidDataException("Input file does not contain dimension information.");
506
507      if (edgeWeightFormat == TSPLIBEdgeWeightFormats.UNKNWON)
508        throw new InvalidDataException("Input file does not specify an edge weight format.");
509
510      if (edgeWeightFormat == TSPLIBEdgeWeightFormats.FUNCTION)
511        return;
512
513      Distances = new double[Dimension, Dimension];
514
515      bool triangular = edgeWeightFormat != TSPLIBEdgeWeightFormats.FULL_MATRIX;
516      bool upperTriangular = edgeWeightFormat == TSPLIBEdgeWeightFormats.UPPER_COL
517        || edgeWeightFormat == TSPLIBEdgeWeightFormats.UPPER_DIAG_COL
518        || edgeWeightFormat == TSPLIBEdgeWeightFormats.UPPER_DIAG_ROW
519        || edgeWeightFormat == TSPLIBEdgeWeightFormats.UPPER_ROW;
520      bool diagonal = edgeWeightFormat == TSPLIBEdgeWeightFormats.LOWER_DIAG_COL
521        || edgeWeightFormat == TSPLIBEdgeWeightFormats.LOWER_DIAG_ROW
522        || edgeWeightFormat == TSPLIBEdgeWeightFormats.UPPER_DIAG_COL
523        || edgeWeightFormat == TSPLIBEdgeWeightFormats.UPPER_DIAG_ROW;
524      bool rowWise = edgeWeightFormat == TSPLIBEdgeWeightFormats.LOWER_DIAG_ROW
525        || edgeWeightFormat == TSPLIBEdgeWeightFormats.LOWER_ROW
526        || edgeWeightFormat == TSPLIBEdgeWeightFormats.UPPER_DIAG_ROW
527        || edgeWeightFormat == TSPLIBEdgeWeightFormats.UPPER_ROW;
528
529      int dim1 = triangular && !upperTriangular ? (diagonal ? 0 : 1) : 0;
530      int dim2 = triangular && upperTriangular ? (diagonal ? 0 : 1) : 0;
531      bool finished = false;
532      while (!finished) {
533        string line = NextLine();
534        string[] weights = line.Split(itemSeparator, StringSplitOptions.RemoveEmptyEntries);
535        foreach (var weightString in weights) {
536          double weight;
537          if (!double.TryParse(weightString, NumberStyles.Any, CultureInfo.InvariantCulture.NumberFormat, out weight))
538            throw new InvalidDataException("Input file contains unreadable weight information (" + weightString + ") in line " + currentLineNumber + ".");
539
540          if (triangular) {
541            Distances[dim1, dim2] = weight;
542            Distances[dim2, dim1] = weight;
543            if (upperTriangular) {
544              if (rowWise) {
545                dim2++;
546                if (dim2 == Dimension) {
547                  dim1++;
548                  if (diagonal) dim2 = dim1;
549                  else dim2 = dim1 + 1;
550                }
551              } else { // column-wise
552                dim1++;
553                if (diagonal && dim1 == dim2 + 1
554                  || !diagonal && dim1 == dim2) {
555                  dim2++;
556                  dim1 = 0;
557                }
558              }
559            } else { // lower-triangular
560              if (rowWise) {
561                dim2++;
562                if (diagonal && dim2 == dim1 + 1
563                  || !diagonal && dim2 == dim1) {
564                  dim1++;
565                  dim2 = 0;
566                }
567              } else { // column-wise
568                dim1++;
569                if (dim1 == Dimension) {
570                  dim2++;
571                  if (diagonal) dim1 = dim2;
572                  else dim1 = dim2 + 1;
573                }
574              }
575            }
576          } else { // full-matrix
577            Distances[dim1, dim2] = weight;
578            dim2++;
579            if (dim2 == Dimension) {
580              dim1++;
581              dim2 = 0;
582            }
583          }
584        }
585        finished = dim1 == Dimension || dim2 == Dimension;
586      }
587    }
588
589  }
590}
Note: See TracBrowser for help on using the repository browser.