/*
 * License: CPOL
 * Source: https://www.codeproject.com/Tips/823670/Csharp-Light-and-Fast-CSV-Parser
 * Author: Yuriy Magurdumov
 */
6 |
|
---|
7 | using System;
|
---|
8 | using System.Collections.Generic;
|
---|
9 | using System.Text;
|
---|
10 |
|
---|
11 | namespace HeuristicLab.BenchmarkSuite {
|
---|
12 | using System.IO;
|
---|
13 |
|
---|
/// <summary>
/// Small streaming CSV parser. Records are produced lazily, one list of
/// field strings per non-empty input line. Fields may be wrapped in a
/// qualifier character; inside a qualified field the delimiter and line
/// breaks are literal, and a doubled qualifier stands for one qualifier.
/// </summary>
public static class CsvParser {
  /// <summary>
  /// Splits a sequence into its first element and a lazy sequence of the
  /// remaining elements.
  /// </summary>
  /// <param name="source">The sequence to split.</param>
  /// <returns>
  /// A tuple of (head, tail). For an empty source the head is default(T)
  /// and the tail is empty. The tail reads from the same enumerator that
  /// produced the head, so it can only be consumed once.
  /// </returns>
  /// <exception cref="ArgumentNullException">source is null.</exception>
  private static Tuple<T, IEnumerable<T>> HeadAndTail<T>(this IEnumerable<T> source) {
    if (source == null)
      throw new ArgumentNullException("source");
    var en = source.GetEnumerator();
    // Only read Current when MoveNext succeeded; Current is not defined otherwise.
    var head = en.MoveNext() ? en.Current : default(T);
    return Tuple.Create(head, EnumerateTail(en));
  }

  /// <summary>
  /// Lazily yields every element still left in an already started enumerator.
  /// </summary>
  private static IEnumerable<T> EnumerateTail<T>(IEnumerator<T> en) {
    while (en.MoveNext()) yield return en.Current;
  }

  /// <summary>
  /// Parses CSV text held in a string.
  /// </summary>
  /// <param name="content">The complete CSV text.</param>
  /// <param name="delimiter">Character that separates fields.</param>
  /// <param name="qualifier">Character that encloses qualified (quoted) fields.</param>
  /// <returns>A lazy sequence of records, each a list of field values.</returns>
  /// <exception cref="ArgumentNullException">content is null.</exception>
  public static IEnumerable<IList<string>> Parse(string content, char delimiter, char qualifier) {
    // Validate eagerly; the actual parsing is deferred to the iterator below.
    if (content == null)
      throw new ArgumentNullException("content");
    return ParseContent(content, delimiter, qualifier);
  }

  /// <summary>
  /// Iterator behind the string overload of Parse. The reader is created and
  /// disposed inside the iterator so that it stays open for as long as the
  /// caller is enumerating. Returning the lazy sequence from within a using
  /// block would dispose the reader before the first record is read.
  /// </summary>
  private static IEnumerable<IList<string>> ParseContent(string content, char delimiter, char qualifier) {
    using (var reader = new StringReader(content)) {
      foreach (var record in Parse(reader, delimiter, qualifier))
        yield return record;
    }
  }

  /// <summary>
  /// Parses CSV from a reader and returns the first record separately from
  /// the remaining ones.
  /// </summary>
  /// <param name="reader">Source of the CSV text; it is not disposed here.</param>
  /// <param name="delimiter">Character that separates fields.</param>
  /// <param name="qualifier">Character that encloses qualified (quoted) fields.</param>
  /// <returns>
  /// A tuple of (first record, lazy sequence of the remaining records). The
  /// first record is read immediately; the rest are read from the reader as
  /// the tail is enumerated.
  /// </returns>
  public static Tuple<IList<string>, IEnumerable<IList<string>>> ParseHeadAndTail(TextReader reader, char delimiter, char qualifier) {
    return HeadAndTail(Parse(reader, delimiter, qualifier));
  }

  /// <summary>
  /// Parses CSV from a reader, one character at a time.
  /// </summary>
  /// <remarks>
  /// A record ends at "\n" or "\r\n" outside a qualified field; a "\r" that
  /// is not followed by "\n" is treated as an ordinary character. Lines that
  /// produce no fields (blank lines, or a line holding only one empty field)
  /// are skipped. If the input ends inside a qualified field, the text read
  /// so far becomes the last field.
  /// </remarks>
  /// <param name="reader">Source of the CSV text; it is not disposed here.</param>
  /// <param name="delimiter">Character that separates fields.</param>
  /// <param name="qualifier">Character that encloses qualified (quoted) fields.</param>
  /// <returns>A lazy sequence of records, each a list of field values.</returns>
  public static IEnumerable<IList<string>> Parse(TextReader reader, char delimiter, char qualifier) {
    var inQuote = false;
    var record = new List<string>();
    var sb = new StringBuilder();

    while (reader.Peek() != -1) {
      var readChar = (char)reader.Read();

      // Peek() is compared as an int so that end of input (-1) never matches a character.
      if (readChar == '\n' || (readChar == '\r' && reader.Peek() == '\n')) {
        // If it's a \r\n combo consume the \n part and throw it away.
        if (readChar == '\r')
          reader.Read();

        if (inQuote) {
          // Line breaks inside a qualified field belong to the field value.
          if (readChar == '\r')
            sb.Append('\r');
          sb.Append('\n');
        } else {
          // Close the pending field, unless the line has produced nothing at all.
          if (record.Count > 0 || sb.Length > 0) {
            record.Add(sb.ToString());
            sb.Clear();
          }

          if (record.Count > 0)
            yield return record;

          // Start a fresh list (sized like the previous record) so that the
          // list already handed to the caller is never modified.
          record = new List<string>(record.Count);
        }
      } else if (sb.Length == 0 && !inQuote) {
        // At the start of a field: a qualifier opens a qualified field.
        if (readChar == qualifier)
          inQuote = true;
        else if (readChar == delimiter) {
          // Delimiter right away: the field is empty.
          record.Add(sb.ToString());
          sb.Clear();
        } else
          sb.Append(readChar);
      } else if (readChar == delimiter) {
        if (inQuote)
          sb.Append(delimiter);
        else {
          record.Add(sb.ToString());
          sb.Clear();
        }
      } else if (readChar == qualifier) {
        if (inQuote) {
          if (reader.Peek() == qualifier) {
            // Doubled qualifier inside a qualified field: one literal qualifier.
            reader.Read();
            sb.Append(qualifier);
          } else
            inQuote = false;
        } else
          // A qualifier in the middle of an unqualified field is kept as text.
          sb.Append(readChar);
      } else
        sb.Append(readChar);
    }

    // Flush the last record when the input does not end with a line break.
    if (record.Count > 0 || sb.Length > 0)
      record.Add(sb.ToString());

    if (record.Count > 0)
      yield return record;
  }
}
|
---|
100 | }
|
---|