- Timestamp: 06/11/13 21:41:08 (12 years ago)
- Location: trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3
- Files: 9 edited
Legend:
- Unmodified
- Added
- Removed
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/CSV/ClassifiactionCSVInstanceProvider.cs
r9456 r9608 59 59 TableFileParser csvFileParser = new TableFileParser(); 60 60 61 csvFileParser.Parse(path );61 csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path)); 62 62 63 63 Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values); -
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/ResourceClassificationInstanceProvider.cs
r9456 r9608 49 49 TableFileParser csvFileParser = new TableFileParser(); 50 50 using (Stream stream = instancesZipFile.GetInputStream(entry)) { 51 csvFileParser.Parse(stream, numberFormat, dateFormat, separator );51 csvFileParser.Parse(stream, numberFormat, dateFormat, separator, csvFileParser.AreColumnNamesInFirstLine(stream)); 52 52 } 53 53 -
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Clustering/CSV/ClusteringCSVInstanceProvider.cs
r9456 r9608 58 58 public override IClusteringProblemData ImportData(string path) { 59 59 var csvFileParser = new TableFileParser(); 60 csvFileParser.Parse(path );60 csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path)); 61 61 62 62 Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values); -
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/DataAnalysisCSVFormat.cs
r9456 r9608 27 27 public NumberFormatInfo NumberFormatInfo { get; set; } 28 28 public DateTimeFormatInfo DateTimeFormatInfo { get; set; } 29 public bool VariableNamesAvailable { get; set; } 29 30 } 30 31 } -
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/DataAnalysisInstanceProvider.cs
r9456 r9608 38 38 public TData ImportData(string path, ImportType type, DataAnalysisCSVFormat csvFormat) { 39 39 TableFileParser csvFileParser = new TableFileParser(); 40 csvFileParser.Parse(path, csvFormat.NumberFormatInfo, csvFormat.DateTimeFormatInfo, csvFormat.Separator );40 csvFileParser.Parse(path, csvFormat.NumberFormatInfo, csvFormat.DateTimeFormatInfo, csvFormat.Separator, csvFormat.VariableNamesAvailable); 41 41 return ImportData(path, type, csvFileParser); 42 42 } -
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/CSV/RegressionCSVInstanceProvider.cs
r9456 r9608 57 57 public override IRegressionProblemData ImportData(string path) { 58 58 TableFileParser csvFileParser = new TableFileParser(); 59 csvFileParser.Parse(path );59 csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path)); 60 60 61 61 Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values); -
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/ResourceRegressionInstanceProvider.cs
r9456 r9608 49 49 TableFileParser csvFileParser = new TableFileParser(); 50 50 using (Stream stream = instancesZipFile.GetInputStream(entry)) { 51 csvFileParser.Parse(stream, numberFormat, dateFormat, separator );51 csvFileParser.Parse(stream, numberFormat, dateFormat, separator, csvFileParser.AreColumnNamesInFirstLine(stream)); 52 52 } 53 53 -
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/TableFileParser.cs
r9457 r9608 75 75 } 76 76 77 public bool AreColumnNamesInFirstLine(string fileName) { 78 NumberFormatInfo numberFormat; 79 DateTimeFormatInfo dateTimeFormatInfo; 80 char separator; 81 DetermineFileFormat(fileName, out numberFormat, out dateTimeFormatInfo, out separator); 82 using (var stream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) { 83 return AreColumnNamesInFirstLine(stream, numberFormat, dateTimeFormatInfo, separator); 84 } 85 } 86 87 public bool AreColumnNamesInFirstLine(Stream stream) { 88 NumberFormatInfo numberFormat = NumberFormatInfo.InvariantInfo; 89 DateTimeFormatInfo dateTimeFormatInfo = DateTimeFormatInfo.InvariantInfo; 90 char separator = ','; 91 return AreColumnNamesInFirstLine(stream, numberFormat, dateTimeFormatInfo, separator); 92 } 93 94 public bool AreColumnNamesInFirstLine(string fileName, NumberFormatInfo numberFormat, 95 DateTimeFormatInfo dateTimeFormatInfo, char separator) { 96 using (var stream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) { 97 return AreColumnNamesInFirstLine(stream, numberFormat, dateTimeFormatInfo, separator); 98 } 99 } 100 101 public bool AreColumnNamesInFirstLine(Stream stream, NumberFormatInfo numberFormat, 102 DateTimeFormatInfo dateTimeFormatInfo, char separator) { 103 using (StreamReader reader = new StreamReader(stream)) { 104 tokenizer = new Tokenizer(reader, numberFormat, dateTimeFormatInfo, separator); 105 return tokenizer.Peek().type != TokenTypeEnum.Double; 106 } 107 } 108 77 109 /// <summary> 78 110 /// Parses a file and determines the format first 79 111 /// </summary> 80 112 /// <param name="fileName">file which is parsed</param> 81 public void Parse(string fileName) { 113 /// <param name="columnNamesInFirstLine"></param> 114 public void Parse(string fileName, bool columnNamesInFirstLine) { 82 115 NumberFormatInfo numberFormat; 83 116 DateTimeFormatInfo dateTimeFormatInfo; 84 117 char separator; 85 DetermineFileFormat( new 
FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite), out numberFormat, out dateTimeFormatInfo, out separator);86 Parse(new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite), numberFormat, dateTimeFormatInfo, separator );118 DetermineFileFormat(fileName, out numberFormat, out dateTimeFormatInfo, out separator); 119 Parse(new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite), numberFormat, dateTimeFormatInfo, separator, columnNamesInFirstLine); 87 120 } 88 121 … … 94 127 /// <param name="dateTimeFormatInfo">Format of datetime</param> 95 128 /// <param name="separator">defines the separator</param> 96 public void Parse(string fileName, NumberFormatInfo numberFormat, DateTimeFormatInfo dateTimeFormatInfo, char separator) { 97 Parse(new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite), numberFormat, dateTimeFormatInfo, separator); 129 /// <param name="columnNamesInFirstLine"></param> 130 public void Parse(string fileName, NumberFormatInfo numberFormat, DateTimeFormatInfo dateTimeFormatInfo, char separator, bool columnNamesInFirstLine) { 131 using (var stream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) { 132 Parse(stream, numberFormat, dateTimeFormatInfo, separator, columnNamesInFirstLine); 133 } 98 134 } 99 135 … … 102 138 /// </summary> 103 139 /// <param name="stream">stream which is parsed</param> 104 public void Parse(Stream stream) { 140 /// <param name="columnNamesInFirstLine"></param> 141 public void Parse(Stream stream, bool columnNamesInFirstLine) { 105 142 NumberFormatInfo numberFormat = NumberFormatInfo.InvariantInfo; 106 143 DateTimeFormatInfo dateTimeFormatInfo = DateTimeFormatInfo.InvariantInfo; 107 144 char separator = ','; 108 Parse(stream, numberFormat, dateTimeFormatInfo, separator );145 Parse(stream, numberFormat, dateTimeFormatInfo, separator, columnNamesInFirstLine); 109 146 } 110 147 … … 116 153 /// <param 
name="dateTimeFormatInfo">Format of datetime</param> 117 154 /// <param name="separator">defines the separator</param> 118 public void Parse(Stream stream, NumberFormatInfo numberFormat, DateTimeFormatInfo dateTimeFormatInfo, char separator) { 155 /// <param name="columnNamesInFirstLine"></param> 156 public void Parse(Stream stream, NumberFormatInfo numberFormat, DateTimeFormatInfo dateTimeFormatInfo, char separator, bool columnNamesInFirstLine) { 119 157 using (StreamReader reader = new StreamReader(stream)) { 120 158 tokenizer = new Tokenizer(reader, numberFormat, dateTimeFormatInfo, separator); 121 159 // parse the file 122 Parse( );160 Parse(columnNamesInFirstLine); 123 161 } 124 162 … … 340 378 if (strToken.Equals(INTERNAL_SEPARATOR)) { 341 379 return SeparatorToken; 342 } else if (double.TryParse(strToken, NumberStyles.Float , numberFormatInfo, out token.doubleValue)) {380 } else if (double.TryParse(strToken, NumberStyles.Float | NumberStyles.AllowTrailingSign, numberFormatInfo, out token.doubleValue)) { 343 381 token.type = TokenTypeEnum.Double; 344 382 return token; … … 372 410 373 411 #region parsing 374 private void Parse() { 375 ParseVariableNames(); 376 if (!tokenizer.HasNext()) Error("Couldn't parse data values. Probably because of incorrect number format (the parser expects english number format with a '.' as decimal separator).", "", tokenizer.CurrentLineNumber); 412 private void Parse(bool columnNamesInFirstLine) { 413 if (columnNamesInFirstLine) { 414 ParseVariableNames(); 415 if (!tokenizer.HasNext()) 416 Error( 417 "Couldn't parse data values. Probably because of incorrect number format (the parser expects english number format with a '.' as decimal separator).", 418 "", tokenizer.CurrentLineNumber); 419 } 377 420 ParseValues(); 378 421 if (rowValues.Count == 0) Error("Couldn't parse data values. Probably because of incorrect number format (the parser expects english number format with a '.' 
as decimal separator).", "", tokenizer.CurrentLineNumber); … … 423 466 424 467 private void ParseVariableNames() { 425 //if first token is double no variables names are given426 if (tokenizer.Peek().type == TokenTypeEnum.Double) return;427 428 468 // the first line must contain variable names 429 469 List<Token> tokens = new List<Token>(); -
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/TimeSeries/CSV/TimeSeriesPrognosisCSVInstanceProvider.cs
r9456 r9608 57 57 public override ITimeSeriesPrognosisProblemData ImportData(string path) { 58 58 TableFileParser csvFileParser = new TableFileParser(); 59 csvFileParser.Parse(path );59 csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path)); 60 60 61 61 Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
Note: See TracChangeset for help on using the changeset viewer.