Changeset 273 for trunk/sources/HeuristicLab.DataAnalysis
- Timestamp: 05/29/08 18:05:50 (17 years ago)
- Location: trunk/sources/HeuristicLab.DataAnalysis
- Files: 1 added, 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources/HeuristicLab.DataAnalysis/DatasetParser.cs
r272 r273 28 28 namespace HeuristicLab.DataAnalysis { 29 29 public class DatasetParser { 30 private const string PROBLEMNAME = "PROBLEMNAME"; 31 private const string VARIABLENAMES = "VARIABLENAMES"; 32 private const string TARGETVARIABLE = "TARGETVARIABLE"; 33 private const string MAXIMUMTREEHEIGHT = "MAXIMUMTREEHEIGHT"; 34 private const string MAXIMUMTREESIZE = "MAXIMUMTREESIZE"; 35 private const string TRAININGSAMPLESSTART = "TRAININGSAMPLESSTART"; 36 private const string TRAININGSAMPLESEND = "TRAININGSAMPLESEND"; 30 37 private Tokenizer tokenizer; 31 38 private Dictionary<string, List<Token>> metadata; … … 53 60 public string ProblemName { 54 61 get { 55 return metadata["PROBLEMNAME"][0].stringValue; 62 if(metadata.ContainsKey(PROBLEMNAME)) { 63 return metadata[PROBLEMNAME][0].stringValue; 64 } else return "-"; 56 65 } 57 66 } … … 59 68 public string[] VariableNames { 60 69 get { 61 List<Token> nameList = metadata["VARIABLENAMES"]; 62 string[] names = new string[nameList.Count]; 63 for(int i = 0; i < names.Length; i++) { 64 names[i] = nameList[i].stringValue; 65 } 66 67 return names; 70 if(metadata.ContainsKey(VARIABLENAMES)) { 71 List<Token> nameList = metadata[VARIABLENAMES]; 72 string[] names = new string[nameList.Count]; 73 for(int i = 0; i < names.Length; i++) { 74 names[i] = nameList[i].stringValue; 75 } 76 return names; 77 } else { 78 string[] names = new string[columns]; 79 for(int i = 0; i < names.Length; i++) { 80 names[i] = "X" + i.ToString("000"); 81 } 82 return names; 83 } 68 84 } 69 85 } … … 71 87 public int TargetVariable { 72 88 get { 73 return metadata["TARGETVARIABLE"][0].intValue; 89 if(metadata.ContainsKey(TARGETVARIABLE)) { 90 return metadata[TARGETVARIABLE][0].intValue; 91 } else return 0; // default is the first column 74 92 } 75 93 } … … 77 95 public int MaxTreeHeight { 78 96 get { 79 return metadata["MAXIMUMTREEHEIGHT"][0].intValue; 97 if(metadata.ContainsKey(MAXIMUMTREEHEIGHT)) { 98 return metadata[MAXIMUMTREEHEIGHT][0].intValue; 99 } 
else return 0; 80 100 } 81 101 } … … 83 103 public int MaxTreeSize { 84 104 get { 85 return metadata["MAXIMUMTREESIZE"][0].intValue; 105 if(metadata.ContainsKey(MAXIMUMTREESIZE)) { 106 return metadata[MAXIMUMTREESIZE][0].intValue; 107 } else return 0; 86 108 } 87 109 } … … 89 111 public int TrainingSamplesStart { 90 112 get { 91 if(!metadata.ContainsKey("TRAININGSAMPLESSTART")) return 0; 92 else return metadata["TRAININGSAMPLESSTART"][0].intValue; 113 if(metadata.ContainsKey(TRAININGSAMPLESSTART)) { 114 return metadata[TRAININGSAMPLESSTART][0].intValue; 115 } else return 0; 93 116 } 94 117 } … … 96 119 public int TrainingSamplesEnd { 97 120 get { 98 if(!metadata.ContainsKey("TRAININGSAMPLESEND")) return rows; 99 else return metadata["TRAININGSAMPLESEND"][0].intValue; 121 if(metadata.ContainsKey(TRAININGSAMPLESEND)) { 122 return metadata[TRAININGSAMPLESEND][0].intValue; 123 } else return rows; 100 124 } 101 125 } … … 279 303 if(samplesList.Count > 0 && samplesList[0].Count != row.Count) { 280 304 Error("The first row of the dataset has " + samplesList[0].Count + " columns." + 281 "\nLine " + tokenizer.CurrentLineNumber + " has " + row.Count + " columns." 
);305 "\nLine " + tokenizer.CurrentLineNumber + " has " + row.Count + " columns.", "", tokenizer.CurrentLineNumber); 282 306 } 283 307 } else if(samplesList.Count > 0) { … … 301 325 // when we are parsing non-strictly we also allow unreadable values inserting NAN instead 302 326 if(strict) { 303 Error("Unkown value " + current + " in line " + tokenizer.CurrentLineNumber + 304 "\n" + tokenizer.CurrentLine); 327 Error("Unexpected token.", current.stringValue, tokenizer.CurrentLineNumber); 305 328 } else { 306 329 row.Add(double.NaN); … … 316 339 Token nameToken = tokenizer.Next(); 317 340 if(nameToken.type != TokenTypeEnum.String) 318 throw new Exception("Expected a variable name; got " + nameToken + 319 "\nLine " + tokenizer.CurrentLineNumber + ": " + tokenizer.CurrentLine); 341 Error("Expected a variable name.", nameToken.stringValue, tokenizer.CurrentLineNumber); 320 342 321 343 Expect(Tokenizer.AssignmentToken); … … 335 357 Token actualToken = tokenizer.Next(); 336 358 if(actualToken != expectedToken) { 337 Error("Expected: " + expectedToken + " got: " + actualToken + 338 "\nLine " + tokenizer.CurrentLineNumber + ": " + tokenizer.CurrentLine); 339 } 340 } 341 342 private void Error(string message) { 343 throw new Exception("Error while parsing.\n" + message); 359 Error("Expected: " + expectedToken, actualToken.stringValue, tokenizer.CurrentLineNumber); 360 } 361 } 362 363 private void Error(string message, string token, int lineNumber) { 364 throw new DataFormatException("Error while parsing.\n" + message, token, lineNumber); 344 365 } 345 366 #endregion -
trunk/sources/HeuristicLab.DataAnalysis/HeuristicLab.DataAnalysis.csproj
r234 r273 51 51 </ItemGroup> 52 52 <ItemGroup> 53 <Compile Include="DataFormatException.cs" /> 53 54 <Compile Include="Dataset.cs" /> 54 55 <Compile Include="DatasetParser.cs" />
Note: See TracChangeset for help on using the changeset viewer.