Changeset 16692 for branches/2521_ProblemRefactoring/HeuristicLab.Tests/HeuristicLab.Problems.Instances.DataAnalysis-3.3
- Timestamp:
- 03/18/19 17:24:30 (6 years ago)
- Location:
- branches/2521_ProblemRefactoring
- Files:
-
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2521_ProblemRefactoring
- Property svn:ignore
-
old new 24 24 protoc.exe 25 25 obj 26 .vs
-
- Property svn:mergeinfo changed
- Property svn:ignore
-
branches/2521_ProblemRefactoring/HeuristicLab.Tests
- Property svn:mergeinfo changed
-
branches/2521_ProblemRefactoring/HeuristicLab.Tests/HeuristicLab.Problems.Instances.DataAnalysis-3.3/ClassificationInstanceProviderTest.cs
r12012 r16692 1 1 #region License Information 2 2 /* HeuristicLab 3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 4 * 5 5 * This file is part of HeuristicLab. -
branches/2521_ProblemRefactoring/HeuristicLab.Tests/HeuristicLab.Problems.Instances.DataAnalysis-3.3/RegressionInstanceProviderTest.cs
r12012 r16692 1 1 #region License Information 2 2 /* HeuristicLab 3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 4 * 5 5 * This file is part of HeuristicLab. -
branches/2521_ProblemRefactoring/HeuristicLab.Tests/HeuristicLab.Problems.Instances.DataAnalysis-3.3/TableFileParserTest.cs
r12012 r16692 1 1 #region License Information 2 2 /* HeuristicLab 3 * Copyright (C) 2002-201 5Heuristic and Evolutionary Algorithms Laboratory (HEAL)3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 4 * 5 5 * This file is part of HeuristicLab. … … 21 21 22 22 using System; 23 using System.Collections.Generic; 24 using System.Globalization; 23 25 using System.IO; 26 using System.Text; 24 27 using Microsoft.VisualStudio.TestTools.UnitTesting; 25 28 … … 45 48 Assert.AreEqual(4, parser.Columns); 46 49 Assert.AreEqual(parser.Values[3][0], 3.14); 47 } finally { 50 } 51 finally { 48 52 File.Delete(tempFileName); 49 53 } … … 69 73 Assert.AreEqual(4, parser.Columns); 70 74 Assert.AreEqual(parser.Values[3][0], 3.14); 71 } finally { 75 } 76 finally { 72 77 File.Delete(tempFileName); 73 78 } … … 92 97 Assert.AreEqual(4, parser.Columns); 93 98 Assert.AreEqual(parser.Values[3][0], 3.14); 94 } finally { 99 } 100 finally { 95 101 File.Delete(tempFileName); 96 102 } … … 116 122 Assert.AreEqual(4, parser.Columns); 117 123 Assert.AreEqual(parser.Values[3][0], 3.14); 118 } finally { 124 } 125 finally { 119 126 File.Delete(tempFileName); 120 127 } … … 139 146 Assert.AreEqual(4, parser.Columns); 140 147 Assert.AreEqual((double)parser.Values[3][0], 3); 141 } finally { 148 } 149 finally { 142 150 File.Delete(tempFileName); 143 151 } … … 163 171 Assert.AreEqual(4, parser.Columns); 164 172 Assert.AreEqual((double)parser.Values[3][0], 3); 165 } finally { 173 } 174 finally { 166 175 File.Delete(tempFileName); 167 176 } … … 186 195 Assert.AreEqual(4, parser.Columns); 187 196 Assert.AreEqual((double)parser.Values[3][0], 3); 188 } finally { 197 } 198 finally { 189 199 File.Delete(tempFileName); 190 200 } … … 209 219 Assert.AreEqual(4, parser.Columns); 210 220 Assert.AreEqual((double)parser.Values[3][0], 3); 211 } finally { 221 } 222 finally { 212 223 File.Delete(tempFileName); 213 224 } … … 233 244 Assert.AreEqual(4, parser.Columns); 234 245 
Assert.AreEqual((double)parser.Values[3][0], 3); 235 } finally { 246 } 247 finally { 236 248 File.Delete(tempFileName); 237 249 } … … 257 269 Assert.AreEqual(4, parser.Columns); 258 270 Assert.AreEqual((double)parser.Values[3][0], 3); 259 } finally { 271 } 272 finally { 260 273 File.Delete(tempFileName); 261 274 } … … 281 294 Assert.AreEqual(4, parser.Columns); 282 295 Assert.AreEqual((double)parser.Values[3][0], 3.14); 283 } finally { 296 } 297 finally { 284 298 File.Delete(tempFileName); 285 299 } … … 305 319 Assert.AreEqual(4, parser.Columns); 306 320 Assert.AreEqual((double)parser.Values[3][0], 3.14); 307 } finally { 321 } 322 finally { 308 323 File.Delete(tempFileName); 309 324 } … … 328 343 Assert.AreEqual(4, parser.Columns); 329 344 Assert.AreEqual((double)parser.Values[3][0], 3.14); 330 } finally { 345 } 346 finally { 331 347 File.Delete(tempFileName); 332 348 } … … 352 368 Assert.AreEqual(4, parser.Columns); 353 369 Assert.AreEqual((double)parser.Values[3][0], 3.14); 354 } finally { 370 } 371 finally { 355 372 File.Delete(tempFileName); 356 373 } … … 375 392 Assert.AreEqual(4, parser.Columns); 376 393 Assert.AreEqual((double)parser.Values[3][0], 3); 377 } finally { 394 } 395 finally { 378 396 File.Delete(tempFileName); 379 397 } … … 399 417 Assert.AreEqual(4, parser.Columns); 400 418 Assert.AreEqual((double)parser.Values[3][0], 3); 401 } finally { 419 } 420 finally { 402 421 File.Delete(tempFileName); 403 422 } … … 422 441 Assert.AreEqual(4, parser.Rows); 423 442 Assert.AreEqual(4, parser.Columns); 424 } finally { 443 } 444 finally { 425 445 File.Delete(tempFileName); 426 446 } … … 445 465 Assert.AreEqual(4, parser.Columns); 446 466 Assert.AreEqual((double)parser.Values[3][0], 3.14); 447 } finally { 467 } 468 finally { 448 469 File.Delete(tempFileName); 449 470 } … … 469 490 Assert.AreEqual(4, parser.Columns); 470 491 Assert.AreEqual((double)parser.Values[3][0], 3.14); 471 } finally { 492 } 493 finally { 472 494 File.Delete(tempFileName); 473 495 } … … 492 
514 Assert.AreEqual(4, parser.Columns); 493 515 Assert.AreEqual((double)parser.Values[3][0], 3.14); 494 } finally { 516 } 517 finally { 495 518 File.Delete(tempFileName); 496 519 } … … 516 539 Assert.AreEqual(4, parser.Columns); 517 540 Assert.AreEqual((double)parser.Values[3][0], 3.14); 518 } finally { 541 } 542 finally { 519 543 File.Delete(tempFileName); 520 544 } … … 537 561 Assert.AreEqual(3, parser.Rows); 538 562 Assert.AreEqual(4507, parser.Columns); 539 } finally { 563 } 564 finally { 540 565 File.Delete(tempFileName); 541 566 } … … 560 585 Assert.AreEqual(4, parser.Columns); 561 586 Assert.AreEqual((double)parser.Values[3][0], 3); 562 } finally { 587 } 588 finally { 563 589 File.Delete(tempFileName); 564 590 } … … 584 610 Assert.AreEqual(4, parser.Columns); 585 611 Assert.AreEqual((double)parser.Values[3][0], 3); 586 } finally { 587 File.Delete(tempFileName); 612 } 613 finally { 614 File.Delete(tempFileName); 615 } 616 } 617 618 619 [TestMethod] 620 [TestCategory("Problems.Instances")] 621 [TestProperty("Time", "short")] 622 public void ParseWithColumnTypeConversionDE() { 623 // If first entry of a column can be parsed as a double we assume all values are doubles. 624 // However, if any of the following entries cannot be parsed as a double we convert the whole column to a string column. 625 // Special care needs to be taken with missing values, NaN (n.def.) and infinity values. 
626 // We only support DE-DE and InvariantCulture number formats 627 string tempFileName = Path.GetTempFileName(); 628 var deCultureInfo = CultureInfo.GetCultureInfo("DE-DE"); 629 WriteToFile(tempFileName, 630 "str\tdbl\tdbl\tdbl" + Environment.NewLine + 631 "1,3\t1,3\t0\t3" + Environment.NewLine + 632 "1,3\t\t0\t0" + Environment.NewLine + 633 "s\t" + double.NaN.ToString(deCultureInfo) + "\t0\t0" + Environment.NewLine + // double.NaN might have a different string representation on different systems (even when using the same CultureInfo) 634 "s\t" + double.PositiveInfinity.ToString(deCultureInfo) + "\t0\t0" + Environment.NewLine + 635 "s\t" + double.NegativeInfinity.ToString(deCultureInfo) + "\t0\t0" + Environment.NewLine + 636 "s\t0\t0\t0"); 637 TableFileParser parser = new TableFileParser(); 638 try { 639 parser.Parse(tempFileName, 640 deCultureInfo.NumberFormat, 641 deCultureInfo.DateTimeFormat, 642 '\t', 643 true); 644 Assert.AreEqual(6, parser.Rows); 645 Assert.AreEqual(4, parser.Columns); 646 Assert.IsTrue(parser.Values[0] is List<string>); 647 Assert.IsTrue(parser.Values[1] is List<double>); 648 Assert.IsTrue(parser.Values[2] is List<double>); 649 Assert.IsTrue(parser.Values[3] is List<double>); 650 Assert.IsTrue(double.IsNaN((double)parser.Values[1][1])); // missing value 651 Assert.IsTrue(double.IsNaN((double)parser.Values[1][2])); 652 Assert.IsTrue(double.IsPositiveInfinity((double)parser.Values[1][3])); // NOTE: in DE-DE NumberFormat just "unendlich" is not allowed (compare with InvariantCulture) 653 Assert.IsTrue(double.IsNegativeInfinity((double)parser.Values[1][4])); 654 } 655 finally { 656 File.Delete(tempFileName); 657 } 658 } 659 660 [TestMethod] 661 [TestCategory("Problems.Instances")] 662 [TestProperty("Time", "short")] 663 public void ParseWithColumnTypeConversionInvariant() { 664 // see ParseWithColumnTypeConversionDE above 665 // same routine only using invariant culture 666 string tempFileName = Path.GetTempFileName(); 667 
WriteToFile(tempFileName, 668 @"str,dbl,dbl,dbl 669 1.3,1.3,0,3 670 1.3,,0,0 671 s,NaN,0,0 672 s,Infinity,0,0 673 s,-Infinity,0,0 674 s,0,0,0"); 675 TableFileParser parser = new TableFileParser(); 676 try { 677 parser.Parse(tempFileName, 678 CultureInfo.InvariantCulture.NumberFormat, 679 CultureInfo.InvariantCulture.DateTimeFormat, 680 ',', 681 parser.AreColumnNamesInFirstLine(tempFileName)); 682 Assert.AreEqual(6, parser.Rows); 683 Assert.AreEqual(4, parser.Columns); 684 Assert.IsTrue(parser.Values[0] is List<string>); 685 Assert.IsTrue(parser.Values[1] is List<double>); 686 Assert.IsTrue(parser.Values[2] is List<double>); 687 Assert.IsTrue(parser.Values[3] is List<double>); 688 Assert.IsTrue(double.IsNaN((double)parser.Values[1][1])); // missing value 689 Assert.IsTrue(double.IsNaN((double)parser.Values[1][2])); 690 Assert.IsTrue(double.IsPositiveInfinity((double)parser.Values[1][3])); // NOTE: in InvariantCulture +Infinity is not allowed (compare with DE-DE) 691 Assert.IsTrue(double.IsNegativeInfinity((double)parser.Values[1][4])); 692 } 693 finally { 694 File.Delete(tempFileName); 695 } 696 } 697 698 699 [TestMethod] 700 [TestCategory("Problems.Instances")] 701 [TestProperty("Time", "short")] 702 public void ParseWithTypeConversion() { 703 // the parser tries to determine the column type (double, datetime, string) by looking at the values in the first few rows 704 // if the values are of a different type then the type of the column is converted 705 { 706 // case 1 707 // default for values is double and therefore a column with all missing values should be List<double> and contain NaN 708 var tmpFileName = Path.GetTempFileName(); 709 WriteToFile(tmpFileName, 710 @"stringCol,note 711 ,missing val 712 3.14,double 713 "); 714 715 TableFileParser parser = new TableFileParser(); 716 try { 717 parser.Parse(tmpFileName, 718 CultureInfo.InvariantCulture.NumberFormat, 719 CultureInfo.InvariantCulture.DateTimeFormat, 720 separator: ',', columnNamesInFirstLine: true); 721 
Assert.IsTrue(parser.Values[0] is List<double>); 722 Assert.AreEqual(double.NaN, parser.Values[0][0]); 723 Assert.AreEqual(3.14, parser.Values[0][1]); 724 } 725 finally { 726 File.Delete(tmpFileName); 727 } 728 729 } 730 731 { 732 // case 2 733 // 'The first missing values are replaced with double.NaN while the last ones with string.Empty.' 734 735 var tmpFileName = Path.GetTempFileName(); 736 WriteToFile(tmpFileName, 737 @"stringCol,note 738 ,missing val 739 3.14, 740 ,missing val 741 str,a string --> column is converted to List<string> 742 ,missing val 743 "); 744 745 TableFileParser parser = new TableFileParser(); 746 try { 747 parser.Parse(tmpFileName, 748 CultureInfo.InvariantCulture.NumberFormat, 749 CultureInfo.InvariantCulture.DateTimeFormat, 750 separator: ',', columnNamesInFirstLine: true); 751 Assert.IsTrue(parser.Values[0] is List<string>); 752 Assert.AreEqual(string.Empty, parser.Values[0][0]); 753 Assert.AreEqual("3.14", parser.Values[0][1]); 754 Assert.AreEqual(string.Empty, parser.Values[0][2]); 755 Assert.AreEqual("str", parser.Values[0][3]); 756 Assert.AreEqual(string.Empty, parser.Values[0][4]); 757 } 758 finally { 759 File.Delete(tmpFileName); 760 } 761 } 762 763 { 764 // case 3 765 // DateTime conversion to strings 766 var tmpFileName = Path.GetTempFileName(); 767 WriteToFile(tmpFileName, 768 @"stringCol,note 769 ,missing val 770 3.1.2016, 771 ,missing val 772 str,a string --> column is converted to List<string> 773 ,missing val 774 "); 775 776 TableFileParser parser = new TableFileParser(); 777 try { 778 parser.Parse(tmpFileName, 779 CultureInfo.InvariantCulture.NumberFormat, 780 CultureInfo.InvariantCulture.DateTimeFormat, 781 separator: ',', columnNamesInFirstLine: true); 782 Assert.IsTrue(parser.Values[0] is List<string>); 783 Assert.AreEqual(string.Empty, parser.Values[0][0]); 784 Assert.AreEqual("3.1.2016", parser.Values[0][1]); 785 Assert.AreEqual(string.Empty, parser.Values[0][2]); 786 Assert.AreEqual("str", parser.Values[0][3]); 787 
Assert.AreEqual(string.Empty, parser.Values[0][4]); 788 } 789 finally { 790 File.Delete(tmpFileName); 791 } 792 } 793 } 794 795 [TestMethod] 796 [TestCategory("Problems.Instances")] 797 [TestProperty("Time", "short")] 798 public void ParseDateTime() { 799 { 800 // case 1 dates and datetimes should be parsed as datetime column 801 var tmpFileName = Path.GetTempFileName(); 802 WriteToFile(tmpFileName, 803 @"stringCol,note 804 19.6.2016,date 805 19.6.2016 8:15,datetime 806 "); 807 808 TableFileParser parser = new TableFileParser(); 809 try { 810 parser.Parse(tmpFileName, 811 CultureInfo.GetCultureInfo("de-de").NumberFormat, 812 CultureInfo.GetCultureInfo("de-de").DateTimeFormat, 813 separator: ',', columnNamesInFirstLine: true); 814 Assert.IsTrue(parser.Values[0] is List<DateTime>); 815 Assert.AreEqual(new DateTime(2016, 6, 19), parser.Values[0][0]); 816 Assert.AreEqual(new DateTime(2016, 6, 19, 8, 15, 0), parser.Values[0][1]); 817 818 WriteToFile(tmpFileName, 819 @"stringCol,note 820 2016/6/19,date 821 2016/6/19 8:15,datetime 822 "); 823 824 parser.Parse(tmpFileName, 825 CultureInfo.InvariantCulture.NumberFormat, 826 CultureInfo.InvariantCulture.DateTimeFormat, 827 separator: ',', columnNamesInFirstLine: true); 828 Assert.IsTrue(parser.Values[0] is List<DateTime>); 829 Assert.AreEqual(new DateTime(2016, 6, 19), parser.Values[0][0]); 830 Assert.AreEqual(new DateTime(2016, 6, 19, 8, 15, 0), parser.Values[0][1]); 831 } 832 833 finally { 834 File.Delete(tmpFileName); 835 } 836 } 837 838 { 839 // case 2 never parse time values as datetimes 840 var tmpFileName = Path.GetTempFileName(); 841 WriteToFile(tmpFileName, 842 @"stringCol,note 843 8:15,time value 844 9:40,time value 845 "); 846 847 TableFileParser parser = new TableFileParser(); 848 try { 849 parser.Parse(tmpFileName, 850 CultureInfo.InvariantCulture.NumberFormat, 851 CultureInfo.InvariantCulture.DateTimeFormat, 852 separator: ',', columnNamesInFirstLine: true); 853 Assert.IsTrue(parser.Values[0] is List<string>); 
// time values should be parsed as strings 854 Assert.AreEqual("8:15", parser.Values[0][0]); 855 Assert.AreEqual("9:40", parser.Values[0][1]); 856 } 857 finally { 858 File.Delete(tmpFileName); 859 } 860 } 861 } 862 863 864 865 [TestMethod] 866 [TestCategory("Problems.Instances")] 867 [TestProperty("Time", "short")] 868 public void CheckTypeConversionAndLongFiles() { 869 { 870 // case 1 incorrect input after 500 rows should lead to exceptions 871 var tmpFileName = Path.GetTempFileName(); 872 // create input data 873 var sb = new StringBuilder(); 874 sb.AppendLine("col1,col2"); 875 for (int r = 0; r < 2000; r++) { 876 sb.AppendLine("3.15, 3.15"); 877 } 878 // add a row with only one input value 879 sb.AppendLine("3.15"); 880 881 WriteToFile(tmpFileName, sb.ToString()); 882 883 TableFileParser parser = new TableFileParser(); 884 try { 885 parser.Parse(tmpFileName, 886 CultureInfo.InvariantCulture.NumberFormat, 887 CultureInfo.InvariantCulture.DateTimeFormat, 888 separator: ',', columnNamesInFirstLine: true); 889 // Parse should fail with an exception 890 Assert.Fail("expected exception TableFileParser.DataFormatException"); 891 } 892 catch (IOException) { 893 // ignore the expected exception 894 } 895 896 finally { 897 File.Delete(tmpFileName); 898 } 899 } 900 { 901 // case 2 902 var tmpFileName = Path.GetTempFileName(); 903 // create input data 904 var sb = new StringBuilder(); 905 sb.AppendLine("doubleCol,stringCol"); 906 for (int r = 0; r < 2000; r++) { 907 sb.AppendLine("3.15, 3.15"); 908 } 909 // add a row with a string value --> the column should be converted to string 910 sb.AppendLine("3.15,str"); 911 912 WriteToFile(tmpFileName, sb.ToString()); 913 914 TableFileParser parser = new TableFileParser(); 915 try { 916 parser.Parse(tmpFileName, 917 CultureInfo.InvariantCulture.NumberFormat, 918 CultureInfo.InvariantCulture.DateTimeFormat, 919 separator: ',', columnNamesInFirstLine: true); 920 Assert.IsTrue(parser.Values[0] is List<double>); 921 
Assert.IsTrue(parser.Values[1] is List<string>); 922 Assert.AreEqual(parser.Values[1][0], "3.15"); 923 Assert.AreEqual(parser.Values[1][2000], "str"); 924 } 925 926 finally { 927 File.Delete(tmpFileName); 928 } 588 929 } 589 930 }
Note: See TracChangeset
for help on using the changeset viewer.