Free cookie consent management tool by TermsFeed Policy Generator

Changeset 6440


Ignore:
Timestamp:
06/17/11 15:41:41 (13 years ago)
Author:
gkronber
Message:

#1553: implemented unit test to create and run the symbolic classification sample (mammographic masses)

Location:
trunk/sources
Files:
1 added
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationProblemData.cs

    r6223 r6440  
    185 185
    186 186    #region parameter properties
    187     public IValueParameter<StringValue> TargetVariableParameter {
    188       get { return (IValueParameter<StringValue>)Parameters[TargetVariableParameterName]; }
     187    public ConstrainedValueParameter<StringValue> TargetVariableParameter {
     188      get { return (ConstrainedValueParameter<StringValue>)Parameters[TargetVariableParameterName]; }
    189 189    }
    190 190    public IFixedValueParameter<StringMatrix> ClassNamesParameter {
  • trunk/sources/HeuristicLab/3.3/Tests/GeneticProgrammingSamplesTest.cs

    r6439 r6440  
    17 17 using HeuristicLab.Problems.DataAnalysis.Symbolic;
    18 18 using System.IO;
     19using HeuristicLab.Problems.DataAnalysis.Symbolic.Classification;
    19 20
    20 21 namespace HeuristicLab_33.Tests {
     
    72 73      #region problem configuration
    73 74      SymbolicRegressionSingleObjectiveProblem symbRegProblem = new SymbolicRegressionSingleObjectiveProblem();
     75      symbRegProblem.Name = "Tower Symbolic Regression Problem";
     76      symbRegProblem.Description = "Tower Dataset (downloaded from: http://vanillamodeling.com/realproblems.html)";
    74 77      // import and configure problem data
    75 78      string filename = Path.GetTempFileName();
     
    139 142      symbRegProblem.ValidationPartition.End = 4000;
    140 143      symbRegProblem.RelativeNumberOfEvaluatedSamples.Value = 0.3;
     144      symbRegProblem.MaximumSymbolicExpressionTreeLength.Value = 150;
     145      symbRegProblem.MaximumSymbolicExpressionTreeDepth.Value = 12;
     146      symbRegProblem.MaximumFunctionDefinitions.Value = 0;
     147      symbRegProblem.MaximumFunctionArguments.Value = 0;
     148
    141 149      symbRegProblem.EvaluatorParameter.Value = new SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator();
    142 150      #endregion
     
    175 183    }
    176 184
     185    [TestMethod]
     186    public void CreateSymbolicClassificationSample() {
     187      GeneticAlgorithm ga = new GeneticAlgorithm();
     188      #region problem configuration
     189      SymbolicClassificationSingleObjectiveProblem symbClassProblem = new SymbolicClassificationSingleObjectiveProblem();
     190      symbClassProblem.Name = "Mammography Classification Problem";
     191      symbClassProblem.Description = "Mammography dataset imported from the UCI machine learning repository (http://archive.ics.uci.edu/ml/datasets/Mammographic+Mass)";
     192      // import and configure problem data
     193      string filename = Path.GetTempFileName();
     194      using (var writer = File.CreateText(filename)) {
     195        writer.Write(HeuristicLab_33.Tests.Properties.Resources.MammographicMasses);
     196      }
     197      var mammoData = ClassificationProblemData.ImportFromFile(filename);
     198      mammoData.TargetVariableParameter.Value = mammoData.TargetVariableParameter.ValidValues
     199        .First(v => v.Value == "Severity");
     200      mammoData.InputVariables.SetItemCheckedState(
     201        mammoData.InputVariables.Single(x => x.Value == "BI-RADS"), false);
     202      mammoData.InputVariables.SetItemCheckedState(
     203        mammoData.InputVariables.Single(x => x.Value == "Age"), true);
     204      mammoData.InputVariables.SetItemCheckedState(
     205        mammoData.InputVariables.Single(x => x.Value == "Shape"), true);
     206      mammoData.InputVariables.SetItemCheckedState(
     207        mammoData.InputVariables.Single(x => x.Value == "Margin"), true);
     208      mammoData.InputVariables.SetItemCheckedState(
     209        mammoData.InputVariables.Single(x => x.Value == "Density"), true);
     210      mammoData.InputVariables.SetItemCheckedState(
     211        mammoData.InputVariables.Single(x => x.Value == "Severity"), false);
     212      mammoData.TrainingPartition.Start = 0;
     213      mammoData.TrainingPartition.End = 800;
     214      mammoData.TestPartition.Start = 800;
     215      mammoData.TestPartition.End = 961;
     216      mammoData.Name = "Data imported from mammographic_masses.csv";
     217      mammoData.Description = "Original dataset: http://archive.ics.uci.edu/ml/datasets/Mammographic+Mass, missing values have been replaced with median values.";
     218      symbClassProblem.ProblemData = mammoData;
     219
     220      // configure grammar
     221      var grammar = new TypeCoherentExpressionGrammar();
     222      grammar.Symbols.OfType<Sine>().Single().InitialFrequency = 0.0;
     223      grammar.Symbols.OfType<Cosine>().Single().InitialFrequency = 0.0;
     224      grammar.Symbols.OfType<Tangent>().Single().InitialFrequency = 0.0;
     225      grammar.Symbols.OfType<Power>().Single().InitialFrequency = 0.0;
     226      grammar.Symbols.OfType<Root>().Single().InitialFrequency = 0.0;
     227      grammar.Symbols.OfType<TimeLag>().Single().InitialFrequency = 0.0;
     228      grammar.Symbols.OfType<Integral>().Single().InitialFrequency = 0.0;
     229      grammar.Symbols.OfType<Derivative>().Single().InitialFrequency = 0.0;
     230      grammar.Symbols.OfType<LaggedVariable>().Single().InitialFrequency = 0.0;
     231      grammar.Symbols.OfType<VariableCondition>().Single().InitialFrequency = 0.0;
     232      var varSymbol = grammar.Symbols.OfType<Variable>().Where(x => !(x is LaggedVariable)).Single();
     233      varSymbol.WeightMu = 1.0;
     234      varSymbol.WeightSigma = 1.0;
     235      varSymbol.WeightManipulatorMu = 0.0;
     236      varSymbol.WeightManipulatorSigma = 0.05;
     237      varSymbol.MultiplicativeWeightManipulatorSigma = 0.03;
     238      var constSymbol = grammar.Symbols.OfType<Constant>().Single();
     239      constSymbol.MaxValue = 20;
     240      constSymbol.MinValue = -20;
     241      constSymbol.ManipulatorMu = 0.0;
     242      constSymbol.ManipulatorSigma = 1;
     243      constSymbol.MultiplicativeManipulatorSigma = 0.03;
     244      symbClassProblem.SymbolicExpressionTreeGrammar = grammar;
     245
     246      // configure remaining problem parameters
     247      symbClassProblem.BestKnownQuality.Value = 0.0;
     248      symbClassProblem.FitnessCalculationPartition.Start = 0;
     249      symbClassProblem.FitnessCalculationPartition.End = 400;
     250      symbClassProblem.ValidationPartition.Start = 400;
     251      symbClassProblem.ValidationPartition.End = 800;
     252      symbClassProblem.RelativeNumberOfEvaluatedSamples.Value = 1;
     253      symbClassProblem.MaximumSymbolicExpressionTreeLength.Value = 100;
     254      symbClassProblem.MaximumSymbolicExpressionTreeDepth.Value = 10;
     255      symbClassProblem.MaximumFunctionDefinitions.Value = 0;
     256      symbClassProblem.MaximumFunctionArguments.Value = 0;
     257      symbClassProblem.EvaluatorParameter.Value = new SymbolicClassificationSingleObjectiveMeanSquaredErrorEvaluator();
     258      #endregion
     259      #region algorithm configuration
     260      ga.Problem = symbClassProblem;
     261      ga.Name = "Genetic Programming - Symbolic Classification";
     262      ga.Description = "A standard genetic programming algorithm to solve a classification problem (Mammographic+Mass dataset)";
     263      ga.Elites.Value = 1;
     264      ga.MaximumGenerations.Value = 100;
     265      ga.MutationProbability.Value = 0.15;
     266      ga.PopulationSize.Value = 1000;
     267      ga.Seed.Value = 0;
     268      ga.SetSeedRandomly.Value = false;
     269      var tSelector = ga.SelectorParameter.ValidValues
     270        .OfType<TournamentSelector>()
     271        .Single();
     272      tSelector.GroupSizeParameter.Value.Value = 5;
     273      ga.Selector = tSelector;
     274      var mutator = ga.MutatorParameter.ValidValues
     275        .OfType<MultiSymbolicExpressionTreeManipulator>()
     276        .Single();
     277      mutator.Operators.OfType<FullTreeShaker>().Single().ShakingFactor = 0.1;
     278      mutator.Operators.OfType<OnePointShaker>().Single().ShakingFactor = 1.0;
     279      ga.Mutator = mutator;
     280
     281      ga.Analyzer.Operators.SetItemCheckedState(
     282        ga.Analyzer.Operators
     283        .OfType<SymbolicClassificationSingleObjectiveOverfittingAnalyzer>()
     284        .Single(), false);
     285      ga.Engine = new ParallelEngine();
     286      #endregion
     287
     288      XmlGenerator.Serialize(ga, "../../SGP_SymbClass.hl");
     289
     290      RunAlgorithm(ga);
     291    }
     292
    177 293
    178 294    private void RunAlgorithm(IAlgorithm a) {
  • trunk/sources/HeuristicLab/3.3/Tests/HeuristicLab-3.3.Tests.csproj

    r6439 r6440  
    556 556  </ItemGroup>
    557 557  <ItemGroup>
     558    <Content Include="mammographic_masses.txt">
     559      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     560    </Content>
    558 561    <Content Include="towerData.txt">
    559 562      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
  • trunk/sources/HeuristicLab/3.3/Tests/Properties/Resources.Designer.cs

    r6439 r6440  
    62 62       
    63 63        /// <summary>
     64        ///   Looks up a localized string similar to BI-RADS; Age; Shape; Margin; Density; Severity
     65        ///5;67;3;5;3;1
     66        ///4;43;1;1;3;1
     67        ///5;58;4;5;3;1
     68        ///4;28;1;1;3;0
     69        ///5;74;1;5;3;1
     70        ///4;65;1;3;3;0
     71        ///4;70;3;3;3;0
     72        ///5;42;1;3;3;0
     73        ///5;57;1;5;3;1
     74        ///5;60;3;5;1;1
     75        ///5;76;1;4;3;1
     76        ///3;42;2;1;3;1
     77        ///4;64;1;3;3;0
     78        ///4;36;3;1;2;0
     79        ///4;60;2;1;2;0
     80        ///4;54;1;1;3;0
     81        ///3;52;3;4;3;0
     82        ///4;59;2;1;3;1
     83        ///4;54;1;1;3;1
     84        ///4;40;1;3;3;0
     85        ///4;66;3;3;1;1
     86        ///5;56;4;3;1;1
     87        ///4;43;1;3;3;0
     88        ///5;42;4;4;3;1
     89        ///4;59;2;4;3;1
     90        ///5;75;4;5;3;1
     91        ///2;66;1;1;3;0
     92        ///5;63;3;3;3;0
     93        ///5;45;4;5;3;1
     94        ///5;55;4;4;3;0
     95        ///4;46;1;5;2;0
     96        ///5;54;4;4;3;1
     97        ///5;57;4;4;3;1
     98        ///4; [rest of string was truncated]&quot;;.
     99        /// </summary>
     100        internal static string MammographicMasses {
     101            get {
     102                return ResourceManager.GetString("MammographicMasses", resourceCulture);
     103            }
     104        }
     105       
     106        /// <summary>
    64 107        ///   Looks up a localized string similar to x1;x2;x3;x4;x5;x6;x7;x8;x9;x10;x11;x12;x13;x14;x15;x16;x17;x18;x19;x20;x21;x22;x23;x24;x25;towerResponse
    65 108        ///28.0784464518229;13.939023844401;87.6339436848958;20.0777689615885;63.0026733398437;70.0181884765625;59.5551432291667;8.80629475911458;94.8240641276042;1.50430590311686;473.995963541667;165.433089192708;163.806526692708;161.395735677083;140.468115234375;129.394759114583;173.246484375;29.12998046875;7.02796274820964;124.702001953125;122.926342773437;51.7155436197917;90.4672119140625;129.585375976562;8.80 [rest of string was truncated]&quot;;.
  • trunk/sources/HeuristicLab/3.3/Tests/Properties/Resources.resx

    r6439 r6440  
    119 119  </resheader>
    120 120  <assembly alias="System.Windows.Forms" name="System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089" />
     121  <data name="MammographicMasses" type="System.Resources.ResXFileRef, System.Windows.Forms">
     122    <value>..\mammographic_masses.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252</value>
     123  </data>
    121 124  <data name="TowerData" type="System.Resources.ResXFileRef, System.Windows.Forms">
    122 125    <value>..\towerdata.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252</value>
Note: See TracChangeset for help on using the changeset viewer.