#region License Information
/* HeuristicLab
* Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using HEAL.Attic;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using System;
using System.Collections.Generic;
using System.Linq;
using DataRow = HeuristicLab.Analysis.DataRow;
using DataTable = HeuristicLab.Analysis.DataTable;
namespace HeuristicLab.Problems.DataAnalysis.Symbolic
{
[Item("Poly-10 building blocks analyzer", "An analyzer which attempts to identify parts of the Poly-10 formula")]
[StorableType("FA93D06D-B7CE-428A-8B22-ACB9A2BCE3CB")]
public class SymbolicDataAnalysisPoly10Analyzer : SymbolicDataAnalysisAnalyzer
{
// Names under which the analyzer's parameters are registered in (and looked up from) the Parameters collection.
private const string SymbolicDataAnalysisTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
private const string ProblemDataParameterName = "ProblemData";
private const string GenerationsParameterName = "Generations";
private const string PhenotypicSimilarityThresholdParameterName = "PhenotypicSimilarityThreshold";
private const string UpdateCounterParameterName = "UpdateCounter";
private const string UpdateIntervalParameterName = "UpdateInterval";
// Name of the result entry, and of the data table stored in it, that receives the building block frequencies.
private const string BuildingBlocksFrequenciesTableName = "Building blocks frequencies";
// NOTE(review): the generic type arguments of the three dictionaries below appear to have been lost from the
// text (e.g. "Dictionary>"). From the way the fields are used they are presumably
// Dictionary<string, List<double>>, Dictionary<string, (tree node type)> and Dictionary<string, string>;
// confirm against the original file before restoring them.
// All three maps are keyed by the building block's s-expression string and are filled by
// InitializeBuildingBlockCollection. They carry no storable attribute and the cloning constructor does not
// copy them, so a clone starts with empty maps.
// store evaluations of building blocks for phenotypic matching
private readonly Dictionary> evaluationMap = new Dictionary>();
// Maps each building block to the node obtained via Root.GetSubtree(0).GetSubtree(0) of its imported tree.
private readonly Dictionary fragmentMap = new Dictionary();
// Maps each building block to the human-readable label used as the row name in the frequencies table.
private readonly Dictionary prettyLabels = new Dictionary();
// Parses the s-expression strings of the building blocks into expression trees.
private readonly SymbolicExpressionImporter importer = new SymbolicExpressionImporter();
#region Parameters
// NOTE(review): the generic type arguments of the parameter types in this region (IValueParameter<...>,
// ILookupParameter<...>, ValueParameter<...>) appear to have been stripped from the text. The declarations are
// left untouched here; restore the arguments from the original file.
// Each accessor fetches the parameter registered under the corresponding *ParameterName constant and casts it
// to the declared parameter type.

/// <summary>Parameter holding the threshold that Apply compares the squared Pearson correlation against.</summary>
public IValueParameter PhenotypicSimilarityThresholdParameter {
get { return (IValueParameter)Parameters[PhenotypicSimilarityThresholdParameterName]; }
}
/// <summary>Lookup parameter for the interpreter used to evaluate trees and subtrees.</summary>
public ILookupParameter SymbolicDataAnalysisTreeInterpreterParameter {
get { return (ILookupParameter)Parameters[SymbolicDataAnalysisTreeInterpreterParameterName]; }
}
/// <summary>Lookup parameter for the problem data; its Dataset and TrainingIndices are read by this analyzer.</summary>
public ILookupParameter ProblemDataParameter {
get { return (ILookupParameter)Parameters[ProblemDataParameterName]; }
}
/// <summary>Lookup parameter for the generation counter; Apply initializes the building blocks when it is 0.</summary>
public ILookupParameter GenerationsParameter {
get { return (ILookupParameter)Parameters[GenerationsParameterName]; }
}
/// <summary>Parameter counting the Apply calls since the analysis last ran.</summary>
public ValueParameter UpdateCounterParameter {
get { return (ValueParameter)Parameters[UpdateCounterParameterName]; }
}
/// <summary>Parameter holding the counter value at which Apply performs the analysis.</summary>
public ValueParameter UpdateIntervalParameter {
get { return (ValueParameter)Parameters[UpdateIntervalParameterName]; }
}
#endregion
#region Parameter properties
/// <summary>
/// Gets or sets the value wrapped by <see cref="PhenotypicSimilarityThresholdParameter"/>.
/// </summary>
public double PhenotypicSimilarityThreshold {
    get {
        var wrapped = PhenotypicSimilarityThresholdParameter.Value;
        return wrapped.Value;
    }
    set {
        var wrapped = PhenotypicSimilarityThresholdParameter.Value;
        wrapped.Value = value;
    }
}

/// <summary>
/// Gets or sets the value wrapped by <see cref="UpdateCounterParameter"/>.
/// </summary>
public int UpdateCounter {
    get {
        var wrapped = UpdateCounterParameter.Value;
        return wrapped.Value;
    }
    set {
        var wrapped = UpdateCounterParameter.Value;
        wrapped.Value = value;
    }
}

/// <summary>
/// Gets or sets the value wrapped by <see cref="UpdateIntervalParameter"/>.
/// </summary>
public int UpdateInterval {
    get {
        var wrapped = UpdateIntervalParameter.Value;
        return wrapped.Value;
    }
    set {
        var wrapped = UpdateIntervalParameter.Value;
        wrapped.Value = value;
    }
}
#endregion
/// <summary>
/// Creates the analyzer and registers its six parameters: three lookup parameters (problem data,
/// tree interpreter, generations) and three value parameters (similarity threshold, initially 0.9;
/// update counter, initially 0; update interval, initially 1).
/// </summary>
public SymbolicDataAnalysisPoly10Analyzer()
{
// NOTE(review): the generic type arguments of LookupParameter / ValueParameter appear to have been stripped
// from the lines below; they are left untouched here — restore them from the original file.
#region Add parameters
Parameters.Add(new LookupParameter(ProblemDataParameterName));
Parameters.Add(new LookupParameter(SymbolicDataAnalysisTreeInterpreterParameterName));
Parameters.Add(new LookupParameter(GenerationsParameterName));
Parameters.Add(new ValueParameter(PhenotypicSimilarityThresholdParameterName, "The phenotypic similarity threshold", new DoubleValue(0.9)));
// With counter 0 and interval 1 the analysis in Apply runs on every call.
Parameters.Add(new ValueParameter(UpdateCounterParameterName, new IntValue(0)));
Parameters.Add(new ValueParameter(UpdateIntervalParameterName, new IntValue(1)));
#endregion
}
/// <summary>
/// Constructor marked with <c>[StorableConstructor]</c>; it only forwards the flag to the base class.
/// </summary>
/// <param name="_">Marker value passed through to the base constructor.</param>
[StorableConstructor]
protected SymbolicDataAnalysisPoly10Analyzer(StorableConstructorFlag _)
    : base(_)
{
}
/// <summary>
/// Cloning constructor. All copying is delegated to the base class; the private building block
/// caches of this class are not copied, so the new instance starts with empty caches.
/// </summary>
/// <param name="original">The analyzer being cloned.</param>
/// <param name="cloner">The cloner coordinating the deep copy.</param>
protected SymbolicDataAnalysisPoly10Analyzer(SymbolicDataAnalysisPoly10Analyzer original, Cloner cloner)
    : base(original, cloner) { }
/// <summary>
/// Creates a copy of this analyzer through the cloning constructor.
/// </summary>
/// <param name="cloner">The cloner coordinating the deep copy.</param>
/// <returns>The newly created analyzer.</returns>
public override IDeepCloneable Clone(Cloner cloner)
{
    var copy = new SymbolicDataAnalysisPoly10Analyzer(this, cloner);
    return copy;
}
/// <summary>
/// Always <c>false</c>. Declared with <c>new</c>, i.e. it hides an inherited member of the same name.
/// </summary>
// NOTE(review): because this member hides rather than overrides, code that reads EnabledByDefault through a
// base-class or interface reference will not see this value — confirm whether "override" was intended.
public new bool EnabledByDefault {
    get {
        return false;
    }
}
/// <summary>
/// Counts, for every Poly-10 building block, how many subtrees of the current trees are phenotypically
/// similar to it and appends one value per building block to the "Building blocks frequencies" table.
/// </summary>
/// <remarks>
/// The analysis only runs once the update counter has reached the update interval; otherwise the call
/// just increments the counter. A subtree counts as a match when the squared Pearson correlation between
/// its outputs and the building block's outputs on the training rows reaches
/// <see cref="PhenotypicSimilarityThreshold"/>. Once a subtree matches, its descendants are not tested
/// again for the same building block.
/// The interpreter found in the scope is cast to SymbolicDataAnalysisExpressionTreeLinearInterpreter;
/// any other interpreter type makes the cast fail.
/// </remarks>
/// <returns>The operation returned by <c>base.Apply()</c>.</returns>
public override IOperation Apply()
{
    #region Update counter & update interval
    UpdateCounter++;
    // Compare with "<" instead of "!=": if the interval is lowered below the current counter value
    // the analyzer still fires on the next call instead of never matching again.
    if (UpdateCounter < UpdateInterval)
    {
        return base.Apply();
    }
    UpdateCounter = 0;
    #endregion

    int generations = GenerationsParameter.ActualValue.Value;
    var results = ResultCollectionParameter.ActualValue;

    // Generation 0 marks a new run and resets caches and table. The two extra conditions cover the cases
    // in which the generation-0 call never got this far: an update interval greater than 1 (the first call
    // returns early above), or an instance whose caches are empty because it was cloned or re-created
    // mid-run. Previously those cases either failed on the missing result entry or recorded nothing.
    bool isNewRun = generations == 0;
    if (isNewRun || evaluationMap.Count == 0 || !results.ContainsKey(BuildingBlocksFrequenciesTableName))
    {
        InitializeBuildingBlockCollection(isNewRun);
    }

    var trees = SymbolicExpressionTreeParameter.ActualValue;
    // The per-node GetValues call below is made on this concrete interpreter type.
    var interpreter = (SymbolicDataAnalysisExpressionTreeLinearInterpreter)SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
    var dataset = ProblemDataParameter.ActualValue.Dataset;
    var rows = ProblemDataParameter.ActualValue.TrainingIndices.ToList();
    double threshold = PhenotypicSimilarityThreshold;

    // The prefix-ordered node lists do not depend on the building block, so build them once per tree.
    var prefixNodesPerTree = trees
        .Select(t => t.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix().ToList())
        .ToList();

    var bbFrequencies = evaluationMap.Keys.ToDictionary(x => x, x => 0);
    foreach (var entry in evaluationMap)
    {
        var referenceValues = entry.Value;
        int len = fragmentMap[entry.Key].GetLength();
        int matches = 0;

        foreach (var nodes in prefixNodesPerTree)
        {
            for (int i = 0; i < nodes.Count; ++i)
            {
                var s = nodes[i];
                int subtreeLength = s.GetLength();
                // NOTE(review): subtrees exactly as long as the building block are excluded by this
                // comparison (kept from the original) — confirm that ">=" was not intended.
                if (subtreeLength <= len)
                {
                    continue;
                }

                var values = interpreter.GetValues(s, dataset, rows);
                OnlineCalculatorError error;
                var r = OnlinePearsonsRCalculator.Calculate(values, referenceValues, out error);
                if (error != OnlineCalculatorError.None)
                {
                    continue;
                }

                // A NaN correlation fails this comparison and is therefore never counted.
                if (r * r >= threshold)
                {
                    matches++;
                    // Skip the descendants of the matched node. In the unfiltered prefix list they are
                    // the next (subtreeLength - 1) entries; the loop increment then moves to the first
                    // node after the subtree. The previous code advanced by the full subtree length on a
                    // length-filtered list and thereby also skipped nodes outside the matched subtree.
                    i += subtreeLength - 1;
                }
            }
        }

        bbFrequencies[entry.Key] = matches;
    }

    var table = (DataTable)results[BuildingBlocksFrequenciesTableName].Value;
    foreach (var pair in bbFrequencies)
    {
        var label = prettyLabels[pair.Key];
        if (table.Rows.ContainsKey(label))
        {
            table.Rows[label].Values.Add(pair.Value);
        }
    }
    return base.Apply();
}

/// <summary>
/// Defines the Poly-10 building blocks, caches their evaluation on the training rows together with
/// their tree fragments, and makes sure the frequencies table exists with one row per building block.
/// </summary>
/// <param name="resetState">
/// When <c>true</c> (start of a run) the cached evaluations are discarded and recomputed against the
/// current problem data, and all rows of the frequencies table are cleared. When <c>false</c> only
/// missing cache entries and missing table rows are added, so already recorded values are kept.
/// </param>
private void InitializeBuildingBlockCollection(bool resetState = true)
{
    #region Add building blocks
    // building blocks
    const string x1 = "(variable 1 X1)";
    const string x2 = "(variable 1 X2)";
    const string x3 = "(variable 1 X3)";
    const string x4 = "(variable 1 X4)";
    const string x5 = "(variable 1 X5)";
    const string x6 = "(variable 1 X6)";
    const string x7 = "(variable 1 X7)";
    // x8 is never used in the formula
    const string x9 = "(variable 1 X9)";
    const string x10 = "(variable 1 X10)";
    string s1 = String.Format("(* {0} {1})", x1, x2);
    string s2 = String.Format("(* {0} {1})", x3, x4);
    string s3 = String.Format("(* {0} {1})", x5, x6);
    string s4 = String.Format("(* (* {0} {1}) {2})", x1, x7, x9);
    string s5 = String.Format("(* (* {0} {1}) {2})", x3, x6, x10);
    string s6 = String.Format("(+ {0} {1})", s1, s2); // x1x2 + x3x4
    string s7 = String.Format("(+ {0} {1})", s1, s3); // x1x2 + x5x6
    string s8 = String.Format("(+ {0} {1})", s2, s3); // x3x4 + x5x6
    string s9 = String.Format("(+ (+ {0} {1}) {2})", s1, s2, s3); // x1x2 + x3x4 + x5x6
    string s10 = String.Format("(+ (+ {0} {1}) {2})", s4, s5, s9); // x1x2 + x3x4 + x5x6 + x1x7x9 + x3x6x10
    prettyLabels[s1] = "X1*X2";
    prettyLabels[s2] = "X3*X4";
    prettyLabels[s3] = "X5*X6";
    prettyLabels[s4] = "X1*X7*X9";
    prettyLabels[s5] = "X3*X6*X10";
    prettyLabels[s6] = prettyLabels[s1] + " + " + prettyLabels[s2];
    prettyLabels[s7] = prettyLabels[s1] + " + " + prettyLabels[s3];
    prettyLabels[s8] = prettyLabels[s2] + " + " + prettyLabels[s3];
    prettyLabels[s9] = prettyLabels[s1] + " + " + prettyLabels[s2] + " + " + prettyLabels[s3];
    prettyLabels[s10] = prettyLabels[s9] + " + " + prettyLabels[s4] + " + " + prettyLabels[s5];
    #endregion

    if (resetState)
    {
        // Evaluations cached by an earlier run were computed on that run's dataset and training rows;
        // reusing them after the problem data changed would compare against stale reference values.
        evaluationMap.Clear();
        fragmentMap.Clear();
    }

    var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
    var dataset = ProblemDataParameter.ActualValue.Dataset;
    var rows = ProblemDataParameter.ActualValue.TrainingIndices.ToList();
    foreach (var s in new[] { s1, s2, s3, s4, s5, s6, s7, s8, s9, s10 })
    {
        if (evaluationMap.ContainsKey(s))
        {
            continue;
        }
        var t = importer.Import(s);
        evaluationMap.Add(s, interpreter.GetSymbolicExpressionTreeValues(t, dataset, rows).ToList());
        fragmentMap.Add(s, t.Root.GetSubtree(0).GetSubtree(0));
    }

    var results = ResultCollectionParameter.ActualValue;
    DataTable table;
    if (results.ContainsKey(BuildingBlocksFrequenciesTableName))
    {
        table = (DataTable)results[BuildingBlocksFrequenciesTableName].Value;
    }
    else
    {
        table = new DataTable(BuildingBlocksFrequenciesTableName);
        results.Add(new Result(BuildingBlocksFrequenciesTableName, table));
    }

    if (resetState)
    {
        table.Rows.Clear();
    }
    foreach (var key in evaluationMap.Keys)
    {
        var label = prettyLabels[key];
        // Rows that already exist (only possible when resetState is false) keep their recorded values.
        if (!table.Rows.ContainsKey(label))
        {
            table.Rows.Add(new DataRow(label) { VisualProperties = { StartIndexZero = true } });
        }
    }
}
}
}