Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Formatters/InfixExpressionFormatter.cs

Last change on this file was 18220, checked in by gkronber, 3 years ago

#3136: reintegrated structure-template GP branch into trunk

File size: 17.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Globalization;
25using System.Linq;
26using System.Text;
27using HEAL.Attic;
28using HeuristicLab.Common;
29using HeuristicLab.Core;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31
32namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
33  public static class BaseInfixExpressionFormatter {
34    /// <summary>
35    ///  Performs some basic re-writing steps to simplify the code for formatting. Tree is changed.
36    ///  Removes single-argument +, * which have no effect
37    ///  Removes SubFunctions (no effect)
38    ///  Replaces variables with coefficients by an explicitly multiplication
39    ///  Replaces single-argument / with 1 / (..)
40    ///  Replaces multi-argument +, *, /, - with nested binary operations
41    ///  Rotates operations to remove necessity to group sub-expressions
42    /// </summary>
43    /// <param name="tree">The tree is changed</param>
44    public static void ConvertToBinaryLeftAssoc(ISymbolicExpressionTree tree) {
45      ConvertToBinaryLeftAssocRec(tree.Root.GetSubtree(0), tree.Root.GetSubtree(0).GetSubtree(0));
46    }
47
48    private static void ConvertToBinaryLeftAssocRec(ISymbolicExpressionTreeNode parent, ISymbolicExpressionTreeNode n) {
49      // recurse post-order
50      foreach (var subtree in n.Subtrees.ToArray()) ConvertToBinaryLeftAssocRec(n, subtree); // ToArray required as n.Subtrees is changed in method
51
52      if (n is VariableTreeNode varTreeNode && varTreeNode.Weight != 1.0) {
53        var mul = new Multiplication().CreateTreeNode();
54        var num = (NumberTreeNode)new Number().CreateTreeNode();
55        num.Value = varTreeNode.Weight;
56        varTreeNode.Weight = 1.0;
57        parent.ReplaceSubtree(n, mul);
58        mul.AddSubtree(num);
59        mul.AddSubtree(varTreeNode);
60      } else if (n.Symbol is SubFunctionSymbol) {
61        parent.ReplaceSubtree(n, n.GetSubtree(0));
62      } else if (n.SubtreeCount == 1 && (n.Symbol is Addition || n.Symbol is Multiplication || n.Symbol is And || n.Symbol is Or || n.Symbol is Xor)) {
63        // single-argument addition or multiplication has no effect -> remove
64        parent.ReplaceSubtree(n, n.GetSubtree(0));
65      } else if (n.SubtreeCount == 1 && (n.Symbol is Division)) {
66        // single-argument division is 1/f(x)
67        n.InsertSubtree(0, new Constant() { Value = 1.0 }.CreateTreeNode());
68      } else if (n.SubtreeCount > 2 && IsLeftAssocOp(n.Symbol)) {
69        // multi-argument +, -, *, / are the same as multiple binary operations (left-associative)
70        var sy = n.Symbol;
71
72        var additionalTrees = n.Subtrees.Skip(2).ToArray();
73        while (n.SubtreeCount > 2) n.RemoveSubtree(2); // keep only the first two arguments
74
75        var childIdx = parent.IndexOfSubtree(n);
76        parent.RemoveSubtree(childIdx);
77        var newChild = n;
78        // build a tree bottom to top, each time adding a subtree on the right
79        for (int i = 0; i < additionalTrees.Length; i++) {
80          var newOp = sy.CreateTreeNode();
81          newOp.AddSubtree(newChild);
82          newOp.AddSubtree(additionalTrees[i]);
83          newChild = newOp;
84        }
85        parent.InsertSubtree(childIdx, newChild);
86      } else if (n.SubtreeCount == 2 && n.GetSubtree(1).SubtreeCount == 2 &&
87                 IsAssocOp(n.Symbol) && IsOperator(n.GetSubtree(1).Symbol) &&
88                 Priority(n.Symbol) == Priority(n.GetSubtree(1).Symbol)) {
89        // f(x) <op> (g(x) <op> h(x))) is the same as  (f(x) <op> g(x)) <op> h(x) for associative <op>
90        // which is the same as f(x) <op> g(x) <op> h(x) for left-associative <op>
91        // The latter version is preferred because we do not need to write the parentheses.
92        // rotation:
93        //     (op1)              (op2)
94        //     /   \              /  \
95        //    a    (op2)       (op1)  c
96        //         /  \        /  \
97        //        b    c      a    b     
98        var op1 = n;
99        var op2 = n.GetSubtree(1);
100        var b = op2.GetSubtree(0); op2.RemoveSubtree(0);
101        op1.ReplaceSubtree(op2, b);
102        parent.ReplaceSubtree(op1, op2);
103        op2.InsertSubtree(0, op1);
104      }
105    }
106    public static void FormatRecursively(ISymbolicExpressionTreeNode node, StringBuilder strBuilder,
107                                          NumberFormatInfo numberFormat, string formatString, List<KeyValuePair<string, double>> parameters = null) {
108      // This method assumes that the tree has been converted to binary and left-assoc form (see ConvertToBinaryLeftAssocRec).
109      if (node.SubtreeCount == 0) {
110        // no subtrees
111        if (node.Symbol is Variable) {
112          var varNode = node as VariableTreeNode;
113          if (varNode.Weight != 1.0) { throw new NotSupportedException("Infix formatter does not support variables with coefficients."); }
114          AppendVariableName(strBuilder, varNode.VariableName);
115        } else if (node is INumericTreeNode numNode) {
116          var parenthesisRequired = RequiresParenthesis(node.Parent, node);
117          if (parenthesisRequired) strBuilder.Append("(");
118
119          AppendNumber(strBuilder, parameters, numNode.Value, formatString, numberFormat);
120          if (parenthesisRequired) strBuilder.Append(")");
121        } else if (node.Symbol is LaggedVariable) {
122          var varNode = node as LaggedVariableTreeNode;
123          if (!varNode.Weight.IsAlmost(1.0)) {
124            AppendNumber(strBuilder, parameters, varNode.Weight, formatString, numberFormat);
125            strBuilder.Append("*");
126          }
127
128          strBuilder.Append("LAG(");
129          AppendVariableName(strBuilder, varNode.VariableName);
130          strBuilder.Append(", ")
131                    .AppendFormat(numberFormat, "{0}", varNode.Lag)
132                    .Append(")");
133        } else if (node.Symbol is FactorVariable) {
134          var factorNode = node as FactorVariableTreeNode;
135          AppendVariableName(strBuilder, factorNode.VariableName);
136
137          strBuilder.Append("[");
138          for (int i = 0; i < factorNode.Weights.Length; i++) {
139            if (i > 0) strBuilder.Append(", ");
140            AppendNumber(strBuilder, parameters, factorNode.Weights[i], formatString, numberFormat);
141          }
142          strBuilder.Append("]");
143        } else if (node.Symbol is BinaryFactorVariable) {
144          var factorNode = node as BinaryFactorVariableTreeNode;
145          if (!factorNode.Weight.IsAlmost(1.0)) {
146            AppendNumber(strBuilder, parameters, factorNode.Weight, formatString, numberFormat);
147            strBuilder.Append("*");
148          }
149          AppendVariableName(strBuilder, factorNode.VariableName);
150          strBuilder.Append(" = ");
151          AppendVariableName(strBuilder, factorNode.VariableValue);
152        }
153      } else if (node.SubtreeCount == 1) {
154        // only functions and single-argument subtraction (=negation) or NOT are possible here.
155        var token = GetToken(node.Symbol);
156        // the only operators that are allowed with a single argument
157        if (node.Symbol is Subtraction || node.Symbol is Not) {
158          if (RequiresParenthesis(node.Parent, node)) {
159            strBuilder.Append("(");
160          }
161          strBuilder.Append(token);
162          FormatRecursively(node.GetSubtree(0), strBuilder, numberFormat, formatString, parameters);
163          if (RequiresParenthesis(node.Parent, node)) {
164            strBuilder.Append(")");
165          }
166        } else if (IsOperator(node.Symbol)) {
167          throw new FormatException("Single-argument version of " + node.Symbol.Name + " is not supported.");
168        } else {
169          // function with only one argument
170          strBuilder.Append(token);
171          strBuilder.Append("(");
172          FormatRecursively(node.GetSubtree(0), strBuilder, numberFormat, formatString, parameters);
173          strBuilder.Append(")");
174        }
175      } else if (node.SubtreeCount > 1) {
176        var token = GetToken(node.Symbol);
177        // operators
178        if (IsOperator(node.Symbol)) {
179          var parenthesisRequired = RequiresParenthesis(node.Parent, node);
180          if (parenthesisRequired) strBuilder.Append("(");
181          FormatRecursively(node.Subtrees.First(), strBuilder, numberFormat, formatString, parameters);
182
183          foreach (var subtree in node.Subtrees.Skip(1)) {
184            strBuilder.Append(" ").Append(token).Append(" ");
185            FormatRecursively(subtree, strBuilder, numberFormat, formatString, parameters);
186          }
187
188          if (parenthesisRequired) strBuilder.Append(")");
189        } else {
190          // function with multiple arguments (AQ)
191
192          strBuilder.Append(token);
193          strBuilder.Append("(");
194
195          FormatRecursively(node.Subtrees.First(), strBuilder, numberFormat, formatString, parameters);
196          foreach (var subtree in node.Subtrees.Skip(1)) {
197            strBuilder.Append(", ");
198            FormatRecursively(subtree, strBuilder, numberFormat, formatString, parameters);
199          }
200          strBuilder.Append(")");
201        }
202      }
203    }
204
205    private static int Priority(ISymbol symbol) {
206      if (symbol is Addition || symbol is Subtraction || symbol is Or || symbol is Xor) return 1;
207      if (symbol is Division || symbol is Multiplication || symbol is And) return 2;
208      if (symbol is Power || symbol is Not) return 3;
209      throw new NotSupportedException();
210    }
211
212    private static bool RequiresParenthesis(ISymbolicExpressionTreeNode parent, ISymbolicExpressionTreeNode child) {
213      if (child.SubtreeCount > 2 && IsOperator(child.Symbol)) throw new NotSupportedException("Infix formatter does not support operators with more than two children.");
214
215      // Basically: We need a parenthesis for child if the parent symbol binds stronger than child symbol.
216      if (parent.Symbol == null || parent.Symbol is ProgramRootSymbol || parent.Symbol is StartSymbol) return false;
217      if (IsFunction(parent.Symbol)) return false;
218      if (parent.SubtreeCount == 1 && (parent.Symbol is Subtraction)) return true;
219      if (child.SubtreeCount == 0) return false;
220      var parentPrio = Priority(parent.Symbol);
221      var childPrio = Priority(child.Symbol);
222      if (parentPrio > childPrio) return true;
223      else if (parentPrio == childPrio) {
224        if (IsLeftAssocOp(child.Symbol)) return parent.GetSubtree(0) != child; // (..) required only for right child for left-assoc op
225        if (IsRightAssocOp(child.Symbol)) return parent.GetSubtree(1) != child;
226      }
227      return false;
228    }
229
230    private static bool IsFunction(ISymbol symbol) {
231      // functions are formatted in prefix form e.g. sin(...)
232      return !IsOperator(symbol) && !IsLeaf(symbol);
233    }
234
235    private static bool IsLeaf(ISymbol symbol) {
236      return symbol.MaximumArity == 0;
237    }
238
239    private static bool IsOperator(ISymbol symbol) {
240      return IsLeftAssocOp(symbol) || IsRightAssocOp(symbol);
241    }
242
243    private static bool IsAssocOp(ISymbol symbol) {
244      // (a <op> b) <op> c = a <op> (b <op> c)
245      return symbol is Addition ||
246        symbol is Multiplication ||
247        symbol is And ||
248        symbol is Or ||
249        symbol is Xor;
250    }
251
252    private static bool IsLeftAssocOp(ISymbol symbol) {
253      // a <op> b <op> c = (a <op> b) <op> c
254      return symbol is Addition ||
255        symbol is Subtraction ||
256        symbol is Multiplication ||
257        symbol is Division ||
258        symbol is And ||
259        symbol is Or ||
260        symbol is Xor;
261    }
262
263    private static bool IsRightAssocOp(ISymbol symbol) {
264      // a <op> b <op> c = a <op> (b <op> c)
265      // Negation (single-argument subtraction) is also right-assoc, but we do not have a separate symbol for negation.
266      return symbol is Not ||
267             symbol is Power; // x^y^z = x^(y^z) (as in Fortran or Mathematica)
268    }
269
270    private static void AppendNumber(StringBuilder strBuilder, List<KeyValuePair<string, double>> parameters, double value, string formatString, NumberFormatInfo numberFormat) {
271      if (parameters != null) {
272        string paramKey = $"c_{parameters.Count}";
273        strBuilder.AppendFormat(CultureInfo.InvariantCulture, "{0}", paramKey);
274        parameters.Add(new KeyValuePair<string, double>(paramKey, value));
275      } else {
276        strBuilder.Append(value.ToString(formatString, numberFormat));
277      }
278    }
279
280    private static void AppendVariableName(StringBuilder strBuilder, string name) {
281      if (name.Contains("'"))
282        strBuilder.AppendFormat("\"{0}\"", name);
283      else
284        strBuilder.AppendFormat("'{0}'", name);
285    }
286
287    private static string GetToken(ISymbol symbol) {
288      var tok = InfixExpressionParser.knownSymbols.GetBySecond(symbol).FirstOrDefault();
289      if (tok == null)
290        throw new ArgumentException(string.Format("Unknown symbol {0} found.", symbol.Name));
291      return tok;
292    }
293  }
294
295  /// <summary>
296  /// Formats mathematical expressions in infix form. E.g. x1 * (3.0 * x2 + x3)
297  /// </summary>
298  [StorableType("6FE2C83D-A594-4ABF-B101-5AEAEA6D3E3D")]
299  [Item("Infix Symbolic Expression Tree Formatter",
300    "A string formatter that converts symbolic expression trees to infix expressions.")]
301  public sealed class InfixExpressionFormatter : NamedItem, ISymbolicExpressionTreeStringFormatter {
302    [StorableConstructor]
303    private InfixExpressionFormatter(StorableConstructorFlag _) : base(_) { }
304
305    private InfixExpressionFormatter(InfixExpressionFormatter original, Cloner cloner) : base(original, cloner) { }
306
307    public InfixExpressionFormatter()
308      : base() {
309      Name = ItemName;
310      Description = ItemDescription;
311    }
312
313    public override IDeepCloneable Clone(Cloner cloner) {
314      return new InfixExpressionFormatter(this, cloner);
315    }
316
317    /// <summary>
318    /// Produces an infix expression for a given expression tree.
319    /// </summary>
320    /// <param name="symbolicExpressionTree">The tree representation of the expression.</param>
321    /// <param name="numberFormat">Number format that should be used for parameters (e.g. NumberFormatInfo.InvariantInfo (default)).</param>
322    /// <param name="formatString">The format string for parameters (e.g. \"G4\" to limit to 4 digits, default is \"G\")</param>
323    /// <returns>Infix expression</returns>
324    public string Format(ISymbolicExpressionTree symbolicExpressionTree, NumberFormatInfo numberFormat,
325                         string formatString = "G") {
326      // skip root and start symbols
327      StringBuilder strBuilder = new StringBuilder();
328      var cleanTree = (ISymbolicExpressionTree)symbolicExpressionTree.Clone();
329      BaseInfixExpressionFormatter.ConvertToBinaryLeftAssoc(cleanTree);
330      BaseInfixExpressionFormatter.FormatRecursively(cleanTree.Root.GetSubtree(0).GetSubtree(0),
331        strBuilder, numberFormat, formatString);
332      return strBuilder.ToString();
333    }
334
335    public string Format(ISymbolicExpressionTree symbolicExpressionTree) {
336      return Format(symbolicExpressionTree, NumberFormatInfo.InvariantInfo);
337    }
338  }
339
340  [StorableType("54D917E8-134E-4066-9A60-2737C12D81DC")]
341  [Item("Infix String Formatter", "Formatter for symbolic expressions, which produces an infix expression " +
342                                 "as well as a list of all coefficient values")]
343  public sealed class InfixExpressionStringFormatter : NamedItem, ISymbolicExpressionTreeStringFormatter {
344    [StorableConstructor]
345    private InfixExpressionStringFormatter(StorableConstructorFlag _) : base(_) { }
346
347    private InfixExpressionStringFormatter(InfixExpressionStringFormatter original, Cloner cloner) : base(original, cloner) { }
348
349    public InfixExpressionStringFormatter() : base() {
350      Name = ItemName;
351      Description = ItemDescription;
352    }
353
354    public override IDeepCloneable Clone(Cloner cloner) {
355      return new InfixExpressionStringFormatter(this, cloner);
356    }
357
358    public string Format(ISymbolicExpressionTree symbolicExpressionTree) {
359      StringBuilder strBuilder = new StringBuilder();
360      var parameters = new List<KeyValuePair<string, double>>();
361      var cleanTree = (ISymbolicExpressionTree)symbolicExpressionTree.Clone();
362      BaseInfixExpressionFormatter.ConvertToBinaryLeftAssoc(cleanTree);
363      BaseInfixExpressionFormatter.FormatRecursively(cleanTree.Root.GetSubtree(0).GetSubtree(0),
364        strBuilder, NumberFormatInfo.InvariantInfo, "G", parameters);
365      strBuilder.Append($"{Environment.NewLine}{Environment.NewLine}");
366
367      int maxDigits = GetDigits(parameters.Count);
368      int padding = parameters.Max(x => x.Value.ToString("F12", CultureInfo.InvariantCulture).Length);
369      foreach (var param in parameters) {
370        int digits = GetDigits(int.Parse(param.Key.Substring(2)));
371        strBuilder.Append($"{param.Key}{new string(' ', maxDigits - digits)} = " +
372                          string.Format($"{{0,{padding}:F12}}", param.Value, CultureInfo.InvariantCulture) +
373                          Environment.NewLine);
374      }
375
376      return strBuilder.ToString();
377    }
378
379    private int GetDigits(int x) {
380      if (x == 0) return 1;
381      return (int)Math.Floor(Math.Log10(x) + 1);
382    }
383  }
384}
Note: See TracBrowser for help on using the repository browser.