Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Formatters/InfixExpressionFormatter.cs @ 18211

Last change on this file since 18211 was 18211, checked in by gkronber, 2 years ago

#3145: refactored infix formatter to improve output (less parenthesis) and added unit tests to test that the infix formatter works correclty.

File size: 17.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Globalization;
25using System.Linq;
26using System.Text;
27using HEAL.Attic;
28using HeuristicLab.Common;
29using HeuristicLab.Core;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31
32namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
33  public static class BaseInfixExpressionFormatter {
34    /// <summary>
35    ///  Performs some basic re-writing steps to simplify the code for formatting. Tree is changed.
36    ///  Removes single-argument +, * which have no effect
37    ///  Replaces variables with coefficients by an explicitly multiplication
38    ///  Replaces single-argument / with 1 / (..)
39    ///  Replaces multi-argument +, *, /, - with nested binary operations
40    ///  Rotates operations to remove necessity to group sub-expressions
41    /// </summary>
42    /// <param name="tree">The tree is changed</param>
43    public static void ConvertToBinaryLeftAssoc(ISymbolicExpressionTree tree) {
44      ConvertToBinaryLeftAssocRec(tree.Root.GetSubtree(0), tree.Root.GetSubtree(0).GetSubtree(0));
45    }
46
47    private static void ConvertToBinaryLeftAssocRec(ISymbolicExpressionTreeNode parent, ISymbolicExpressionTreeNode n) {
48      // recurse post-order
49      foreach (var subtree in n.Subtrees.ToArray()) ConvertToBinaryLeftAssocRec(n, subtree); // ToArray required as n.Subtrees is changed in method
50
51      if (n is VariableTreeNode varTreeNode && varTreeNode.Weight != 1.0) {
52        var mul = new Multiplication().CreateTreeNode();
53        var num = (NumberTreeNode)new Number().CreateTreeNode();
54        num.Value = varTreeNode.Weight;
55        varTreeNode.Weight = 1.0;
56        parent.ReplaceSubtree(n, mul);
57        mul.AddSubtree(num);
58        mul.AddSubtree(varTreeNode);
59      } else if (n.SubtreeCount == 1 && (n.Symbol is Addition || n.Symbol is Multiplication || n.Symbol is And || n.Symbol is Or || n.Symbol is Xor)) {
60        // single-argument addition or multiplication has no effect -> remove
61        parent.ReplaceSubtree(n, n.GetSubtree(0));
62      } else if (n.SubtreeCount == 1 && (n.Symbol is Division)) {
63        // single-argument division is 1/f(x)
64        n.InsertSubtree(0, new Constant() { Value = 1.0 }.CreateTreeNode());
65      } else if (n.SubtreeCount > 2 && IsLeftAssocOp(n.Symbol)) {
66        // multi-argument +, -, *, / are the same as multiple binary operations (left-associative)
67        var sy = n.Symbol;
68
69        var additionalTrees = n.Subtrees.Skip(2).ToArray();
70        while (n.SubtreeCount > 2) n.RemoveSubtree(2); // keep only the first two arguments
71
72        var childIdx = parent.IndexOfSubtree(n);
73        parent.RemoveSubtree(childIdx);
74        var newChild = n;
75        // build a tree bottom to top, each time adding a subtree on the right
76        for (int i = 0; i < additionalTrees.Length; i++) {
77          var newOp = sy.CreateTreeNode();
78          newOp.AddSubtree(newChild);
79          newOp.AddSubtree(additionalTrees[i]);
80          newChild = newOp;
81        }
82        parent.InsertSubtree(childIdx, newChild);
83      } else if (n.SubtreeCount == 2 && n.GetSubtree(1).SubtreeCount == 2 &&
84                 IsAssocOp(n.Symbol) && IsOperator(n.GetSubtree(1).Symbol) &&
85                 Priority(n.Symbol) == Priority(n.GetSubtree(1).Symbol)) {
86        // f(x) <op> (g(x) <op> h(x))) is the same as  (f(x) <op> g(x)) <op> h(x) for associative <op>
87        // which is the same as f(x) <op> g(x) <op> h(x) for left-associative <op>
88        // The latter version is preferred because we do not need to write the parentheses.
89        // rotation:
90        //     (op1)              (op2)
91        //     /   \              /  \
92        //    a    (op2)       (op1)  c
93        //         /  \        /  \
94        //        b    c      a    b     
95        var op1 = n;
96        var op2 = n.GetSubtree(1);
97        var b = op2.GetSubtree(0); op2.RemoveSubtree(0);
98        op1.ReplaceSubtree(op2, b);
99        parent.ReplaceSubtree(op1, op2);
100        op2.InsertSubtree(0, op1);
101      }
102    }
103    public static void FormatRecursively(ISymbolicExpressionTreeNode node, StringBuilder strBuilder,
104                                          NumberFormatInfo numberFormat, string formatString, List<KeyValuePair<string, double>> parameters = null) {
105      // This method assumes that the tree has been converted to binary and left-assoc form (see ConvertToBinaryLeftAssocRec).
106      // An exception is thrown if the tree has a different shape.
107
108      if (node.SubtreeCount == 0) {
109        // no subtrees
110        if (node.Symbol is Variable) {
111          var varNode = node as VariableTreeNode;
112          if (varNode.Weight != 1.0) { throw new NotSupportedException("Infix formatter does not support variables with coefficients."); }
113          AppendVariableName(strBuilder, varNode.VariableName);
114        } else if (node is INumericTreeNode numNode) {
115          var parenthesisRequired = RequiresParenthesis(node.Parent, node);
116          if (parenthesisRequired) strBuilder.Append("(");
117
118          AppendNumber(strBuilder, parameters, numNode.Value, formatString, numberFormat);
119          if (parenthesisRequired) strBuilder.Append(")");
120        } else if (node.Symbol is LaggedVariable) {
121          var varNode = node as LaggedVariableTreeNode;
122          if (!varNode.Weight.IsAlmost(1.0)) {
123            AppendNumber(strBuilder, parameters, varNode.Weight, formatString, numberFormat);
124            strBuilder.Append("*");
125          }
126
127          strBuilder.Append("LAG(");
128          AppendVariableName(strBuilder, varNode.VariableName);
129          strBuilder.Append(", ")
130                    .AppendFormat(numberFormat, "{0}", varNode.Lag)
131                    .Append(")");
132        } else if (node.Symbol is FactorVariable) {
133          var factorNode = node as FactorVariableTreeNode;
134          AppendVariableName(strBuilder, factorNode.VariableName);
135
136          strBuilder.Append("[");
137          for (int i = 0; i < factorNode.Weights.Length; i++) {
138            if (i > 0) strBuilder.Append(", ");
139            AppendNumber(strBuilder, parameters, factorNode.Weights[i], formatString, numberFormat);
140          }
141          strBuilder.Append("]");
142        } else if (node.Symbol is BinaryFactorVariable) {
143          var factorNode = node as BinaryFactorVariableTreeNode;
144          if (!factorNode.Weight.IsAlmost(1.0)) {
145            AppendNumber(strBuilder, parameters, factorNode.Weight, formatString, numberFormat);
146            strBuilder.Append("*");
147          }
148          AppendVariableName(strBuilder, factorNode.VariableName);
149          strBuilder.Append(" = ");
150          AppendVariableName(strBuilder, factorNode.VariableValue);
151        }
152      } else if (node.SubtreeCount == 1) {
153        // only functions and single-argument subtraction (=negation) or NOT are possible here.
154        var token = GetToken(node.Symbol);
155        // the only operators that are allowed with a single argument
156        if (node.Symbol is Subtraction || node.Symbol is Not) {
157          if (RequiresParenthesis(node.Parent, node)) {
158            strBuilder.Append("(");
159          }
160          strBuilder.Append(token);
161          FormatRecursively(node.GetSubtree(0), strBuilder, numberFormat, formatString, parameters);
162          if (RequiresParenthesis(node.Parent, node)) {
163            strBuilder.Append(")");
164          }
165        } else if (IsOperator(node.Symbol)) {
166          throw new FormatException("Single-argument version of " + node.Symbol.Name + " is not supported.");
167        } else {
168          // function with only one argument
169          strBuilder.Append(token);
170          strBuilder.Append("(");
171          FormatRecursively(node.GetSubtree(0), strBuilder, numberFormat, formatString, parameters);
172          strBuilder.Append(")");
173        }
174      } else if (node.SubtreeCount > 1) {
175        var token = GetToken(node.Symbol);
176        // operators
177        if (IsOperator(node.Symbol)) {
178          var parenthesisRequired = RequiresParenthesis(node.Parent, node);
179          if (parenthesisRequired) strBuilder.Append("(");
180          FormatRecursively(node.Subtrees.First(), strBuilder, numberFormat, formatString, parameters);
181
182          foreach (var subtree in node.Subtrees.Skip(1)) {
183            strBuilder.Append(" ").Append(token).Append(" ");
184            FormatRecursively(subtree, strBuilder, numberFormat, formatString, parameters);
185          }
186
187          if (parenthesisRequired) strBuilder.Append(")");
188        } else {
189          // function with multiple arguments (AQ)
190
191          strBuilder.Append(token);
192          strBuilder.Append("(");
193
194          FormatRecursively(node.Subtrees.First(), strBuilder, numberFormat, formatString, parameters);
195          foreach (var subtree in node.Subtrees.Skip(1)) {
196            strBuilder.Append(", ");
197            FormatRecursively(subtree, strBuilder, numberFormat, formatString, parameters);
198          }
199          strBuilder.Append(")");
200        }
201      }
202    }
203
204    private static int Priority(ISymbol symbol) {
205      if (symbol is Addition || symbol is Subtraction || symbol is Or || symbol is Xor) return 1;
206      if (symbol is Division || symbol is Multiplication || symbol is And) return 2;
207      if (symbol is Power || symbol is Not) return 3;
208      throw new NotSupportedException();
209    }
210
211    private static bool RequiresParenthesis(ISymbolicExpressionTreeNode parent, ISymbolicExpressionTreeNode child) {
212      if (child.SubtreeCount > 2 && IsOperator(child.Symbol)) throw new NotSupportedException("Infix formatter does not support operators with more than two children.");
213
214      // Basically: We need a parenthesis for child if the parent symbol binds stronger than child symbol.
215      if (parent.Symbol == null || parent.Symbol is ProgramRootSymbol || parent.Symbol is StartSymbol) return false;
216      if (IsFunction(parent.Symbol)) return false;
217      if (parent.SubtreeCount == 1 && (parent.Symbol is Subtraction)) return true;
218      if (child.SubtreeCount == 0) return false;
219      var parentPrio = Priority(parent.Symbol);
220      var childPrio = Priority(child.Symbol);
221      if (parentPrio > childPrio) return true;
222      else if (parentPrio == childPrio) {
223        if (IsLeftAssocOp(child.Symbol)) return parent.GetSubtree(0) != child; // (..) required only for right child for left-assoc op
224        if (IsRightAssocOp(child.Symbol)) return parent.GetSubtree(1) != child;
225      }
226      return false;
227    }
228
229    private static bool IsFunction(ISymbol symbol) {
230      // functions are formatted in prefix form e.g. sin(...)
231      return !IsOperator(symbol) && !IsLeaf(symbol);
232    }
233
234    private static bool IsLeaf(ISymbol symbol) {
235      return symbol.MaximumArity == 0;
236    }
237
238    private static bool IsOperator(ISymbol symbol) {
239      return IsLeftAssocOp(symbol) || IsRightAssocOp(symbol);
240    }
241
242    private static bool IsAssocOp(ISymbol symbol) {
243      // (a <op> b) <op> c = a <op> (b <op> c)
244      return symbol is Addition ||
245        symbol is Multiplication ||
246        symbol is And ||
247        symbol is Or ||
248        symbol is Xor;
249    }
250
251    private static bool IsLeftAssocOp(ISymbol symbol) {
252      // a <op> b <op> c = (a <op> b) <op> c
253      return symbol is Addition ||
254        symbol is Subtraction ||
255        symbol is Multiplication ||
256        symbol is Division ||
257        symbol is And ||
258        symbol is Or ||
259        symbol is Xor;
260    }
261
262    private static bool IsRightAssocOp(ISymbol symbol) {
263      // a <op> b <op> c = a <op> (b <op> c)
264      // Negation (single-argument subtraction) is also right-assoc, but we do not have a separate symbol for negation.
265      return symbol is Not ||
266             symbol is Power; // x^y^z = x^(y^z) (as in Fortran or Mathematica)
267    }
268
269    private static void AppendNumber(StringBuilder strBuilder, List<KeyValuePair<string, double>> parameters, double value, string formatString, NumberFormatInfo numberFormat) {
270      if (parameters != null) {
271        string paramKey = $"c_{parameters.Count}";
272        strBuilder.AppendFormat(CultureInfo.InvariantCulture, "{0}", paramKey);
273        parameters.Add(new KeyValuePair<string, double>(paramKey, value));
274      } else {
275        strBuilder.Append(value.ToString(formatString, numberFormat));
276      }
277    }
278
279    private static void AppendVariableName(StringBuilder strBuilder, string name) {
280      if (name.Contains("'"))
281        strBuilder.AppendFormat("\"{0}\"", name);
282      else
283        strBuilder.AppendFormat("'{0}'", name);
284    }
285
286    private static string GetToken(ISymbol symbol) {
287      var tok = InfixExpressionParser.knownSymbols.GetBySecond(symbol).FirstOrDefault();
288      if (tok == null)
289        throw new ArgumentException(string.Format("Unknown symbol {0} found.", symbol.Name));
290      return tok;
291    }
292  }
293
294  /// <summary>
295  /// Formats mathematical expressions in infix form. E.g. x1 * (3.0 * x2 + x3)
296  /// </summary>
297  [StorableType("6FE2C83D-A594-4ABF-B101-5AEAEA6D3E3D")]
298  [Item("Infix Symbolic Expression Tree Formatter",
299    "A string formatter that converts symbolic expression trees to infix expressions.")]
300  public sealed class InfixExpressionFormatter : NamedItem, ISymbolicExpressionTreeStringFormatter {
301    [StorableConstructor]
302    private InfixExpressionFormatter(StorableConstructorFlag _) : base(_) { }
303
304    private InfixExpressionFormatter(InfixExpressionFormatter original, Cloner cloner) : base(original, cloner) { }
305
306    public InfixExpressionFormatter()
307      : base() {
308      Name = ItemName;
309      Description = ItemDescription;
310    }
311
312    public override IDeepCloneable Clone(Cloner cloner) {
313      return new InfixExpressionFormatter(this, cloner);
314    }
315
316    /// <summary>
317    /// Produces an infix expression for a given expression tree.
318    /// </summary>
319    /// <param name="symbolicExpressionTree">The tree representation of the expression.</param>
320    /// <param name="numberFormat">Number format that should be used for parameters (e.g. NumberFormatInfo.InvariantInfo (default)).</param>
321    /// <param name="formatString">The format string for parameters (e.g. \"G4\" to limit to 4 digits, default is \"G\")</param>
322    /// <returns>Infix expression</returns>
323    public string Format(ISymbolicExpressionTree symbolicExpressionTree, NumberFormatInfo numberFormat,
324                         string formatString = "G") {
325      // skip root and start symbols
326      StringBuilder strBuilder = new StringBuilder();
327      var cleanTree = (ISymbolicExpressionTree)symbolicExpressionTree.Clone();
328      BaseInfixExpressionFormatter.ConvertToBinaryLeftAssoc(cleanTree);
329      BaseInfixExpressionFormatter.FormatRecursively(cleanTree.Root.GetSubtree(0).GetSubtree(0),
330        strBuilder, numberFormat, formatString);
331      return strBuilder.ToString();
332    }
333
334    public string Format(ISymbolicExpressionTree symbolicExpressionTree) {
335      return Format(symbolicExpressionTree, NumberFormatInfo.InvariantInfo);
336    }
337  }
338
339  [StorableType("54D917E8-134E-4066-9A60-2737C12D81DC")]
340  [Item("Infix String Formatter", "Formatter for symbolic expressions, which produces an infix expression " +
341                                 "as well as a list of all coefficient values")]
342  public sealed class InfixExpressionStringFormatter : NamedItem, ISymbolicExpressionTreeStringFormatter {
343    [StorableConstructor]
344    private InfixExpressionStringFormatter(StorableConstructorFlag _) : base(_) { }
345
346    private InfixExpressionStringFormatter(InfixExpressionStringFormatter original, Cloner cloner) : base(original, cloner) { }
347
348    public InfixExpressionStringFormatter() : base() {
349      Name = ItemName;
350      Description = ItemDescription;
351    }
352
353    public override IDeepCloneable Clone(Cloner cloner) {
354      return new InfixExpressionStringFormatter(this, cloner);
355    }
356
357    public string Format(ISymbolicExpressionTree symbolicExpressionTree) {
358      StringBuilder strBuilder = new StringBuilder();
359      var parameters = new List<KeyValuePair<string, double>>();
360      var cleanTree = (ISymbolicExpressionTree)symbolicExpressionTree.Clone();
361      BaseInfixExpressionFormatter.ConvertToBinaryLeftAssoc(cleanTree);
362      BaseInfixExpressionFormatter.FormatRecursively(cleanTree.Root.GetSubtree(0).GetSubtree(0),
363        strBuilder, NumberFormatInfo.InvariantInfo, "G", parameters);
364      strBuilder.Append($"{Environment.NewLine}{Environment.NewLine}");
365
366      int maxDigits = GetDigits(parameters.Count);
367      int padding = parameters.Max(x => x.Value.ToString("F12", CultureInfo.InvariantCulture).Length);
368      foreach (var param in parameters) {
369        int digits = GetDigits(int.Parse(param.Key.Substring(2)));
370        strBuilder.Append($"{param.Key}{new string(' ', maxDigits - digits)} = " +
371                          string.Format($"{{0,{padding}:F12}}", param.Value, CultureInfo.InvariantCulture) +
372                          Environment.NewLine);
373      }
374
375      return strBuilder.ToString();
376    }
377
378    private int GetDigits(int x) {
379      if (x == 0) return 1;
380      return (int)Math.Floor(Math.Log10(x) + 1);
381    }
382  }
383}
Note: See TracBrowser for help on using the repository browser.