Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/27/22 11:02:45 (3 years ago)
Author:
gkronber
Message:

#3145: refactored infix formatter to improve output (less parenthesis) and added unit tests to test that the infix formatter works correclty.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Formatters/InfixExpressionFormatter.cs

    r18203 r18211  
    3232namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
    3333  public static class BaseInfixExpressionFormatter {
     34    /// <summary>
     35    ///  Performs some basic re-writing steps to simplify the code for formatting. Tree is changed.
     36    ///  Removes single-argument +, * which have no effect
     37    ///  Replaces variables with coefficients by an explicitly multiplication
     38    ///  Replaces single-argument / with 1 / (..)
     39    ///  Replaces multi-argument +, *, /, - with nested binary operations
     40    ///  Rotates operations to remove necessity to group sub-expressions
     41    /// </summary>
     42    /// <param name="tree">The tree is changed</param>
     43    public static void ConvertToBinaryLeftAssoc(ISymbolicExpressionTree tree) {
     44      ConvertToBinaryLeftAssocRec(tree.Root.GetSubtree(0), tree.Root.GetSubtree(0).GetSubtree(0));
     45    }
     46
     47    private static void ConvertToBinaryLeftAssocRec(ISymbolicExpressionTreeNode parent, ISymbolicExpressionTreeNode n) {
     48      // recurse post-order
     49      foreach (var subtree in n.Subtrees.ToArray()) ConvertToBinaryLeftAssocRec(n, subtree); // ToArray required as n.Subtrees is changed in method
     50
     51      if (n is VariableTreeNode varTreeNode && varTreeNode.Weight != 1.0) {
     52        var mul = new Multiplication().CreateTreeNode();
     53        var num = (NumberTreeNode)new Number().CreateTreeNode();
     54        num.Value = varTreeNode.Weight;
     55        varTreeNode.Weight = 1.0;
     56        parent.ReplaceSubtree(n, mul);
     57        mul.AddSubtree(num);
     58        mul.AddSubtree(varTreeNode);
     59      } else if (n.SubtreeCount == 1 && (n.Symbol is Addition || n.Symbol is Multiplication || n.Symbol is And || n.Symbol is Or || n.Symbol is Xor)) {
     60        // single-argument addition or multiplication has no effect -> remove
     61        parent.ReplaceSubtree(n, n.GetSubtree(0));
     62      } else if (n.SubtreeCount == 1 && (n.Symbol is Division)) {
     63        // single-argument division is 1/f(x)
     64        n.InsertSubtree(0, new Constant() { Value = 1.0 }.CreateTreeNode());
     65      } else if (n.SubtreeCount > 2 && IsLeftAssocOp(n.Symbol)) {
     66        // multi-argument +, -, *, / are the same as multiple binary operations (left-associative)
     67        var sy = n.Symbol;
     68
     69        var additionalTrees = n.Subtrees.Skip(2).ToArray();
     70        while (n.SubtreeCount > 2) n.RemoveSubtree(2); // keep only the first two arguments
     71
     72        var childIdx = parent.IndexOfSubtree(n);
     73        parent.RemoveSubtree(childIdx);
     74        var newChild = n;
     75        // build a tree bottom to top, each time adding a subtree on the right
     76        for (int i = 0; i < additionalTrees.Length; i++) {
     77          var newOp = sy.CreateTreeNode();
     78          newOp.AddSubtree(newChild);
     79          newOp.AddSubtree(additionalTrees[i]);
     80          newChild = newOp;
     81        }
     82        parent.InsertSubtree(childIdx, newChild);
     83      } else if (n.SubtreeCount == 2 && n.GetSubtree(1).SubtreeCount == 2 &&
     84                 IsAssocOp(n.Symbol) && IsOperator(n.GetSubtree(1).Symbol) &&
     85                 Priority(n.Symbol) == Priority(n.GetSubtree(1).Symbol)) {
     86        // f(x) <op> (g(x) <op> h(x))) is the same as  (f(x) <op> g(x)) <op> h(x) for associative <op>
     87        // which is the same as f(x) <op> g(x) <op> h(x) for left-associative <op>
     88        // The latter version is preferred because we do not need to write the parentheses.
     89        // rotation:
     90        //     (op1)              (op2)
     91        //     /   \              /  \
     92        //    a    (op2)       (op1)  c
     93        //         /  \        /  \
     94        //        b    c      a    b     
     95        var op1 = n;
     96        var op2 = n.GetSubtree(1);
     97        var b = op2.GetSubtree(0); op2.RemoveSubtree(0);
     98        op1.ReplaceSubtree(op2, b);
     99        parent.ReplaceSubtree(op1, op2);
     100        op2.InsertSubtree(0, op1);
     101      }
     102    }
    34103    public static void FormatRecursively(ISymbolicExpressionTreeNode node, StringBuilder strBuilder,
    35104                                          NumberFormatInfo numberFormat, string formatString, List<KeyValuePair<string, double>> parameters = null) {
    36       if (node.SubtreeCount > 1) {
    37         var token = GetToken(node.Symbol);
    38         // operators
    39         if (token == "+" || token == "-" || token == "OR" || token == "XOR") {
    40           var parenthesisRequired = false;
    41           if (node.Parent != null && IsOperator(node.Parent.Symbol)) {
    42             var parentOp = GetToken(node.Parent.Symbol);
    43             if (parentOp != "+" && parentOp != "-" && parentOp != "OR" && parentOp != "XOR")
    44               parenthesisRequired = true;
    45           }
     105      // This method assumes that the tree has been converted to binary and left-assoc form (see ConvertToBinaryLeftAssocRec).
     106      // An exception is thrown if the tree has a different shape.
     107
     108      if (node.SubtreeCount == 0) {
     109        // no subtrees
     110        if (node.Symbol is Variable) {
     111          var varNode = node as VariableTreeNode;
     112          if (varNode.Weight != 1.0) { throw new NotSupportedException("Infix formatter does not support variables with coefficients."); }
     113          AppendVariableName(strBuilder, varNode.VariableName);
     114        } else if (node is INumericTreeNode numNode) {
     115          var parenthesisRequired = RequiresParenthesis(node.Parent, node);
    46116          if (parenthesisRequired) strBuilder.Append("(");
    47           FormatRecursively(node.Subtrees.First(), strBuilder, numberFormat, formatString, parameters);
    48 
    49           foreach (var subtree in node.Subtrees.Skip(1)) {
    50             strBuilder.Append(" ").Append(token).Append(" ");
    51             FormatRecursively(subtree, strBuilder, numberFormat, formatString, parameters);
    52           }
    53 
     117
     118          AppendNumber(strBuilder, parameters, numNode.Value, formatString, numberFormat);
    54119          if (parenthesisRequired) strBuilder.Append(")");
    55         } else if (token == "*" || token == "/" || token == "AND") {
    56           FormatRecursively(node.Subtrees.First(), strBuilder, numberFormat, formatString, parameters);
    57 
    58           foreach (var subtree in node.Subtrees.Skip(1)) {
    59             strBuilder.Append(" ").Append(token).Append(" ");
    60             // a / b * c => a / (b * C)
    61             if (subtree.SubtreeCount > 1 && token == "/" && GetToken(subtree.Symbol) == "*") {
    62               strBuilder.Append("(");
    63               FormatRecursively(subtree, strBuilder, numberFormat, formatString, parameters);
    64               strBuilder.Append(")");
    65             } else {
    66               FormatRecursively(subtree, strBuilder, numberFormat, formatString, parameters);
    67             }
    68           }
    69         } else if (token == "^") {
    70           // handle integer powers directly
    71           FormatRecursively(node.Subtrees.First(), strBuilder, numberFormat, formatString, parameters);
    72 
    73           var power = node.GetSubtree(1);
    74           if (power is INumericTreeNode numNode && Math.Truncate(numNode.Value) == numNode.Value) {
    75             strBuilder.Append(" ").Append(token).Append(" ").Append(numNode.Value.ToString(formatString, numberFormat));
    76           } else {
    77             strBuilder.Append(" ").Append(token).Append(" ");
    78             FormatRecursively(power, strBuilder, numberFormat, formatString, parameters);
    79           }
    80         } else {
    81           // function with multiple arguments
    82           strBuilder.Append(token).Append("(");
    83           FormatRecursively(node.Subtrees.First(), strBuilder, numberFormat, formatString, parameters);
    84           foreach (var subtree in node.Subtrees.Skip(1)) {
    85             strBuilder.Append(", ");
    86             FormatRecursively(subtree, strBuilder, numberFormat, formatString, parameters);
    87           }
    88 
    89           strBuilder.Append(")");
    90         }
    91       } else if (node.SubtreeCount == 1) {
    92         var token = GetToken(node.Symbol);
    93         if (token == "-" || token == "NOT") {
    94           strBuilder.Append(token);
    95           FormatRecursively(node.GetSubtree(0), strBuilder, numberFormat, formatString, parameters);
    96         } else if (token == "/") {
    97           strBuilder.Append("1/");
    98           FormatRecursively(node.GetSubtree(0), strBuilder, numberFormat, formatString, parameters);
    99         } else if (token == "+" || token == "*") {
    100           FormatRecursively(node.GetSubtree(0), strBuilder, numberFormat, formatString, parameters);
    101         } else {
    102           // function with only one argument
    103           strBuilder.Append(token).Append("(");
    104           FormatRecursively(node.GetSubtree(0), strBuilder, numberFormat, formatString, parameters);
    105           strBuilder.Append(")");
    106         }
    107       } else {
    108         // no subtrees
    109         if (node.Symbol is LaggedVariable) {
     120        } else if (node.Symbol is LaggedVariable) {
    110121          var varNode = node as LaggedVariableTreeNode;
    111122          if (!varNode.Weight.IsAlmost(1.0)) {
     
    119130                    .AppendFormat(numberFormat, "{0}", varNode.Lag)
    120131                    .Append(")");
    121         } else if (node.Symbol is Variable) {
    122           var varNode = node as VariableTreeNode;
    123           if (!varNode.Weight.IsAlmost(1.0)) {
    124             AppendNumber(strBuilder, parameters, varNode.Weight, formatString, numberFormat);
    125             strBuilder.Append("*");
    126           }
    127           AppendVariableName(strBuilder, varNode.VariableName);
    128132        } else if (node.Symbol is FactorVariable) {
    129133          var factorNode = node as FactorVariableTreeNode;
     
    145149          strBuilder.Append(" = ");
    146150          AppendVariableName(strBuilder, factorNode.VariableValue);
    147         } else if (node is INumericTreeNode numNode) {
    148           if (parameters == null && numNode.Value < 0) {
    149             // negative value
    150             strBuilder.Append(numNode.Value.ToString(formatString, numberFormat));
    151           } else {
    152             AppendNumber(strBuilder, parameters, numNode.Value, formatString, numberFormat);
    153           }
    154151        }
    155       }
    156     }
    157 
    158     private static bool IsOperator(ISymbol sy) {
    159       return sy is Addition ||
    160         sy is Subtraction ||
    161         sy is Multiplication ||
    162         sy is Division ||
    163         sy is And ||
    164         sy is Or ||
    165         sy is Xor;
     152      } else if (node.SubtreeCount == 1) {
     153        // only functions and single-argument subtraction (=negation) or NOT are possible here.
     154        var token = GetToken(node.Symbol);
     155        // the only operators that are allowed with a single argument
     156        if (node.Symbol is Subtraction || node.Symbol is Not) {
     157          if (RequiresParenthesis(node.Parent, node)) {
     158            strBuilder.Append("(");
     159          }
     160          strBuilder.Append(token);
     161          FormatRecursively(node.GetSubtree(0), strBuilder, numberFormat, formatString, parameters);
     162          if (RequiresParenthesis(node.Parent, node)) {
     163            strBuilder.Append(")");
     164          }
     165        } else if (IsOperator(node.Symbol)) {
     166          throw new FormatException("Single-argument version of " + node.Symbol.Name + " is not supported.");
     167        } else {
     168          // function with only one argument
     169          strBuilder.Append(token);
     170          strBuilder.Append("(");
     171          FormatRecursively(node.GetSubtree(0), strBuilder, numberFormat, formatString, parameters);
     172          strBuilder.Append(")");
     173        }
     174      } else if (node.SubtreeCount > 1) {
     175        var token = GetToken(node.Symbol);
     176        // operators
     177        if (IsOperator(node.Symbol)) {
     178          var parenthesisRequired = RequiresParenthesis(node.Parent, node);
     179          if (parenthesisRequired) strBuilder.Append("(");
     180          FormatRecursively(node.Subtrees.First(), strBuilder, numberFormat, formatString, parameters);
     181
     182          foreach (var subtree in node.Subtrees.Skip(1)) {
     183            strBuilder.Append(" ").Append(token).Append(" ");
     184            FormatRecursively(subtree, strBuilder, numberFormat, formatString, parameters);
     185          }
     186
     187          if (parenthesisRequired) strBuilder.Append(")");
     188        } else {
     189          // function with multiple arguments (AQ)
     190
     191          strBuilder.Append(token);
     192          strBuilder.Append("(");
     193
     194          FormatRecursively(node.Subtrees.First(), strBuilder, numberFormat, formatString, parameters);
     195          foreach (var subtree in node.Subtrees.Skip(1)) {
     196            strBuilder.Append(", ");
     197            FormatRecursively(subtree, strBuilder, numberFormat, formatString, parameters);
     198          }
     199          strBuilder.Append(")");
     200        }
     201      }
     202    }
     203
     204    private static int Priority(ISymbol symbol) {
     205      if (symbol is Addition || symbol is Subtraction || symbol is Or || symbol is Xor) return 1;
     206      if (symbol is Division || symbol is Multiplication || symbol is And) return 2;
     207      if (symbol is Power || symbol is Not) return 3;
     208      throw new NotSupportedException();
     209    }
     210
     211    private static bool RequiresParenthesis(ISymbolicExpressionTreeNode parent, ISymbolicExpressionTreeNode child) {
     212      if (child.SubtreeCount > 2 && IsOperator(child.Symbol)) throw new NotSupportedException("Infix formatter does not support operators with more than two children.");
     213
     214      // Basically: We need a parenthesis for child if the parent symbol binds stronger than child symbol.
     215      if (parent.Symbol == null || parent.Symbol is ProgramRootSymbol || parent.Symbol is StartSymbol) return false;
     216      if (IsFunction(parent.Symbol)) return false;
     217      if (parent.SubtreeCount == 1 && (parent.Symbol is Subtraction)) return true;
     218      if (child.SubtreeCount == 0) return false;
     219      var parentPrio = Priority(parent.Symbol);
     220      var childPrio = Priority(child.Symbol);
     221      if (parentPrio > childPrio) return true;
     222      else if (parentPrio == childPrio) {
     223        if (IsLeftAssocOp(child.Symbol)) return parent.GetSubtree(0) != child; // (..) required only for right child for left-assoc op
     224        if (IsRightAssocOp(child.Symbol)) return parent.GetSubtree(1) != child;
     225      }
     226      return false;
     227    }
     228
     229    private static bool IsFunction(ISymbol symbol) {
     230      // functions are formatted in prefix form e.g. sin(...)
     231      return !IsOperator(symbol) && !IsLeaf(symbol);
     232    }
     233
     234    private static bool IsLeaf(ISymbol symbol) {
     235      return symbol.MaximumArity == 0;
     236    }
     237
     238    private static bool IsOperator(ISymbol symbol) {
     239      return IsLeftAssocOp(symbol) || IsRightAssocOp(symbol);
     240    }
     241
     242    private static bool IsAssocOp(ISymbol symbol) {
     243      // (a <op> b) <op> c = a <op> (b <op> c)
     244      return symbol is Addition ||
     245        symbol is Multiplication ||
     246        symbol is And ||
     247        symbol is Or ||
     248        symbol is Xor;
     249    }
     250
     251    private static bool IsLeftAssocOp(ISymbol symbol) {
     252      // a <op> b <op> c = (a <op> b) <op> c
     253      return symbol is Addition ||
     254        symbol is Subtraction ||
     255        symbol is Multiplication ||
     256        symbol is Division ||
     257        symbol is And ||
     258        symbol is Or ||
     259        symbol is Xor;
     260    }
     261
     262    private static bool IsRightAssocOp(ISymbol symbol) {
     263      // a <op> b <op> c = a <op> (b <op> c)
     264      // Negation (single-argument subtraction) is also right-assoc, but we do not have a separate symbol for negation.
     265      return symbol is Not ||
     266             symbol is Power; // x^y^z = x^(y^z) (as in Fortran or Mathematica)
    166267    }
    167268
     
    224325      // skip root and start symbols
    225326      StringBuilder strBuilder = new StringBuilder();
    226       BaseInfixExpressionFormatter.FormatRecursively(symbolicExpressionTree.Root.GetSubtree(0).GetSubtree(0),
     327      var cleanTree = (ISymbolicExpressionTree)symbolicExpressionTree.Clone();
     328      BaseInfixExpressionFormatter.ConvertToBinaryLeftAssoc(cleanTree);
     329      BaseInfixExpressionFormatter.FormatRecursively(cleanTree.Root.GetSubtree(0).GetSubtree(0),
    227330        strBuilder, numberFormat, formatString);
    228331      return strBuilder.ToString();
     
    235338
    236339  [StorableType("54D917E8-134E-4066-9A60-2737C12D81DC")]
    237   [Item("Infix String Formater", "Formatter for symbolic expressions, which produces an infix expression " +
     340  [Item("Infix String Formatter", "Formatter for symbolic expressions, which produces an infix expression " +
    238341                                 "as well as a list of all coefficient values")]
    239342  public sealed class InfixExpressionStringFormatter : NamedItem, ISymbolicExpressionTreeStringFormatter {
     
    255358      StringBuilder strBuilder = new StringBuilder();
    256359      var parameters = new List<KeyValuePair<string, double>>();
    257       BaseInfixExpressionFormatter.FormatRecursively(symbolicExpressionTree.Root.GetSubtree(0).GetSubtree(0),
     360      var cleanTree = (ISymbolicExpressionTree)symbolicExpressionTree.Clone();
     361      BaseInfixExpressionFormatter.ConvertToBinaryLeftAssoc(cleanTree);
     362      BaseInfixExpressionFormatter.FormatRecursively(cleanTree.Root.GetSubtree(0).GetSubtree(0),
    258363        strBuilder, NumberFormatInfo.InvariantInfo, "G", parameters);
    259364      strBuilder.Append($"{Environment.NewLine}{Environment.NewLine}");
Note: See TracChangeset for help on using the changeset viewer.