Changeset 14720


Ignore:
Timestamp:
03/06/17 17:15:11 (4 years ago)
Author:
gkronber
Message:

#2650: changed translation of variable names to C# identifiers

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Formatters/SymbolicDataAnalysisExpressionCSharpFormatter.cs

    r14502 r14720  
    2525using System.Linq;
    2626using System.Text;
     27using System.Text.RegularExpressions;
    2728using HeuristicLab.Common;
    2829using HeuristicLab.Core;
     
    5657
    // Matches a character that may NOT start a C# identifier, i.e. anything other than
    // '_' or a letter-character (Unicode classes Lu, Ll, Lt, Lm, Lo, Nl).
    // Built once and shared, instead of being re-created for every formatted variable.
    private static readonly Regex InvalidIdentifierStartRegex =
      new Regex(@"[^_\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}]");

    // Matches a character that may NOT appear anywhere in a C# identifier, i.e. anything other than
    // a letter-character (Lu, Ll, Lt, Lm, Lo, Nl), combining-character (Mn, Mc),
    // decimal-digit-character (Nd), connecting-character (Pc, which includes '_')
    // or formatting-character (Cf).
    private static readonly Regex InvalidIdentifierPartRegex =
      new Regex(@"[^\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\p{Cf}]");

    /// <summary>
    /// Tries to convert a variable name to a valid, readable C# identifier.
    /// </summary>
    /// <param name="name">The variable name as it appears in the expression tree.</param>
    /// <returns>
    /// A verbatim identifier ("@" prefix, so names equal to C# keywords stay legal) in which
    /// every character that is not allowed inside an identifier is replaced by '_', and an
    /// extra '_' is prepended when the first character is not a valid identifier start
    /// (e.g. a digit). A null or empty name yields "@_".
    /// </returns>
    /// <remarks>
    /// The mapping is not injective: e.g. "x y" and "x_y" both become "@x_y".
    /// The following would be collision-free for all possible variable names, but unreadable:
    ///   return "_" + string.Join("_", Encoding.UTF8.GetBytes(name));
    /// </remarks>
    private string VariableName2Identifier(string name) {
      // Guard: the first-character check below needs at least one character;
      // Substring(0, 1) on an empty string would throw ArgumentOutOfRangeException.
      if (string.IsNullOrEmpty(name)) {
        return "@_";
      }

      // Only the first UTF-16 code unit is inspected (same as the previous Substring(0, 1) check).
      bool needsLeadingUnderscore = InvalidIdentifierStartRegex.IsMatch(name, 0)
        && InvalidIdentifierStartRegex.Match(name, 0).Index == 0;

      return "@" +
        (needsLeadingUnderscore ? "_" : "") + // prepend '_' if necessary
        InvalidIdentifierPartRegex.Replace(name, "_");
    }
    6099
    61100    private void FormatRecursively(ISymbolicExpressionTreeNode node, StringBuilder strBuilder) {
    62101      // TODO: adapt to interpreter semantics. The HL interpreter also allows Boolean operations on reals
    63       if (node.Subtrees.Any()) {
    64         if (node.Symbol is Addition) {
     102      if(node.Subtrees.Any()) {
     103        if(node.Symbol is Addition) {
    65104          FormatOperator(node, "+", strBuilder);
    66         } else if (node.Symbol is And) {
     105        } else if(node.Symbol is And) {
    67106          FormatOperator(node, "&&", strBuilder);
    68         } else if (node.Symbol is Average) {
     107        } else if(node.Symbol is Average) {
    69108          FormatFunction(node, "Average", strBuilder);
    70         } else if (node.Symbol is Cosine) {
     109        } else if(node.Symbol is Cosine) {
    71110          FormatFunction(node, "Math.Cos", strBuilder);
    72         } else if (node.Symbol is Division) {
     111        } else if(node.Symbol is Division) {
    73112          FormatDivision(node, strBuilder);
    74         } else if (node.Symbol is Exponential) {
     113        } else if(node.Symbol is Exponential) {
    75114          FormatFunction(node, "Math.Exp", strBuilder);
    76         } else if (node.Symbol is GreaterThan) {
     115        } else if(node.Symbol is GreaterThan) {
    77116          FormatOperator(node, ">", strBuilder);
    78         } else if (node.Symbol is IfThenElse) {
     117        } else if(node.Symbol is IfThenElse) {
    79118          FormatFunction(node, "EvaluateIf", strBuilder);
    80         } else if (node.Symbol is LessThan) {
     119        } else if(node.Symbol is LessThan) {
    81120          FormatOperator(node, "<", strBuilder);
    82         } else if (node.Symbol is Logarithm) {
     121        } else if(node.Symbol is Logarithm) {
    83122          FormatFunction(node, "Math.Log", strBuilder);
    84         } else if (node.Symbol is Multiplication) {
     123        } else if(node.Symbol is Multiplication) {
    85124          FormatOperator(node, "*", strBuilder);
    86         } else if (node.Symbol is Not) {
     125        } else if(node.Symbol is Not) {
    87126          FormatOperator(node, "!", strBuilder);
    88         } else if (node.Symbol is Or) {
     127        } else if(node.Symbol is Or) {
    89128          FormatOperator(node, "||", strBuilder);
    90         } else if (node.Symbol is Xor) {
     129        } else if(node.Symbol is Xor) {
    91130          FormatOperator(node, "^", strBuilder);
    92         } else if (node.Symbol is Sine) {
     131        } else if(node.Symbol is Sine) {
    93132          FormatFunction(node, "Math.Sin", strBuilder);
    94         } else if (node.Symbol is Subtraction) {
     133        } else if(node.Symbol is Subtraction) {
    95134          FormatSubtraction(node, strBuilder);
    96         } else if (node.Symbol is Tangent) {
     135        } else if(node.Symbol is Tangent) {
    97136          FormatFunction(node, "Math.Tan", strBuilder);
    98         } else if (node.Symbol is Square) {
     137        } else if(node.Symbol is Square) {
    99138          FormatSquare(node, strBuilder);
    100         } else if (node.Symbol is SquareRoot) {
     139        } else if(node.Symbol is SquareRoot) {
    101140          FormatFunction(node, "Math.Sqrt", strBuilder);
    102         } else if (node.Symbol is Power) {
     141        } else if(node.Symbol is Power) {
    103142          FormatFunction(node, "Math.Pow", strBuilder);
    104         } else if (node.Symbol is Root) {
     143        } else if(node.Symbol is Root) {
    105144          FormatRoot(node, strBuilder);
    106145        } else {
     
    108147        }
    109148      } else {
    110         if (node is VariableTreeNode) {
     149        if(node is VariableTreeNode) {
    111150          var varNode = node as VariableTreeNode;
    112151          strBuilder.AppendFormat("{0} * {1}", VariableName2Identifier(varNode.VariableName), varNode.Weight.ToString("g17", CultureInfo.InvariantCulture));
    113         } else if (node is ConstantTreeNode) {
     152        } else if(node is ConstantTreeNode) {
    114153          var constNode = node as ConstantTreeNode;
    115154          strBuilder.Append(constNode.Value.ToString("g17", CultureInfo.InvariantCulture));
    116         } else if (node.Symbol is FactorVariable) {
     155        } else if(node.Symbol is FactorVariable) {
    117156          var factorNode = node as FactorVariableTreeNode;
    118157          FormatFactor(factorNode, strBuilder);
    119         } else if (node.Symbol is BinaryFactorVariable) {
     158        } else if(node.Symbol is BinaryFactorVariable) {
    120159          var binFactorNode = node as BinaryFactorVariableTreeNode;
    121160          FormatBinaryFactor(binFactorNode, strBuilder);
     
    150189
    151190    private void FormatDivision(ISymbolicExpressionTreeNode node, StringBuilder strBuilder) {
    152       if (node.SubtreeCount == 1) {
     191      if(node.SubtreeCount == 1) {
    153192        strBuilder.Append("1.0 / ");
    154193        FormatRecursively(node.GetSubtree(0), strBuilder);
     
    156195        FormatRecursively(node.GetSubtree(0), strBuilder);
    157196        strBuilder.Append("/ (");
    158         for (int i = 1; i < node.SubtreeCount; i++) {
    159           if (i > 1) strBuilder.Append(" * ");
     197        for(int i = 1; i < node.SubtreeCount; i++) {
     198          if(i > 1) strBuilder.Append(" * ");
    160199          FormatRecursively(node.GetSubtree(i), strBuilder);
    161200        }
     
    165204
    166205    private void FormatSubtraction(ISymbolicExpressionTreeNode node, StringBuilder strBuilder) {
    167       if (node.SubtreeCount == 1) {
     206      if(node.SubtreeCount == 1) {
    168207        strBuilder.Append("-");
    169208        FormatRecursively(node.GetSubtree(0), strBuilder);
     
    176215    private void FormatOperator(ISymbolicExpressionTreeNode node, string symbol, StringBuilder strBuilder) {
    177216      strBuilder.Append("(");
    178       foreach (var child in node.Subtrees) {
     217      foreach(var child in node.Subtrees) {
    179218        FormatRecursively(child, strBuilder);
    180         if (child != node.Subtrees.Last())
     219        if(child != node.Subtrees.Last())
    181220          strBuilder.Append(" " + symbol + " ");
    182221      }
     
    186225    private void FormatFunction(ISymbolicExpressionTreeNode node, string function, StringBuilder strBuilder) {
    187226      strBuilder.Append(function + "(");
    188       foreach (var child in node.Subtrees) {
     227      foreach(var child in node.Subtrees) {
    189228        FormatRecursively(child, strBuilder);
    190         if (child != node.Subtrees.Last())
     229        if(child != node.Subtrees.Last())
    191230          strBuilder.Append(", ");
    192231      }
     
    207246      // here we don't have access to problemData to determine the type for each variable (double/string) therefore we must distinguish based on the symbol type
    208247      HashSet<string> doubleVarNames = new HashSet<string>();
    209       foreach (var node in symbolicExpressionTree.IterateNodesPostfix().Where(x => x is VariableTreeNode || x is VariableConditionTreeNode)) {
     248      foreach(var node in symbolicExpressionTree.IterateNodesPostfix().Where(x => x is VariableTreeNode || x is VariableConditionTreeNode)) {
    210249        doubleVarNames.Add(((IVariableTreeNode)node).VariableName);
    211250      }
    212251
    213252      HashSet<string> stringVarNames = new HashSet<string>();
    214       foreach (var node in symbolicExpressionTree.IterateNodesPostfix().Where(x => x is BinaryFactorVariableTreeNode || x is FactorVariableTreeNode)) {
     253      foreach(var node in symbolicExpressionTree.IterateNodesPostfix().Where(x => x is BinaryFactorVariableTreeNode || x is FactorVariableTreeNode)) {
    215254        stringVarNames.Add(((IVariableTreeNode)node).VariableName);
    216255      }
     
    219258      strBuilder.Append(string.Join(", ", orderedNames));
    220259
    221       if (stringVarNames.Any() && doubleVarNames.Any())
     260      if(stringVarNames.Any() && doubleVarNames.Any())
    222261        strBuilder.AppendLine(",");
    223262      orderedNames = doubleVarNames.OrderBy(n => n, new NaturalStringComparer()).Select(n => "double " + VariableName2Identifier(n) + " /* " + n + " */");
Note: See TracChangeset for help on using the changeset viewer.