[15334] | 1 | using System;
|
---|
| 2 |
|
---|
[15771] | 3 | namespace HeuristicLab.Problems.ProgramSynthesis { |
---|
/// <summary>
/// Extension methods on <see cref="string"/>: an integer check and two
/// Levenshtein (edit) distance implementations.
/// </summary>
public static class StringExtensions {
  /// <summary>
  /// Reports whether <paramref name="str"/> parses as a 32-bit signed integer.
  /// </summary>
  /// <param name="str">The text to test; may be null.</param>
  /// <returns>
  /// The result of <c>int.TryParse(str, out n)</c>. Despite the name, only values
  /// that <c>int.TryParse</c> accepts count as numeric, so text such as "1.5" or
  /// an integer outside the <c>int</c> range yields false.
  /// </returns>
  public static bool IsNumeric(this string str) {
    int n;
    return int.TryParse(str, out n);
  }

  /// <summary>
  /// Computes the Levenshtein distance between two strings using a single
  /// row of costs instead of a full matrix.
  /// https://github.com/DanHarltey/Fastenshtein
  /// </summary>
  /// <param name="source">First string; null is treated as the empty string.</param>
  /// <param name="target">Second string; null is treated as the empty string.</param>
  /// <returns>
  /// The minimum number of single-character insertions, deletions and
  /// substitutions needed to turn <paramref name="source"/> into
  /// <paramref name="target"/>.
  /// </returns>
  public static int LevenshteinDistance(this string source, string target) {
    // Treat null like the empty string so this method agrees with
    // LevenshteinDistance2 instead of throwing a NullReferenceException.
    if (source == null) {
      source = string.Empty;
    }

    if (target == null) {
      target = string.Empty;
    }

    if (target.Length == 0) {
      return source.Length;
    }

    int[] costs = new int[target.Length];

    // First row: turning the empty prefix of source into target[0..j] takes j + 1 insertions.
    for (int i = 0; i < costs.Length; i++) {
      costs[i] = i + 1;
    }

    for (int i = 0; i < source.Length; i++) {
      // cost of the first index
      int cost = i;
      int additionCost = i;

      // cache the character for the inner loop to avoid repeated index lookups and bounds checking
      char sourceChar = source[i];

      for (int j = 0; j < target.Length; j++) {
        // value computed for the previous column of the current row
        int insertionCost = cost;

        // previous row's value for the previous column (the diagonal neighbour)
        cost = additionCost;

        // previous row's value for this column; read once and reused as the
        // diagonal neighbour on the next iteration
        additionCost = costs[j];

        if (sourceChar != target[j]) {
          // characters differ: take the cheapest neighbour and pay one edit
          if (insertionCost < cost) {
            cost = insertionCost;
          }

          if (additionCost < cost) {
            cost = additionCost;
          }

          ++cost;
        }

        costs[j] = cost;
      }
    }

    return costs[costs.Length - 1];
  }

  /// <summary>
  /// Computes the Levenshtein distance between two strings using a full
  /// (source.Length + 1) x (target.Length + 1) cost matrix.
  /// https://www.dotnetperls.com/levenshtein
  /// </summary>
  /// <param name="source">First string; null is treated as the empty string.</param>
  /// <param name="target">Second string; null is treated as the empty string.</param>
  /// <returns>
  /// The minimum number of single-character insertions, deletions and
  /// substitutions needed to turn <paramref name="source"/> into
  /// <paramref name="target"/>.
  /// </returns>
  public static int LevenshteinDistance2(this string source, string target) {
    int n = source == null ? 0 : source.Length;
    int m = target == null ? 0 : target.Length;

    // Step 1: if either side is empty the distance is the other side's length.
    // Checked before allocating the matrix so the trivial cases allocate nothing.
    if (n == 0) {
      return m;
    }

    if (m == 0) {
      return n;
    }

    int[,] d = new int[n + 1, m + 1];

    // Step 2: first column and first row hold the distances from/to the empty prefix.
    for (int i = 0; i <= n; i++) {
      d[i, 0] = i;
    }

    for (int j = 0; j <= m; j++) {
      d[0, j] = j;
    }

    // Step 3
    for (int i = 1; i <= n; i++) {
      // Step 4
      for (int j = 1; j <= m; j++) {
        // Step 5: substitution is free when the characters match
        int cost = (target[j - 1] == source[i - 1]) ? 0 : 1;

        // Step 6: cheapest of deletion, insertion and substitution
        d[i, j] = Math.Min(
          Math.Min(d[i - 1, j] + 1, d[i, j - 1] + 1),
          d[i - 1, j - 1] + cost);
      }
    }

    // Step 7
    return d[n, m];
  }
}
|
---|
| 105 | }
|
---|