1 | using System;
|
---|
2 |
|
---|
3 | namespace HeuristicLab.Problems.ProgramSynthesis.Base.Extensions {
|
---|
4 | public static class StringExtensions {
|
---|
5 | public static bool IsNumeric(this string str) {
|
---|
6 | int n;
|
---|
7 | return int.TryParse(str, out n);
|
---|
8 | }
|
---|
9 |
|
---|
10 | /// <summary>
|
---|
11 | /// https://github.com/DanHarltey/Fastenshtein
|
---|
12 | /// </summary>
|
---|
13 | public static int LevenshteinDistance(this string source, string target) {
|
---|
14 | if (target.Length == 0) {
|
---|
15 | return source.Length;
|
---|
16 | }
|
---|
17 |
|
---|
18 | int[] costs = new int[target.Length];
|
---|
19 |
|
---|
20 | // Add indexing for insertion to first row
|
---|
21 | for (int i = 0; i < costs.Length;) {
|
---|
22 | costs[i] = ++i;
|
---|
23 | }
|
---|
24 |
|
---|
25 | for (int i = 0; i < source.Length; i++) {
|
---|
26 | // cost of the first index
|
---|
27 | int cost = i;
|
---|
28 | int addationCost = i;
|
---|
29 |
|
---|
30 | // cache value for inner loop to avoid index lookup and bonds checking, profiled this is quicker
|
---|
31 | char value1Char = source[i];
|
---|
32 |
|
---|
33 | for (int j = 0; j < target.Length; j++) {
|
---|
34 | int insertionCost = cost;
|
---|
35 |
|
---|
36 | cost = addationCost;
|
---|
37 |
|
---|
38 | // assigning this here reduces the array reads we do, improvement of the old version
|
---|
39 | addationCost = costs[j];
|
---|
40 |
|
---|
41 | if (value1Char != target[j]) {
|
---|
42 | if (insertionCost < cost) {
|
---|
43 | cost = insertionCost;
|
---|
44 | }
|
---|
45 |
|
---|
46 | if (addationCost < cost) {
|
---|
47 | cost = addationCost;
|
---|
48 | }
|
---|
49 |
|
---|
50 | ++cost;
|
---|
51 | }
|
---|
52 |
|
---|
53 | costs[j] = cost;
|
---|
54 | }
|
---|
55 | }
|
---|
56 |
|
---|
57 | return costs[costs.Length - 1];
|
---|
58 | }
|
---|
59 |
|
---|
60 | /// <summary>
|
---|
61 | /// https://www.dotnetperls.com/levenshtein
|
---|
62 | /// </summary>
|
---|
63 | public static int LevenshteinDistance2(this string source, string target) {
|
---|
64 | if (source == null && target == null) return 0;
|
---|
65 | if (source == null) return target.Length;
|
---|
66 | if (target == null) return source.Length;
|
---|
67 |
|
---|
68 | int n = source.Length;
|
---|
69 | int m = target.Length;
|
---|
70 | int[,] d = new int[n + 1, m + 1];
|
---|
71 |
|
---|
72 | // Step 1
|
---|
73 | if (n == 0) {
|
---|
74 | return m;
|
---|
75 | }
|
---|
76 |
|
---|
77 | if (m == 0) {
|
---|
78 | return n;
|
---|
79 | }
|
---|
80 |
|
---|
81 | // Step 2
|
---|
82 | for (int i = 0; i <= n; d[i, 0] = i++) {
|
---|
83 | }
|
---|
84 |
|
---|
85 | for (int j = 0; j <= m; d[0, j] = j++) {
|
---|
86 | }
|
---|
87 |
|
---|
88 | // Step 3
|
---|
89 | for (int i = 1; i <= n; i++) {
|
---|
90 | //Step 4
|
---|
91 | for (int j = 1; j <= m; j++) {
|
---|
92 | // Step 5
|
---|
93 | int cost = (target[j - 1] == source[i - 1]) ? 0 : 1;
|
---|
94 |
|
---|
95 | // Step 6
|
---|
96 | d[i, j] = Math.Min(
|
---|
97 | Math.Min(d[i - 1, j] + 1, d[i, j - 1] + 1),
|
---|
98 | d[i - 1, j - 1] + cost);
|
---|
99 | }
|
---|
100 | }
|
---|
101 | // Step 7
|
---|
102 | return d[n, m];
|
---|
103 | }
|
---|
104 | }
|
---|
105 | }
|
---|