javascript - Damerau-Levenshtein distance Implementation -
i'm trying create damerau-levenshtein distance function in js.
i've found description off algorithm on wikipedia, no implementation off it. says:
to devise proper algorithm calculate unrestricted damerau–levenshtein distance note there exists optimal sequence of edit operations, once-transposed letters never modified afterwards. thus, need consider 2 symmetric ways of modifying substring more once: (1) transpose letters , insert arbitrary number of characters between them, or (2) delete sequence of characters , transpose letters become adjacent after deletion. straightforward implementation of idea gives algorithm of cubic complexity: o\left (m \cdot n \cdot \max(m, n) \right ), m , n string lengths. using ideas of lowrance , wagner,[7] naive algorithm can improved o\left (m \cdot n \right) in worst case. interesting bitap algorithm can modified process transposition. see information retrieval section of[1] example of such adaptation.
https://en.wikipedia.org/wiki/damerau%e2%80%93levenshtein_distance
the section [1] points http://acl.ldc.upenn.edu/p/p00/p00-1037.pdf more complicated me.
if understood correctly, it's not easy create implementation off it.
here's levenshtein implementation use :
levenshtein=function (s1, s2) { // discuss at: http://phpjs.org/functions/levenshtein/ // original by: carlos r. l. rodrigues (http://www.jsfromhell.com) // bugfixed by: onno marsman // revised by: andrea giammarchi (http://webreflection.blogspot.com) // reimplemented by: brett zamir (http://brett-zamir.me) // reimplemented by: alexander m beedie // example 1: levenshtein('kevin van zonneveld', 'kevin van sommeveld'); // returns 1: 3 if (s1 == s2) { return 0; } var s1_len = s1.length; var s2_len = s2.length; if (s1_len === 0) { return s2_len; } if (s2_len === 0) { return s1_len; } // begin static var split = false; try { split = !('0')[0]; } catch (e) { // earlier ie may not support access string index split = true; } // end static if (split) { s1 = s1.split(''); s2 = s2.split(''); } var v0 = new array(s1_len + 1); var v1 = new array(s1_len + 1); var s1_idx = 0, s2_idx = 0, cost = 0; (s1_idx = 0; s1_idx < s1_len + 1; s1_idx++) { v0[s1_idx] = s1_idx; } var char_s1 = '', char_s2 = ''; (s2_idx = 1; s2_idx <= s2_len; s2_idx++) { v1[0] = s2_idx; char_s2 = s2[s2_idx - 1]; (s1_idx = 0; s1_idx < s1_len; s1_idx++) { char_s1 = s1[s1_idx]; cost = (char_s1 == char_s2) ? 0 : 1; var m_min = v0[s1_idx + 1] + 1; var b = v1[s1_idx] + 1; var c = v0[s1_idx] + cost; if (b < m_min) { m_min = b; } if (c < m_min) { m_min = c; } v1[s1_idx + 1] = m_min; } var v_tmp = v0; v0 = v1; v1 = v_tmp; } return v0[s1_len]; } what ideas building such algorithm and, if think complicated, make no difference between 'l' (l lowercase) , 'i' (i uppercase) example.
the gist @doukremt gave: https://gist.github.com/doukremt/9473228
gives following in javascript.
you can change weights of operations in weighter object.
var levenshteinweighted= function(seq1,seq2) { var len1=seq1.length; var len2=seq2.length; var i, j; var dist; var ic, dc, rc; var last, old, column; var weighter={ insert:function(c) { return 1.; }, delete:function(c) { return 0.5; }, replace:function(c, d) { return 0.3; } }; /* don't swap sequences, or gonna painful */ if (len1 == 0 || len2 == 0) { dist = 0; while (len1) dist += weighter.delete(seq1[--len1]); while (len2) dist += weighter.insert(seq2[--len2]); return dist; } column = []; // malloc((len2 + 1) * sizeof(double)); //if (!column) return -1; column[0] = 0; (j = 1; j <= len2; ++j) column[j] = column[j - 1] + weighter.insert(seq2[j - 1]); (i = 1; <= len1; ++i) { last = column[0]; /* m[i-1][0] */ column[0] += weighter.delete(seq1[i - 1]); /* m[i][0] */ (j = 1; j <= len2; ++j) { old = column[j]; if (seq1[i - 1] == seq2[j - 1]) { column[j] = last; /* m[i-1][j-1] */ } else { ic = column[j - 1] + weighter.insert(seq2[j - 1]); /* m[i][j-1] */ dc = column[j] + weighter.delete(seq1[i - 1]); /* m[i-1][j] */ rc = last + weighter.replace(seq1[i - 1], seq2[j - 1]); /* m[i-1][j-1] */ column[j] = ic < dc ? ic : (dc < rc ? dc : rc); } last = old; } } dist = column[len2]; return dist; }
Comments
Post a Comment