N-Gram version of edit distance based on the paper by Grzegorz Kondrak, "N-gram similarity and distance", Proceedings of the Twelfth International Conference on String Processing and Information Retrieval (SPIRE 2005), pp. 115-126, Buenos Aires, Argentina, November 2005 (http://www.cs.ualberta.ca/~kondrak/papers/spire05.pdf). This implementation uses the position-based optimization to compute partial matches of n-gram sub-strings and adds a null-character prefix of size n-1 so that the first character is contained in the same number of n-grams as a middle character. Null-character prefix matches are discounted so that strings with no matching characters will return a distance of 0.
Наследование: IStringDistance
Пример #1
0
        /// <summary>
        /// Exercises <c>NGramDistance</c> constructed with an n-gram size of 1.
        /// The expectations below treat the returned value as a similarity
        /// score: identical inputs are expected to yield 1.0 and inputs with
        /// no characters in common are expected to yield 0.0.
        /// </summary>
        public void TestGetDistance1()
        {
            IStringDistance nsd = new NGramDistance(1);
            float           d   = nsd.GetDistance("al", "al");

            // Identical and fully different inputs.
            assertEquals(d, 1.0f, 0.001);
            d = nsd.GetDistance("a", "a");
            assertEquals(d, 1.0f, 0.001);
            d = nsd.GetDistance("b", "a");
            assertEquals(d, 0.0f, 0.001);

            // Partially matching word pairs with fixed expected scores.
            d = nsd.GetDistance("martha", "marhta");
            assertEquals(d, 0.6666, 0.001);
            d = nsd.GetDistance("jones", "johnson");
            assertEquals(d, 0.4285, 0.001);
            d = nsd.GetDistance("natural", "contrary");
            assertEquals(d, 0.25, 0.001);
            d = nsd.GetDistance("abcvwxyz", "cabvwxyz");
            assertEquals(d, 0.75, 0.001);
            d = nsd.GetDistance("dwayne", "duane");
            assertEquals(d, 0.666, 0.001);
            d = nsd.GetDistance("dixon", "dicksonx");
            assertEquals(d, 0.5, 0.001);
            d = nsd.GetDistance("six", "ten");
            assertEquals(d, 0, 0.001);

            // Relative comparisons between two candidate matches.
            float d1 = nsd.GetDistance("zac ephron", "zac efron");
            float d2 = nsd.GetDistance("zac ephron", "kai ephron");

            assertEquals(d1, d2, 0.001);
            d1 = nsd.GetDistance("brittney spears", "britney spears");
            d2 = nsd.GetDistance("brittney spears", "brittney startzman");
            assertTrue(d1 > d2);
            d1 = nsd.GetDistance("12345678", "12890678");
            d2 = nsd.GetDistance("12345678", "72385698");
            // The tolerance here used to be the integer literal 001 (i.e. 1),
            // which can never fail for values in the 0..1 range asserted in
            // this test; use the same 0.001 tolerance as every other check.
            assertEquals(d1, d2, 0.001);
        }
Пример #2
0
        /// <summary>
        /// Compares an empty string against a non-empty one using an n-gram
        /// size of 1 and expects a score of 0.0.
        /// </summary>
        public void TestEmpty()
        {
            const double tolerance = 0.001;

            IStringDistance unigram = new NGramDistance(1);
            float score = unigram.GetDistance("", "al");

            assertEquals(score, 0.0f, tolerance);
        }
 /// <summary>
 /// Exercises <c>NGramDistance</c> constructed with an n-gram size of 2:
 /// fixed expected scores for a set of word pairs, followed by relative
 /// comparisons between pairs of candidate matches.
 /// </summary>
 public void TestGetDistance2()
 {
     const double tolerance = 0.001;
     IStringDistance bigram = new NGramDistance(2);
     float score;

     // Identical and fully different inputs.
     score = bigram.GetDistance("al", "al");
     assertEquals(score, 1.0f, tolerance);

     score = bigram.GetDistance("a", "a");
     assertEquals(score, 1.0f, tolerance);

     score = bigram.GetDistance("b", "a");
     assertEquals(score, 0.0f, tolerance);

     // Inputs of different length sharing a character.
     score = bigram.GetDistance("a", "aa");
     assertEquals(score, 0.5f, tolerance);

     // Partially matching word pairs.
     score = bigram.GetDistance("martha", "marhta");
     assertEquals(score, 0.6666, tolerance);

     score = bigram.GetDistance("jones", "johnson");
     assertEquals(score, 0.4285, tolerance);

     score = bigram.GetDistance("natural", "contrary");
     assertEquals(score, 0.25, tolerance);

     score = bigram.GetDistance("abcvwxyz", "cabvwxyz");
     assertEquals(score, 0.625, tolerance);

     score = bigram.GetDistance("dwayne", "duane");
     assertEquals(score, 0.5833, tolerance);

     score = bigram.GetDistance("dixon", "dicksonx");
     assertEquals(score, 0.5, tolerance);

     score = bigram.GetDistance("six", "ten");
     assertEquals(score, 0, tolerance);

     // Relative comparisons: the first candidate must outscore the second.
     float first = bigram.GetDistance("zac ephron", "zac efron");
     float second = bigram.GetDistance("zac ephron", "kai ephron");
     assertTrue(second < first);

     first = bigram.GetDistance("brittney spears", "britney spears");
     second = bigram.GetDistance("brittney spears", "brittney startzman");
     assertTrue(second < first);

     // Both candidates are expected to score the same here.
     first = bigram.GetDistance("0012345678", "0012890678");
     second = bigram.GetDistance("0012345678", "0072385698");
     assertEquals(first, second, tolerance);
 }
Пример #4
0
        /// <summary>
        /// Exercises <c>NGramDistance</c> constructed with an n-gram size of 3:
        /// fixed expected scores for a set of word pairs, followed by relative
        /// comparisons between pairs of candidate matches.
        /// </summary>
        public void TestGetDistance3()
        {
            const double tolerance = 0.001;
            IStringDistance trigram = new NGramDistance(3);
            float score;

            // Identical and fully different inputs.
            score = trigram.GetDistance("al", "al");
            assertEquals(score, 1.0f, tolerance);

            score = trigram.GetDistance("a", "a");
            assertEquals(score, 1.0f, tolerance);

            score = trigram.GetDistance("b", "a");
            assertEquals(score, 0.0f, tolerance);

            // Partially matching word pairs.
            score = trigram.GetDistance("martha", "marhta");
            assertEquals(score, 0.7222, tolerance);

            score = trigram.GetDistance("jones", "johnson");
            assertEquals(score, 0.4762, tolerance);

            score = trigram.GetDistance("natural", "contrary");
            assertEquals(score, 0.2083, tolerance);

            score = trigram.GetDistance("abcvwxyz", "cabvwxyz");
            assertEquals(score, 0.5625, tolerance);

            score = trigram.GetDistance("dwayne", "duane");
            assertEquals(score, 0.5277, tolerance);

            score = trigram.GetDistance("dixon", "dicksonx");
            assertEquals(score, 0.4583, tolerance);

            score = trigram.GetDistance("six", "ten");
            assertEquals(score, 0, tolerance);

            // Relative comparisons: the first candidate must outscore the second.
            float first = trigram.GetDistance("zac ephron", "zac efron");
            float second = trigram.GetDistance("zac ephron", "kai ephron");
            assertTrue(second < first);

            first = trigram.GetDistance("brittney spears", "britney spears");
            second = trigram.GetDistance("brittney spears", "brittney startzman");
            assertTrue(second < first);

            // Here the second candidate is expected to score higher.
            first = trigram.GetDistance("0012345678", "0012890678");
            second = trigram.GetDistance("0012345678", "0072385698");
            assertTrue(second > first);
        }
 /// <summary>
 /// Compares an empty string against a non-empty one using an n-gram
 /// size of 1 and expects a score of 0.0.
 /// </summary>
 public void TestEmpty()
 {
     const double tolerance = 0.001;

     IStringDistance unigram = new NGramDistance(1);
     float score = unigram.GetDistance("", "al");

     assertEquals(score, 0.0f, tolerance);
 }