public void TestMinHashSimilarity()
        {
            // Exercises MinHash.Similarity in three scenarios: a bag against a copy
            // of itself, two bags expected to share nothing, and a 1000-entry
            // dictionary against a 500-entry one.

            // Scenario 1: two separate array copies of the same list must score 1.0.
            var names = new List<string> { "bob", "alice", "frank", "tyler", "sara" };
            var namesLeft = names.ToArray();
            var namesRight = names.ToArray();

            var ratio = MinHash.Similarity(namesLeft, namesRight);
            Assert.AreEqual(1.0, ratio);

            // Scenario 2: the 1000-entry dictionary against the strings "0".."999"
            // is expected to score exactly 0.0.
            var dictionary = Words.Dictionary(1000);
            var numerals = new string[1000];
            for (var n = 0; n < numerals.Length; n++)
            {
                numerals[n] = n.ToString();
            }

            ratio = MinHash.Similarity(dictionary, numerals);
            Assert.AreEqual(0.0, ratio);

            // Scenario 3: the 1000-entry dictionary against a 500-entry one must land
            // in [0.5, 0.7].
            // NOTE(review): presumably the 500 entries overlap the 1000 — confirm
            // against Words.Dictionary.
            var smallerDictionary = Words.Dictionary(500);
            ratio = MinHash.Similarity(dictionary, smallerDictionary);

            var outOfRange = ratio > 0.7 || ratio < 0.5;
            if (outOfRange)
            {
                Assert.Fail(string.Format("Expected between 0.5 and 0.7, got {0}", ratio));
            }
        }
Esempio n. 2
0
        // GET: api/Document/id/idTwo
        /// <summary>
        /// Looks up two documents by ID and returns the similarity of their MinHashes.
        /// </summary>
        /// <param name="id">ID of the first document to look up in <c>documents</c>.</param>
        /// <param name="idTwo">ID of the second document to look up in <c>documents</c>.</param>
        /// <returns>
        /// <c>NotFound()</c> when either lookup yields null; otherwise <c>Ok(...)</c>
        /// wrapping the value returned by <c>_minHashes.Similarity</c>.
        /// </returns>
        public IHttpActionResult GetDocument(int id, int idTwo)
        {
            var first = documents.Find(doc => doc.ID == id);
            var second = documents.Find(doc => doc.ID == idTwo);

            // Both lookups run before the check; a single miss is enough to reject.
            var anyMissing = first == null || second == null;
            if (anyMissing)
            {
                return NotFound();
            }

            var similarity = _minHashes.Similarity(first.MinHashes, second.MinHashes);
            return Ok(similarity);
        }
Esempio n. 3
0
        /// <summary>
        /// Checks that <c>MinHash.Similarity</c> reports exactly 1.0 for two separate
        /// lists holding the same values (1, 2, 3).
        /// </summary>
        public void SimilarityTest()
        {
            // NOTE(review): the meaning of the constructor argument 10 is not visible
            // here — presumably the number of hash functions; confirm against MinHash.
            var hasher = new MinHash(10);
            var left = new List<uint> { 1, 2, 3 };
            var right = new List<uint> { 1, 2, 3 };

            double similarity = hasher.Similarity(left, right);

            Assert.AreEqual(1.0, similarity);
        }