Ejemplo n.º 1
0
        /// <summary>
        /// Checks that purifying a document drops the words that are missing from the terms
        /// collection while the accepted words (duplicates included) are kept in order.
        /// </summary>
        public void VerifyRemovesInvalidTerms()
        {
            // Arrange: "accepted" is the only allowed term; the input mixes it with an unknown word.
            var allowedTerms = new TermsCollection(new[] {"accepted"});
            var input = new Document(new[] {"accepted", "nonaccepted", "accepted"});
            var purer = new DocumentPurer(allowedTerms);
            var expectedWords = new[] {"accepted", "accepted"};

            // Act
            Document purified = purer.PureDocument(input);

            // Assert
            CollectionAssert.AreEqual(expectedWords, purified.Words);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Checks that <c>ContainsTerm</c> is false for a term that appears in none of the
        /// documents added to the collection.
        /// </summary>
        public void DoesNotAddInexistingTermToTermsCollection()
        {
            // Arrange: two documents that share the word "word".
            var firstDocument = new Document(new[] { "first", "word" });
            var secondDocument = new Document(new[] { "word", "second" });
            var terms = new TermsCollection();

            // Act: feed the documents in the same order every time.
            foreach (var document in new[] { firstDocument, secondDocument })
            {
                terms.AddDocument(document);
            }

            // Assert
            bool containsUnknownTerm = terms.ContainsTerm("nonexisting");
            Assert.That(containsUnknownTerm, Is.False);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Checks that, after adding two documents that share a word, the <c>Terms</c> property
        /// equals "first", "word", "second" in that order, so the shared word is listed once.
        /// </summary>
        public void ContainsAllWordsInPublicCollection()
        {
            // Arrange: "word" occurs in both documents.
            var firstDocument = new Document(new[] { "first", "word" });
            var secondDocument = new Document(new[] { "word", "second" });
            var terms = new TermsCollection();
            var expectedTerms = new[] { "first", "word", "second" };

            // Act: the order of insertion matters because the assertion is order-sensitive.
            foreach (var document in new[] { firstDocument, secondDocument })
            {
                terms.AddDocument(document);
            }

            // Assert
            CollectionAssert.AreEqual(expectedTerms, terms.Terms);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Checks that every word from every added document is reported by <c>ContainsTerm</c>.
        /// </summary>
        public void AddsAllTermsFromDocumentsToTermsCollection()
        {
            // Arrange
            var firstDocument = new Document(new[] {"first", "word"});
            var secondDocument = new Document(new[] {"word", "second"});
            var terms = new TermsCollection();

            // Act
            foreach (var document in new[] {firstDocument, secondDocument})
            {
                terms.AddDocument(document);
            }

            // Assert: each distinct word must be known, checked in a fixed order.
            foreach (var expectedTerm in new[] {"first", "word", "second"})
            {
                Assert.That(terms.ContainsTerm(expectedTerm), Is.True);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Checks the distance computed by <c>DistanceCalculator</c> for a stubbed term-weight
        /// representation. The expected value equals the square root of the sum of the squared
        /// stubbed weights.
        /// </summary>
        public void DistanceCalculation()
        {
            const double expectedDistance = 1.314903211;
            const double tolerance = 0.0001;

            // Arrange: only three of the five terms get an explicit weight; "fruit" and "like"
            // have no setup, so the mock answers with its default value for them.
            var weights = new Mock<ITermWeightRepresentation>();
            weights.Setup(r => r.TermWeight(It.Is<string>(term => term == "bee"))).Returns(0.91629);
            weights.Setup(r => r.TermWeight(It.Is<string>(term => term == "wasp"))).Returns(0.91629);
            weights.Setup(r => r.TermWeight(It.Is<string>(term => term == "fly"))).Returns(0.22314);
            var terms = new TermsCollection(new[] {"bee", "wasp", "fly", "fruit", "like"});
            var calculator = new DistanceCalculator();

            // Act
            ITermWeightRepresentation representation = weights.Object;
            double distance = calculator.CalculateDistance(representation, terms);

            // Assert
            Assert.AreEqual(expectedDistance, distance, tolerance);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Scores how closely a document matches a query: the sum over all terms of
        /// (query weight * document weight), divided by the product of the two distances
        /// reported by the distance calculator.
        /// </summary>
        /// <param name="queryRepresentation">Term weights of the query.</param>
        /// <param name="documentRepresentation">Term weights of the document.</param>
        /// <param name="termsCollection">Terms over which the weights are combined.</param>
        /// <returns>
        /// The normalized weight product, or 0 when the product of the two distances is zero
        /// (for example, when one side has no weighted term at all).
        /// </returns>
        public double CalculateProbability(ITermWeightRepresentation queryRepresentation,
                                           ITermWeightRepresentation documentRepresentation,
                                           TermsCollection termsCollection)
        {
            double queryDistance = _distanceCalculator.CalculateDistance(queryRepresentation, termsCollection);
            double documentDistance = _distanceCalculator.CalculateDistance(documentRepresentation, termsCollection);

            // A zero denominator would make the division below yield NaN or Infinity,
            // which is useless as a score; report "no match" instead.
            double denominator = queryDistance*documentDistance;
            if (denominator == 0)
            {
                return 0;
            }

            double sum = 0;
            foreach (string term in termsCollection.Terms)
            {
                sum += queryRepresentation.TermWeight(term)*documentRepresentation.TermWeight(term);
            }
            return sum/denominator;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Checks the probability computed by <c>ProbabilityMatrixCalculator</c> for a stubbed
        /// query and document. The two only share the term "fly", and the expected value equals
        /// the product of their "fly" weights divided by the product of the two vectors' lengths.
        /// </summary>
        public void ProbabilityMatrixCalculation()
        {
            // Arrange: terms without a setup fall back to the mock's default return value.
            var queryRepresentation = new Mock<ITermWeightRepresentation>();
            queryRepresentation.Setup(x => x.TermWeight(It.Is<string>(y => y == "fruit"))).Returns(0.51083);
            queryRepresentation.Setup(x => x.TermWeight(It.Is<string>(y => y == "fly"))).Returns(0.22314);

            var documentRepresentation = new Mock<ITermWeightRepresentation>();
            documentRepresentation.Setup(x => x.TermWeight(It.Is<string>(y => y == "bee"))).Returns(0.91629);
            documentRepresentation.Setup(x => x.TermWeight(It.Is<string>(y => y == "wasp"))).Returns(0.91629);
            documentRepresentation.Setup(x => x.TermWeight(It.Is<string>(y => y == "fly"))).Returns(0.22314);

            var termsCollection = new TermsCollection(new[] {"bee", "wasp", "fly", "fruit", "like"});
            var probabilityMatrix = new ProbabilityMatrixCalculator();

            // Act
            double probability = probabilityMatrix.CalculateProbability(queryRepresentation.Object,
                                                                        documentRepresentation.Object, termsCollection);

            // Assert
            Assert.AreEqual(0.067932752, probability, 0.0001);
        }