/// <summary>
/// Checks that purifying a document against a terms collection drops the words
/// that are not in the collection and keeps the remaining words, duplicates included.
/// </summary>
public void VerifyRemovesInvalidTerms()
{
    // Arrange: only "accepted" is a known term; the document mixes known and unknown words.
    var allowedTerms = new TermsCollection(new[] { "accepted" });
    var inputDocument = new Document(new[] { "accepted", "nonaccepted", "accepted" });
    var purer = new DocumentPurer(allowedTerms);
    var expectedWords = new[] { "accepted", "accepted" };

    // Act
    Document result = purer.PureDocument(inputDocument);

    // Assert: both occurrences of the accepted word survive, in order.
    CollectionAssert.AreEqual(expectedWords, result.Words);
}
/// <summary>
/// Checks that a word appearing in none of the added documents is not reported
/// as a term of the collection.
/// </summary>
public void DoesNotAddInexistingTermToTermsCollection()
{
    // Arrange
    var firstDocument = new Document(new[] { "first", "word" });
    var secondDocument = new Document(new[] { "word", "second" });
    var terms = new TermsCollection();

    // Act
    terms.AddDocument(firstDocument);
    terms.AddDocument(secondDocument);
    var containsUnknownTerm = terms.ContainsTerm("nonexisting");

    // Assert
    Assert.That(containsUnknownTerm, Is.False);
}
/// <summary>
/// Checks that the public <c>Terms</c> sequence lists every word of the added
/// documents exactly once, in order of first appearance ("word" occurs in both
/// documents but is expected only once).
/// </summary>
public void ContainsAllWordsInPublicCollection()
{
    // Arrange
    var firstDocument = new Document(new[] { "first", "word" });
    var secondDocument = new Document(new[] { "word", "second" });
    var terms = new TermsCollection();
    var expectedTerms = new[] { "first", "word", "second" };

    // Act
    terms.AddDocument(firstDocument);
    terms.AddDocument(secondDocument);

    // Assert: CollectionAssert.AreEqual compares element by element, so order matters.
    CollectionAssert.AreEqual(expectedTerms, terms.Terms);
}
/// <summary>
/// Checks that every word of every added document is afterwards reported as
/// contained in the terms collection.
/// </summary>
public void AddsAllTermsFromDocumentsToTermsCollection()
{
    // Arrange
    var firstDocument = new Document(new[] { "first", "word" });
    var secondDocument = new Document(new[] { "word", "second" });
    var terms = new TermsCollection();

    // Act
    terms.AddDocument(firstDocument);
    terms.AddDocument(secondDocument);

    // Assert: each distinct word from both documents must be known.
    foreach (var expectedTerm in new[] { "first", "word", "second" })
    {
        Assert.That(terms.ContainsTerm(expectedTerm), Is.True);
    }
}
/// <summary>
/// Checks the distance computed for a mocked term-weight representation over a
/// five-term collection. The expected value is consistent with the square root
/// of the sum of the squared configured weights.
/// </summary>
public void DistanceCalculation()
{
    const double expectedDistance = 1.314903211;
    const double tolerance = 0.0001;

    // Arrange: only three of the five terms get an explicit weight; "fruit" and
    // "like" have no setup and fall back to the mock's default return value.
    var weightsMock = new Mock<ITermWeightRepresentation>();
    weightsMock
        .Setup(r => r.TermWeight(It.Is<string>(term => term == "bee")))
        .Returns(0.91629);
    weightsMock
        .Setup(r => r.TermWeight(It.Is<string>(term => term == "wasp")))
        .Returns(0.91629);
    weightsMock
        .Setup(r => r.TermWeight(It.Is<string>(term => term == "fly")))
        .Returns(0.22314);

    var terms = new TermsCollection(new[] { "bee", "wasp", "fly", "fruit", "like" });
    var calculator = new DistanceCalculator();

    // Act
    double actualDistance = calculator.CalculateDistance(weightsMock.Object, terms);

    // Assert
    Assert.AreEqual(expectedDistance, actualDistance, tolerance);
}
/// <summary>
/// Scores how well a document matches a query: the sum, over every term in
/// <paramref name="termsCollection"/>, of the product of the query weight and the
/// document weight for that term, divided by the product of the two distances
/// reported by the distance calculator.
/// </summary>
/// <param name="queryRepresentation">Source of per-term weights for the query.</param>
/// <param name="documentRepresentation">Source of per-term weights for the document.</param>
/// <param name="termsCollection">Terms over which the weights are multiplied and summed.</param>
/// <returns>
/// The weighted sum divided by the product of the distances, or <c>0</c> when that
/// product is zero.
/// </returns>
public double CalculateProbability(ITermWeightRepresentation queryRepresentation, ITermWeightRepresentation documentRepresentation, TermsCollection termsCollection)
{
    double queryDistance = _distanceCalculator.CalculateDistance(queryRepresentation, termsCollection);
    double documentDistance = _distanceCalculator.CalculateDistance(documentRepresentation, termsCollection);
    double denominator = queryDistance * documentDistance;

    // A zero distance (presumably a representation whose weights are all zero, e.g.
    // a query made only of unknown terms - confirm against DistanceCalculator) would
    // make the division below yield NaN or Infinity. Report "no match" instead so
    // callers can safely compare and sort the scores.
    if (denominator == 0)
    {
        return 0;
    }

    double sum = 0;
    foreach (string term in termsCollection.Terms)
    {
        sum += queryRepresentation.TermWeight(term) * documentRepresentation.TermWeight(term);
    }

    return sum / denominator;
}
/// <summary>
/// Checks the probability computed for a mocked query and a mocked document over
/// a five-term collection. The only term weighted in both representations is
/// "fly", so it is the only one contributing to the numerator.
/// </summary>
public void ProbabilityMatrixCalculation()
{
    // Arrange: terms without an explicit setup fall back to the mock's default return value.
    var queryRepresentation = new Mock<ITermWeightRepresentation>();
    queryRepresentation.Setup(x => x.TermWeight(It.Is<string>(y => y == "fruit"))).Returns(0.51083);
    queryRepresentation.Setup(x => x.TermWeight(It.Is<string>(y => y == "fly"))).Returns(0.22314);

    var documentRepresentation = new Mock<ITermWeightRepresentation>();
    documentRepresentation.Setup(x => x.TermWeight(It.Is<string>(y => y == "bee"))).Returns(0.91629);
    documentRepresentation.Setup(x => x.TermWeight(It.Is<string>(y => y == "wasp"))).Returns(0.91629);
    documentRepresentation.Setup(x => x.TermWeight(It.Is<string>(y => y == "fly"))).Returns(0.22314);

    var termsCollection = new TermsCollection(new[] { "bee", "wasp", "fly", "fruit", "like" });

    // The calculator is created with its parameterless constructor; the previously
    // declared stand-alone DistanceCalculator local was never passed to it or used,
    // so it has been removed.
    var probabilityMatrix = new ProbabilityMatrixCalculator();

    // Act
    double probability = probabilityMatrix.CalculateProbability(queryRepresentation.Object, documentRepresentation.Object, termsCollection);

    // Assert
    Assert.AreEqual(0.067932752, probability, 0.0001);
}