[TestCase(" ", " ", 0)] // Doesn't contain shingles and should therefore have 0 similarity
// Splits both inputs into word lists, feeds them to JaccardSimilarity and
// compares the resulting Similarity with the expected value of the test case.
public void FullImplementationSimilarityTest(string first, string second, double expectedSimilarity)
{
    List<string> wordsOfFirst = InitializeListWithWords(first);
    List<string> wordsOfSecond = InitializeListWithWords(second);

    var jaccard = new JaccardSimilarity(wordsOfFirst, wordsOfSecond);

    Assert.AreEqual(expectedSimilarity, jaccard.Similarity);
}
[TestCase("We could also try with two equal sentences", "We could also try with two equal sentences", 5)] // Equal sentences
// Verifies how many entries AllElements holds after comparing the two inputs.
public void AllElementsTest(string first, string second, int expectedResult)
{
    List<string> wordsOfFirst = InitializeListWithWords(first);
    List<string> wordsOfSecond = InitializeListWithWords(second);

    var jaccard = new JaccardSimilarity(wordsOfFirst, wordsOfSecond);
    int elementCount = jaccard.AllElements.Count();

    Assert.AreEqual(expectedResult, elementCount);
}
[Test] // Check that the first text is handled correctly
// The list passed as first argument must be exposed unchanged through TextA.
public void InitializeTextATest()
{
    List<string> expectedWords = InitializeListWithWords("This is a test");
    // The second text is only a placeholder for this test.
    var placeholderText = new List<string> { "Can't be empty" };

    var jaccard = new JaccardSimilarity(expectedWords, placeholderText);

    CollectionAssert.AreEqual(expectedWords, jaccard.TextA);
}
// The inputs supplied by the test cases attached to this method are expected
// to generate no shingles at all.
// The words are passed as the second text, so shinglesB must stay empty.
public void GetShinglesBExpectNoShinglesInGivenTextTest(string text)
{
    var wordsUnderTest = InitializeListWithWords(text);
    // The first text is only a placeholder for this test.
    var placeholderText = new List<string> { "Can't be empty" };

    var jaccard = new JaccardSimilarity(placeholderText, wordsUnderTest);
    bool hasShingles = jaccard.shinglesB.Any();

    Assert.IsFalse(hasShingles);
}
[Test] // Check that the second text is handled correctly
// The list passed as second argument must be exposed unchanged through TextB.
public void InitializeTextBTest()
{
    List<string> expectedWords = InitializeListWithWords("This is a test");
    // The first text is only a placeholder for this test.
    var placeholderText = new List<string> { "Can't be empty" };

    var jaccard = new JaccardSimilarity(placeholderText, expectedWords);

    CollectionAssert.AreEqual(expectedWords, jaccard.TextB);
}
[Test] // Covers the step that inserts the stop words into the HashSet
// After construction the stopWords collection is expected to hold 464 entries.
public void AmountOfStopwordsTest()
{
    // Both texts are placeholders; only the stop-word collection is inspected.
    var placeholderA = new List<string> { "Can't be empty" };
    var placeholderB = new List<string> { "Can't be empty" };

    var jaccard = new JaccardSimilarity(placeholderA, placeholderB);

    Assert.AreEqual(464, jaccard.stopWords.Count);
}
[Test] // Covers the loading of stop words from the text file
// After construction tempStopWords is expected to enumerate 464 entries.
public void TemporaryStopwordsTest()
{
    // Both texts are placeholders; only the temporary stop-word sequence is inspected.
    var placeholderA = new List<string> { "Can't be empty" };
    var placeholderB = new List<string> { "Can't be empty" };

    var jaccard = new JaccardSimilarity(placeholderA, placeholderB);
    int loadedStopWords = jaccard.tempStopWords.Count();

    Assert.AreEqual(464, loadedStopWords);
}
"to check for", "for generated shingles")] // Regular sentence
// Passes the words of the given text as the second text and verifies that
// shinglesB equals, element by element, the shingles listed in the test case.
public void GetShinglesBShinglesInGivenTextTest(string text, params string[] shingles)
{
    List<string> wordsUnderTest = InitializeListWithWords(text);
    List<string> expectedShingles = InitiaizeListWithShingles(shingles);
    // The first text is only a placeholder for this test.
    var placeholderText = new List<string> { "Can't be empty" };

    var jaccard = new JaccardSimilarity(placeholderText, wordsUnderTest);

    CollectionAssert.AreEqual(expectedShingles, jaccard.shinglesB);
}
// Console entry point: runs the Levenshtein, cosine and Jaccard comparisons on
// two text files and prints each result followed by the elapsed milliseconds.
//
// args: optional. When at least two arguments are given, args[0] and args[1]
//       are used as the paths of the two texts; otherwise the original sample
//       files are used, exactly as before.
static void Main(string[] args)
{
    // Each default path was previously repeated in two places; keep a single copy of each.
    const string defaultPathA = @"C:\Users\Patri\Dropbox\Projekt\P2\Program\Nyheder_Database\Koran_Bible_Same1.txt";
    const string defaultPathB = @"C:\Users\Patri\Dropbox\Projekt\P2\Program\Nyheder_Database\Koran_Bible_Same2.txt";

    bool hasCustomPaths = args != null && args.Length >= 2;
    string pathA = hasCustomPaths ? args[0] : defaultPathA;
    string pathB = hasCustomPaths ? args[1] : defaultPathB;

    Stopwatch stopwatch = new Stopwatch();
    Console.WriteLine(Stopwatch.IsHighResolution);
    stopwatch.Start();

    #region LevenshteinDistance
    // Loads each line of the text into a list of strings
    LoadStringToList linesA = new LoadStringToList(pathA);
    LoadStringToList linesB = new LoadStringToList(pathB);

    // Computes the LevenshteinDistance
    LevenshteinDistance levenshtein = new LevenshteinDistance(
        linesA.Lines,
        linesB.Lines,
        linesA.GetAmountOfChars(),
        linesB.GetAmountOfChars(),
        linesA.LinesInText,
        linesB.LinesInText);
    levenshtein.Print(); // Prints the LevenshteinDistance between the two texts
    #endregion

    Console.WriteLine(stopwatch.ElapsedMilliseconds);
    stopwatch.Restart();

    #region CosineDistance
    // Loads each word of the text into a list of strings
    LoadEachWordToList wordsA = new LoadEachWordToList(pathA);
    LoadEachWordToList wordsB = new LoadEachWordToList(pathB);

    CalculateCosine cosine = new CalculateCosine(wordsA.Words, wordsB.Words); // Computes the CosineDistance
    cosine.Print(); // Prints the CosineSimilarity between the two texts
    #endregion

    Console.WriteLine(stopwatch.ElapsedMilliseconds);
    stopwatch.Restart();

    #region JaccardDistance
    // Reuses the word lists loaded in the CosineDistance region (LoadEachWordToList)
    JaccardSimilarity jaccard = new JaccardSimilarity(wordsA.Words, wordsB.Words);
    jaccard.Print(); // Prints the JaccardSimilarity between the two texts
    #endregion

    Console.WriteLine(stopwatch.ElapsedMilliseconds);
    stopwatch.Stop();

    #region TryDiffrentLoadMethod (Resources)
    //var TekstA = new List<string>();
    //var TekstB = new List<string>();
    //string[] stringSeparators = { ",", ".", "!", "?", ";", ":", " ", "-", "\"", "(", ")" };
    //TekstA = Resources.Pizzagate1.Split(stringSeparators, StringSplitOptions.RemoveEmptyEntries).ToList();
    //TekstB = Resources.Pizzagate2.Split(stringSeparators, StringSplitOptions.RemoveEmptyEntries).ToList();
    //CalculateCosine CalcCos2 = new CalculateCosine(TekstA, TekstB); // Computes the CosineDistance
    //CalcCos2.Print(); // Prints the CosineDistance between the two texts
    #endregion

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
[Test] // Test for two whole texts in reverse order (added as resources)
// Loads both resource texts, passes them to JaccardSimilarity in swapped order
// and checks the similarity rounded to six decimals.
public void SimilarityBetweenTwoRealTextstInReverseOrderTest()
{
    List<string> firstText;
    List<string> secondText;
    InitializeListWithWordsFromText(out firstText, out secondText);

    // Deliberately reversed: the second text is passed first.
    var jaccard = new JaccardSimilarity(secondText, firstText);
    decimal similarity = Math.Round(jaccard.Similarity, 6);

    // The rounded value is a decimal, so the expected value is a decimal literal
    // too; the previous double literal forced a mixed double/decimal comparison.
    Assert.AreEqual(0.008997m, similarity);
}
// Returns the greatest JaccardSimilarity obtained by comparing the text with
// every text found at the given paths; 0 when no path yields a greater value.
public override decimal CompareWithTexts(List<string> paths)
{
    decimal best = 0;

    foreach (string candidatePath in paths)
    {
        // Load the stored text word by word and compare it with the text under test.
        var candidate = new LoadEachWordToList(candidatePath);
        var comparison = new JaccardSimilarity(TextToBeCompared, candidate.Words);

        // Keep the score only when it beats the best one seen so far.
        if (!(comparison.Similarity > best))
        {
            continue;
        }

        best = comparison.Similarity;
    }

    return best;
}
// Compares two articles whose vectors only partly overlap and checks that the
// reported similarity is within 0.001 of the expected value (about 2/7).
public void GetSimilarity_TwoDifferentVectors_ExpectedResult()
{
    const double tolerance = 0.001;
    const double expected = 0.28571428571428571428571428571429;

    // Arrange
    JaccardSimilarity metric = new JaccardSimilarity();

    Article left = new Article();
    Article right = new Article();

    left.Vector[1] = 1;
    left.Vector[2] = 3;
    left.Vector[3] = 5;

    right.Vector[1] = 2;
    right.Vector[2] = 4;
    right.Vector[5] = 3;

    // Act
    double actual = metric.GetSimilarity(left, right);

    // Assert
    double deviation = Math.Abs(expected - actual);
    Assert.IsTrue(deviation < tolerance);
}
/// <summary>
/// Scores how similar two words are using the selected similarity metric.
/// </summary>
/// <param name="firstWord">The word the extension method is invoked on.</param>
/// <param name="secondWord">The word to compare against.</param>
/// <param name="simMetricType">
/// Metric to use. Any value without a dedicated case falls back to Levenstein,
/// which is also the default.
/// </param>
/// <returns>The value returned by the chosen metric's <c>GetSimilarity</c>.</returns>
public static double ApproximatelyEquals(this string firstWord, string secondWord, SimMetricType simMetricType = SimMetricType.Levenstein)
{
    switch (simMetricType)
    {
        case SimMetricType.BlockDistance:
            return new BlockDistance().GetSimilarity(firstWord, secondWord);

        case SimMetricType.ChapmanLengthDeviation:
            return new ChapmanLengthDeviation().GetSimilarity(firstWord, secondWord);

        case SimMetricType.CosineSimilarity:
            return new CosineSimilarity().GetSimilarity(firstWord, secondWord);

        case SimMetricType.DiceSimilarity:
            return new DiceSimilarity().GetSimilarity(firstWord, secondWord);

        case SimMetricType.EuclideanDistance:
            return new EuclideanDistance().GetSimilarity(firstWord, secondWord);

        case SimMetricType.JaccardSimilarity:
            return new JaccardSimilarity().GetSimilarity(firstWord, secondWord);

        case SimMetricType.Jaro:
            return new Jaro().GetSimilarity(firstWord, secondWord);

        case SimMetricType.JaroWinkler:
            return new JaroWinkler().GetSimilarity(firstWord, secondWord);

        case SimMetricType.MatchingCoefficient:
            return new MatchingCoefficient().GetSimilarity(firstWord, secondWord);

        case SimMetricType.MongeElkan:
            return new MongeElkan().GetSimilarity(firstWord, secondWord);

        case SimMetricType.NeedlemanWunch:
            return new NeedlemanWunch().GetSimilarity(firstWord, secondWord);

        case SimMetricType.OverlapCoefficient:
            return new OverlapCoefficient().GetSimilarity(firstWord, secondWord);

        case SimMetricType.QGramsDistance:
            return new QGramsDistance().GetSimilarity(firstWord, secondWord);

        case SimMetricType.SmithWaterman:
            return new SmithWaterman().GetSimilarity(firstWord, secondWord);

        case SimMetricType.SmithWatermanGotoh:
            return new SmithWatermanGotoh().GetSimilarity(firstWord, secondWord);

        case SimMetricType.SmithWatermanGotohWindowedAffine:
            return new SmithWatermanGotohWindowedAffine().GetSimilarity(firstWord, secondWord);

        case SimMetricType.ChapmanMeanLength:
            return new ChapmanMeanLength().GetSimilarity(firstWord, secondWord);

        // Levenstein and every value not listed above.
        default:
            return new Levenstein().GetSimilarity(firstWord, secondWord);
    }
}
/// <summary>
/// Scores the similarity of two strings with the metric named by <paramref name="type"/>.
/// </summary>
/// <param name="str1">First string; it is trimmed before being compared.</param>
/// <param name="str2">Second string; it is trimmed before being compared.</param>
/// <param name="type">
/// One of the <c>AlgorithmTypes</c> constants. Any other value selects SmithWatermanGotoh.
/// </param>
/// <returns>The value returned by the selected metric's <c>GetSimilarity</c>.</returns>
public double GetSimilarity(string str1, string str2, string type)
{
    // The metric is resolved first, then both inputs are trimmed and compared.
    IStringMetric metric = ResolveStringMetric(type);
    return metric.GetSimilarity(str1.Trim(), str2.Trim());
}

// Maps an AlgorithmTypes constant to a fresh instance of the matching metric.
private static IStringMetric ResolveStringMetric(string type)
{
    switch (type)
    {
        case AlgorithmTypes.BlockDistance:
            return new BlockDistance();
        case AlgorithmTypes.ChapmanLengthDeviation:
            return new ChapmanLengthDeviation();
        case AlgorithmTypes.ChapmanMeanLength:
            return new ChapmanMeanLength();
        case AlgorithmTypes.CosineSimilarity:
            return new CosineSimilarity();
        case AlgorithmTypes.DiceSimilarity:
            return new DiceSimilarity();
        case AlgorithmTypes.EuclideanDistance:
            return new EuclideanDistance();
        case AlgorithmTypes.JaccardSimilarity:
            return new JaccardSimilarity();
        case AlgorithmTypes.Jaro:
            return new Jaro();
        case AlgorithmTypes.JaroWinkler:
            return new JaroWinkler();
        case AlgorithmTypes.Levenstein:
            return new Levenstein();
        case AlgorithmTypes.MatchingCoefficient:
            return new MatchingCoefficient();
        case AlgorithmTypes.MongeElkan:
            return new MongeElkan();
        case AlgorithmTypes.NeedlemanWunch:
            return new NeedlemanWunch();
        case AlgorithmTypes.OverlapCoefficient:
            return new OverlapCoefficient();
        case AlgorithmTypes.QGramsDistance:
            return new QGramsDistance();
        case AlgorithmTypes.SmithWaterman:
            return new SmithWaterman();
        case AlgorithmTypes.SmithWatermanGotoh:
            return new SmithWatermanGotoh();
        case AlgorithmTypes.SmithWatermanGotohWindowedAffine:
            return new SmithWatermanGotohWindowedAffine();
        default:
            // Unknown names use the same metric as the SmithWatermanGotoh case.
            return new SmithWatermanGotoh();
    }
}
/// <summary>
/// Recommends games by running collaborative filtering with
/// <see cref="JaccardSimilarity"/> over one profile per stored user, where a
/// profile lists the ids of the games found through that user's orders.
/// </summary>
/// <param name="gamekey">
/// Id of the game the request is made for. It is appended to the requesting
/// user's own game list before the recommendation is computed.
/// </param>
/// <param name="userDTO">
/// The requesting user, or <c>null</c>. With <c>null</c> (or a user that is not
/// stored) a throw-away profile containing only <paramref name="gamekey"/> is used.
/// </param>
/// <returns>
/// The games whose ids appear as keys in the recommendation result, mapped to DTOs.
/// </returns>
/// <remarks>
/// When the first recommendation is empty or its top entry has a value of 0,
/// and a user was supplied, the recommendation is recomputed using only the
/// users that share the requester's IsWoman and Adulthood values. The previous
/// implementation attempted this fallback but discarded its outcome.
/// </remarks>
public IEnumerable<GameDTO> GetGamesByCollaborative(Guid gamekey, UserDTO userDTO = null)
{
    // Materialize once; the user source used to be re-enumerated on every index access.
    var users = _unitOfWork.Users.GetAll().ToList();

    // Stand-in profile for requesters that are not among the stored users.
    var currentUserProfile = new UserProfile(Guid.NewGuid(), new Guid[] { gamekey });

    var profiles = new List<UserProfile>();
    // Profiles of users sharing the requester's demographics, for the fallback pass.
    var cohortProfiles = new List<UserProfile>();

    foreach (var candidate in users)
    {
        Guid[] purchasedGameIds = GetPurchasedGameIds(candidate.Id);
        profiles.Add(new UserProfile(candidate.Id, purchasedGameIds));

        // userDTO is optional; without it there is no requester to match.
        if (userDTO == null)
        {
            continue;
        }

        if (candidate.Id == userDTO.Id)
        {
            // The requester's profile also contains the requested game. The
            // extended array used to be built and then dropped.
            Guid[] withRequestedGame = purchasedGameIds.Concat(new Guid[] { gamekey }).ToArray();
            currentUserProfile = new UserProfile(candidate.Id, withRequestedGame);
        }

        if (candidate.IsWoman == userDTO.IsWoman && candidate.Adulthood == userDTO.Adulthood)
        {
            cohortProfiles.Add(new UserProfile(candidate.Id, purchasedGameIds));
        }
    }

    var similarity = new JaccardSimilarity();
    var engine = new CollaborativeFiltering();
    var results = engine.recommend(profiles, similarity, currentUserProfile);

    // Any() guards ElementAt(0), which throws on an empty result.
    bool firstPassIsUseless = !results.Any() || results.ElementAt(0).Value == 0;
    if (userDTO != null && firstPassIsUseless)
    {
        results = engine.recommend(cohortProfiles, similarity, currentUserProfile);
    }

    var recommendedGames = _unitOfWork.Games.Get(x => results.Any(r => x.Id == r.Key));
    return _mapper.Map<IEnumerable<GameDTO>>(recommendedGames);
}

// Returns the ids of all games referenced by the order details of the given user's orders.
private Guid[] GetPurchasedGameIds(Guid userId)
{
    var orders = _unitOfWork.Orders.Get(u => u.UserId == userId);
    var orderDetails = _unitOfWork.OrderDetails.Get(od => orders.Any(o => od.OrderId == o.Id));
    var games = _unitOfWork.Games.Get(g => orderDetails.Any(o => g.Id == o.GameId));
    return games.Select(g => g.Id).ToArray();
}
// [SetUp]
// Prepares the fixture: loads the test data first, then creates the
// JaccardSimilarity instance kept in _myJaccardSimilarity.
public JaccardSimilarityUnitTests()
{
    LoadData();

    _myJaccardSimilarity = new JaccardSimilarity();
}