Ejemplo n.º 1
0
        // Whitespace-only inputs contain no shingles, so the expected similarity is 0
        [TestCase(" ", " ", 0)]
        public void FullImplementationSimilarityTest(string first, string second, double expectedSimilarity)
        {
            // Arrange: turn both raw inputs into word lists
            List <string> wordsA = InitializeListWithWords(first);
            List <string> wordsB = InitializeListWithWords(second);

            // Act
            var jaccard = new JaccardSimilarity(wordsA, wordsB);
            var actualSimilarity = jaccard.Similarity;

            // Assert
            Assert.AreEqual(expectedSimilarity, actualSimilarity);
        }
Ejemplo n.º 2
0
        // Two identical sentences
        [TestCase("We could also try with two equal sentences", "We could also try with two equal sentences", 5)]
        public void AllElementsTest(string first, string second, int expectedResult)
        {
            // Arrange: turn both raw inputs into word lists
            List <string> wordsA = InitializeListWithWords(first);
            List <string> wordsB = InitializeListWithWords(second);

            // Act
            var jaccard = new JaccardSimilarity(wordsA, wordsB);
            var elementCount = jaccard.AllElements.Count();

            // Assert: the number of entries in AllElements matches the test case
            Assert.AreEqual(expectedResult, elementCount);
        }
Ejemplo n.º 3
0
        [Test()] // The first text handed to the constructor must be exposed unchanged through TextA
        public void InitializeTextATest()
        {
            List <string> expectedWords = InitializeListWithWords("This is a test");

            // The second argument is only a placeholder for this test
            var placeholder = new List <string> { "Can't be empty" };

            var jaccard = new JaccardSimilarity(expectedWords, placeholder);
            var actualWords = jaccard.TextA;

            CollectionAssert.AreEqual(expectedWords, actualWords);
        }
Ejemplo n.º 4
0
        // None of the test-case texts declared above is expected to produce any shingles
        public void GetShinglesBExpectNoShinglesInGivenTextTest(string text)
        {
            var wordsUnderTest = InitializeListWithWords(text);

            // The first argument is only a placeholder for this test
            var placeholder = new List <string> { "Can't be empty" };

            var jaccard = new JaccardSimilarity(placeholder, wordsUnderTest);
            var hasShingles = jaccard.shinglesB.Any();

            Assert.IsFalse(hasShingles);
        }
Ejemplo n.º 5
0
        [Test()] // The second text handed to the constructor must be exposed unchanged through TextB
        public void InitializeTextBTest()
        {
            List <string> expectedWords = InitializeListWithWords("This is a test");

            // The first argument is only a placeholder for this test
            var placeholder = new List <string> { "Can't be empty" };

            var jaccard = new JaccardSimilarity(placeholder, expectedWords);
            var actualWords = jaccard.TextB;

            CollectionAssert.AreEqual(expectedWords, actualWords);
        }
Ejemplo n.º 6
0
        [Test()] // The stopWords collection is expected to hold 464 entries after construction
        public void AmountOfStopwordsTest()
        {
            // Both texts are placeholders; only the stop-word collection is inspected
            var placeholderA = new List <string> { "Can't be empty" };
            var placeholderB = new List <string> { "Can't be empty" };

            var jaccard = new JaccardSimilarity(placeholderA, placeholderB);
            var stopWordCount = jaccard.stopWords.Count;

            Assert.AreEqual(464, stopWordCount);
        }
Ejemplo n.º 7
0
        [Test()] // The tempStopWords sequence is expected to hold 464 entries after construction
        public void TemporaryStopwordsTest()
        {
            // Both texts are placeholders; only the temporary stop-word sequence is inspected
            var placeholderA = new List <string> { "Can't be empty" };
            var placeholderB = new List <string> { "Can't be empty" };

            var jaccard = new JaccardSimilarity(placeholderA, placeholderB);
            var stopWordCount = jaccard.tempStopWords.Count();

            Assert.AreEqual(464, stopWordCount);
        }
Ejemplo n.º 8
0
                  "to check for", "for generated shingles")] // Regular sentence
        public void GetShinglesBShinglesInGivenTextTest(string text, params string[] shingles)
        {
            // Arrange: the words to analyse and the shingles they are expected to yield
            List <string> wordsUnderTest   = InitializeListWithWords(text);
            List <string> expectedShingles = InitiaizeListWithShingles(shingles);

            // The first argument is only a placeholder for this test
            var placeholder = new List <string> { "Can't be empty" };

            // Act
            var jaccard = new JaccardSimilarity(placeholder, wordsUnderTest);
            var actualShingles = jaccard.shinglesB;

            // Assert: same shingles in the same order
            CollectionAssert.AreEqual(expectedShingles, actualShingles);
        }
Ejemplo n.º 9
0
        // Console entry point: runs the Levenshtein, cosine and Jaccard comparisons on the same
        // pair of text files and prints the elapsed milliseconds after each comparison.
        static void Main(string[] args)
        {
            // Both input files are read by more than one comparison, so their paths are named once here
            const string firstPath  = @"C:\Users\Patri\Dropbox\Projekt\P2\Program\Nyheder_Database\Koran_Bible_Same1.txt";
            const string secondPath = @"C:\Users\Patri\Dropbox\Projekt\P2\Program\Nyheder_Database\Koran_Bible_Same2.txt";

            var timer = new Stopwatch();

            Console.WriteLine(Stopwatch.IsHighResolution);
            timer.Start();

            #region LevenshteinDistance
            // Load every line of each text into a list of strings
            var linesA = new LoadStringToList(firstPath);
            var linesB = new LoadStringToList(secondPath);

            // Compute the Levenshtein distance between the two texts
            var levenshtein = new LevenshteinDistance(linesA.Lines, linesB.Lines,
                                                      linesA.GetAmountOfChars(), linesB.GetAmountOfChars(),
                                                      linesA.LinesInText, linesB.LinesInText);

            // Print the Levenshtein distance between the two texts
            levenshtein.Print();
            #endregion
            Console.WriteLine(timer.ElapsedMilliseconds);
            timer.Restart();

            #region CosineDistance
            // Load every word of each text into a list of strings
            var wordsA = new LoadEachWordToList(firstPath);
            var wordsB = new LoadEachWordToList(secondPath);

            // Compute, then print, the cosine similarity between the two texts
            var cosine = new CalculateCosine(wordsA.Words, wordsB.Words);
            cosine.Print();
            #endregion
            Console.WriteLine(timer.ElapsedMilliseconds);
            timer.Restart();

            #region JaccardDistance
            // Reuses the word lists loaded in the CosineDistance region (LoadEachWordToList)
            var jaccard = new JaccardSimilarity(wordsA.Words, wordsB.Words);

            // Print the Jaccard similarity between the two texts
            jaccard.Print();
            #endregion
            Console.WriteLine(timer.ElapsedMilliseconds);
            timer.Stop();

            #region TryDiffrentLoadMethod (Resources)
            //var TekstA = new List<string>();
            //var TekstB = new List<string>();
            //string[] stringSeparators = { ",", ".", "!", "?", ";", ":", " ", "-", "\"", "(", ")" };

            //TekstA = Resources.Pizzagate1.Split(stringSeparators, StringSplitOptions.RemoveEmptyEntries).ToList();
            //TekstB = Resources.Pizzagate2.Split(stringSeparators, StringSplitOptions.RemoveEmptyEntries).ToList();

            //CalculateCosine CalcCos2 = new CalculateCosine(TekstA, TekstB); // Computes the cosine distance
            //CalcCos2.Print(); // Prints the cosine distance between the two texts
            #endregion

            // Keep the console window open until the user presses Enter
            Console.ReadLine();
        }
Ejemplo n.º 10
0
        [Test()] // Similarity of two whole texts (added as resources) when they are passed in reverse order
        public void SimilarityBetweenTwoRealTextstInReverseOrderTest()
        {
            List <string> firstText;
            List <string> secondText;

            InitializeListWithWordsFromText(out firstText, out secondText);

            // The texts are deliberately swapped: second text first, first text second
            var test = new JaccardSimilarity(secondText, firstText);

            // Round to six decimals so the comparison below is against a fixed-precision value
            decimal similarity = Math.Round(test.Similarity, 6);

            // The expected value is a decimal literal so that expected and actual share one type;
            // the original double literal made the assertion depend on NUnit's mixed-type
            // numeric coercion instead of an exact decimal comparison.
            Assert.AreEqual(0.008997m, similarity);
        }
Ejemplo n.º 11
0
        // Returns the greatest JaccardSimilarity obtained by comparing TextToBeCompared with the
        // text loaded from each of the given paths; returns 0 when no comparison scores above 0.
        public override decimal CompareWithTexts(List <string> paths)
        {
            decimal best = 0;

            foreach (string candidatePath in paths)
            {
                // Load the words of the candidate text and compare them with the text under test
                var candidateText = new LoadEachWordToList(candidatePath);
                var comparison    = new JaccardSimilarity(TextToBeCompared, candidateText.Words);

                decimal score = comparison.Similarity;

                // Keep the highest score seen so far
                best = score > best ? score : best;
            }

            return best;
        }
Ejemplo n.º 12
0
        public void GetSimilarity_TwoDifferentVectors_ExpectedResult()
        {
            // Arrange: two articles whose vectors differ in both keys and values
            var metric = new JaccardSimilarity();
            var left   = new Article();
            var right  = new Article();

            left.Vector[1] = 1;
            left.Vector[2] = 3;
            left.Vector[3] = 5;

            right.Vector[1] = 2;
            right.Vector[2] = 4;
            right.Vector[5] = 3;

            const double expected  = 0.28571428571428571428571428571429;
            const double tolerance = 0.001;

            // Act
            double actual = metric.GetSimilarity(left, right);

            // Assert: the result must lie strictly within the tolerance of the expected value
            double deviation = Math.Abs(expected - actual);
            Assert.IsTrue(deviation < tolerance);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Computes the similarity between two strings with the metric selected by
        /// <paramref name="simMetricType"/>.
        /// </summary>
        /// <param name="firstWord">First string, passed unchanged to the metric.</param>
        /// <param name="secondWord">Second string, passed unchanged to the metric.</param>
        /// <param name="simMetricType">
        /// Metric to use. Any value without a dedicated case (including the default
        /// <c>SimMetricType.Levenstein</c>) is handled by <c>Levenstein</c>.
        /// </param>
        /// <returns>The value returned by the selected metric's <c>GetSimilarity</c>.</returns>
        public static double ApproximatelyEquals(this string firstWord, string secondWord, SimMetricType simMetricType = SimMetricType.Levenstein)
        {
            // Each case creates a fresh metric instance and delegates straight to it.
            switch (simMetricType)
            {
            case SimMetricType.BlockDistance:
                return new BlockDistance().GetSimilarity(firstWord, secondWord);

            case SimMetricType.ChapmanLengthDeviation:
                return new ChapmanLengthDeviation().GetSimilarity(firstWord, secondWord);

            case SimMetricType.ChapmanMeanLength:
                return new ChapmanMeanLength().GetSimilarity(firstWord, secondWord);

            case SimMetricType.CosineSimilarity:
                return new CosineSimilarity().GetSimilarity(firstWord, secondWord);

            case SimMetricType.DiceSimilarity:
                return new DiceSimilarity().GetSimilarity(firstWord, secondWord);

            case SimMetricType.EuclideanDistance:
                return new EuclideanDistance().GetSimilarity(firstWord, secondWord);

            case SimMetricType.JaccardSimilarity:
                return new JaccardSimilarity().GetSimilarity(firstWord, secondWord);

            case SimMetricType.Jaro:
                return new Jaro().GetSimilarity(firstWord, secondWord);

            case SimMetricType.JaroWinkler:
                return new JaroWinkler().GetSimilarity(firstWord, secondWord);

            case SimMetricType.MatchingCoefficient:
                return new MatchingCoefficient().GetSimilarity(firstWord, secondWord);

            case SimMetricType.MongeElkan:
                return new MongeElkan().GetSimilarity(firstWord, secondWord);

            case SimMetricType.NeedlemanWunch:
                return new NeedlemanWunch().GetSimilarity(firstWord, secondWord);

            case SimMetricType.OverlapCoefficient:
                return new OverlapCoefficient().GetSimilarity(firstWord, secondWord);

            case SimMetricType.QGramsDistance:
                return new QGramsDistance().GetSimilarity(firstWord, secondWord);

            case SimMetricType.SmithWaterman:
                return new SmithWaterman().GetSimilarity(firstWord, secondWord);

            case SimMetricType.SmithWatermanGotoh:
                return new SmithWatermanGotoh().GetSimilarity(firstWord, secondWord);

            case SimMetricType.SmithWatermanGotohWindowedAffine:
                return new SmithWatermanGotohWindowedAffine().GetSimilarity(firstWord, secondWord);

            default:
                // Levenstein is the fallback for every value not listed above
                return new Levenstein().GetSimilarity(firstWord, secondWord);
            }
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Computes the similarity of two strings with the metric selected by <paramref name="type"/>.
        /// </summary>
        /// <param name="str1">First string; trimmed before it is handed to the metric.</param>
        /// <param name="str2">Second string; trimmed before it is handed to the metric.</param>
        /// <param name="type">
        /// One of the <c>AlgorithmTypes</c> constants. Any other value selects
        /// <c>SmithWatermanGotoh</c>.
        /// </param>
        /// <returns>The value returned by the selected metric's <c>GetSimilarity</c>.</returns>
        public double GetSimilarity(string str1, string str2, string type)
        {
            IStringMetric metric;

            switch (type)
            {
            case AlgorithmTypes.BlockDistance:
                metric = new BlockDistance();
                break;

            case AlgorithmTypes.ChapmanLengthDeviation:
                metric = new ChapmanLengthDeviation();
                break;

            case AlgorithmTypes.ChapmanMeanLength:
                metric = new ChapmanMeanLength();
                break;

            case AlgorithmTypes.CosineSimilarity:
                metric = new CosineSimilarity();
                break;

            case AlgorithmTypes.DiceSimilarity:
                metric = new DiceSimilarity();
                break;

            case AlgorithmTypes.EuclideanDistance:
                metric = new EuclideanDistance();
                break;

            case AlgorithmTypes.JaccardSimilarity:
                metric = new JaccardSimilarity();
                break;

            case AlgorithmTypes.Jaro:
                metric = new Jaro();
                break;

            case AlgorithmTypes.JaroWinkler:
                metric = new JaroWinkler();
                break;

            case AlgorithmTypes.Levenstein:
                metric = new Levenstein();
                break;

            case AlgorithmTypes.MatchingCoefficient:
                metric = new MatchingCoefficient();
                break;

            case AlgorithmTypes.MongeElkan:
                metric = new MongeElkan();
                break;

            case AlgorithmTypes.NeedlemanWunch:
                metric = new NeedlemanWunch();
                break;

            case AlgorithmTypes.OverlapCoefficient:
                metric = new OverlapCoefficient();
                break;

            case AlgorithmTypes.QGramsDistance:
                metric = new QGramsDistance();
                break;

            case AlgorithmTypes.SmithWaterman:
                metric = new SmithWaterman();
                break;

            case AlgorithmTypes.SmithWatermanGotohWindowedAffine:
                metric = new SmithWatermanGotohWindowedAffine();
                break;

            // SmithWatermanGotoh is both an explicit choice and the fallback for unknown types
            case AlgorithmTypes.SmithWatermanGotoh:
            default:
                metric = new SmithWatermanGotoh();
                break;
            }

            // Surrounding whitespace is removed from both inputs before they are compared
            return metric.GetSimilarity(str1.Trim(), str2.Trim());
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Recommends games by running <c>CollaborativeFiltering</c> with <c>JaccardSimilarity</c>
        /// over one profile per user, where a profile holds the ids of the games found through
        /// that user's orders.
        /// </summary>
        /// <param name="gamekey">Id of the game the recommendation is seeded with.</param>
        /// <param name="userDTO">
        /// The requesting user, or <c>null</c> for an anonymous request. When <c>null</c>, the
        /// requester is represented by a throw-away profile that contains only
        /// <paramref name="gamekey"/> and the demographic fallback is skipped.
        /// </param>
        /// <returns>
        /// The games whose ids appear as keys in the recommendation result, mapped to
        /// <c>GameDTO</c>. When the first result has a value of 0 and <paramref name="userDTO"/>
        /// is available, the recommendation is recomputed using only the profiles of users with
        /// the same <c>IsWoman</c> and <c>Adulthood</c> values, and that result is returned.
        /// </returns>
        public IEnumerable <GameDTO> GetGamesByCollaborative(Guid gamekey, UserDTO userDTO = null)
        {
            // Materialize the users once; the original re-enumerated the source on every index access.
            var users = _unitOfWork.Users.GetAll().ToList();

            // Default profile for a requester who is not among the stored users (or is anonymous).
            UserProfile currentUserProfile = new UserProfile(Guid.NewGuid(), new Guid[] { gamekey });

            // profiles[i] always belongs to users[i]; the fallback below relies on this alignment.
            var profiles = new List <UserProfile>(users.Count);

            foreach (var currentUser in users)
            {
                var userId   = currentUser.Id;
                var orders   = _unitOfWork.Orders.Get(u => u.UserId == userId);
                var orderDet = _unitOfWork.OrderDetails.Get(od => orders.Any(o => od.OrderId == o.Id));
                var games    = _unitOfWork.Games.Get(g => orderDet.Any(o => g.Id == o.GameId)).ToArray();

                var gamesId = new Guid[games.Length];

                for (int j = 0; j < games.Length; j++)
                {
                    gamesId[j] = games[j].Id;
                }

                // userDTO is optional, so it must be checked before its Id is read.
                if (userDTO != null && userId == userDTO.Id)
                {
                    // The requester's profile is their own games plus the seed game.
                    // The original built this array but then passed the one without gamekey.
                    var gamesIds = new Guid[gamesId.Length + 1];
                    gamesId.CopyTo(gamesIds, 0);
                    gamesIds[gamesId.Length] = gamekey;

                    currentUserProfile = new UserProfile(userId, gamesIds);
                }

                profiles.Add(new UserProfile(userId, gamesId));
            }

            var similarity = new JaccardSimilarity();
            var engine     = new CollaborativeFiltering();

            var results          = engine.recommend(profiles, similarity, currentUserProfile);
            var recommendedGames = _unitOfWork.Games.Get(x => results.Any(r => x.Id == r.Key));

            // ElementAt(0) throws on an empty result, so emptiness is checked first.
            bool topScoreIsZero = results.Any() && results.ElementAt(0).Value == 0;

            // The fallback filters on userDTO's attributes, so it cannot run without userDTO.
            if (!topScoreIsZero || userDTO == null)
            {
                return(_mapper.Map <IEnumerable <GameDTO> >(recommendedGames));
            }

            // Fallback: restrict the comparison to users sharing the requester's IsWoman and
            // Adulthood values. The already-built profiles are reused instead of being re-queried.
            var demographicProfiles = new List <UserProfile>();

            for (int i = 0; i < users.Count; i++)
            {
                if (users[i].IsWoman == userDTO.IsWoman && users[i].Adulthood == userDTO.Adulthood)
                {
                    demographicProfiles.Add(profiles[i]);
                }
            }

            var fallbackSimilarity = new JaccardSimilarity();
            var fallbackEngine     = new CollaborativeFiltering();

            // The original computed a second result but kept using, and returning, the first one.
            var fallbackResults = fallbackEngine.recommend(demographicProfiles, fallbackSimilarity, currentUserProfile);
            var fallbackGames   = _unitOfWork.Games.Get(x => fallbackResults.Any(r => x.Id == r.Key));

            return(_mapper.Map <IEnumerable <GameDTO> >(fallbackGames));
        }
Ejemplo n.º 16
0
 // [SetUp]
 // Constructor: calls LoadData() and then stores a new JaccardSimilarity in _myJaccardSimilarity.
 // NOTE(review): the commented-out [SetUp] above suggests this was once a per-test set-up
 // method rather than a constructor — confirm against the test framework in use.
 public JaccardSimilarityUnitTests()
 {
     LoadData();
     _myJaccardSimilarity = new JaccardSimilarity();
 }