Example #1
0
        /// <summary>
        /// Computes the cosine similarity between the term vectors of two documents and
        /// prints the vocabulary, both vectors and the resulting score to the console.
        /// </summary>
        /// <param name="cosineSimilarity">Provides the term-matrix rows (<c>MatrixMovie</c>) and the vocabulary (<c>BagOfWords</c>).</param>
        /// <param name="documentA">Name of the first document; matched against each row's <c>Document</c> ignoring case.</param>
        /// <param name="documentB">Name of the second document; matched the same way.</param>
        /// <param name="documentAIndex">Label printed for the first document.</param>
        /// <param name="documentBIndex">Label printed for the second document.</param>
        /// <returns>
        /// dot(A, B) / (|A| * |B|) over the first <c>BagOfWords.Count</c> rows of each document,
        /// or 0 when either vector has zero magnitude (instead of NaN from 0/0).
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// A document has fewer matrix rows than there are words in the vocabulary.
        /// </exception>
        public double CalculateCosineSimilarity(CosineSimilarity cosineSimilarity, string documentA, string documentB, string documentAIndex, string documentBIndex)
        {
            // Materialize each document's rows once; a deferred query combined with
            // ElementAt(i) would re-run the filter on every loop iteration.
            var rowsA = cosineSimilarity.MatrixMovie
                        .Where(m => string.Equals(m.Document, documentA, StringComparison.OrdinalIgnoreCase))
                        .ToList();
            var rowsB = cosineSimilarity.MatrixMovie
                        .Where(m => string.Equals(m.Document, documentB, StringComparison.OrdinalIgnoreCase))
                        .ToList();

            double dotProduct   = 0;
            double squaredNormA = 0;
            double squaredNormB = 0;

            for (var i = 0; i < cosineSimilarity.BagOfWords.Count; i++)
            {
                double a = rowsA[i].Exist;
                double b = rowsB[i].Exist;

                dotProduct   += a * b;
                squaredNormA += a * a;
                squaredNormB += b * b;
            }

            // A document with no vocabulary words has a zero vector; report 0 rather than NaN.
            var denominator = Math.Sqrt(squaredNormA) * Math.Sqrt(squaredNormB);
            var finalResult = denominator > 0 ? dotProduct / denominator : 0d;

            // Header row: the vocabulary.
            foreach (var word in cosineSimilarity.BagOfWords)
            {
                Console.Write("|" + word);
            }

            Console.WriteLine();
            Console.Write(string.Concat("DOCUMENTO BINÁRIO ", documentAIndex, ": "));

            foreach (var row in rowsA)
            {
                Console.Write(" | " + row.Exist);
            }

            Console.WriteLine();
            Console.Write(string.Concat("DOCUMENTO BINÁRIO ", documentBIndex, ": "));

            foreach (var row in rowsB)
            {
                Console.Write(" | " + row.Exist);
            }

            Console.WriteLine();
            Console.WriteLine(string.Concat("SIM(", documentAIndex, ",", documentBIndex, ") = ", finalResult));
            Console.WriteLine();

            return finalResult;
        }
Example #2
0
        /// <summary>
        /// Console entry point. Tokenizes every movie text, removes stop words, builds the
        /// shared vocabulary and the movie/word matrix, prints the cosine similarity for each
        /// pair returned by <c>Combinations.GetPermutations(documents, 2)</c>, and finally
        /// prints a per-movie word frequency table.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            var movies         = new Movies();
            var stopWords      = new StopWords();
            var bagOfWordsCalc = new BagOfWordsCalc();

            var moviesItems        = movies.Get().ToList();
            var stopWordsItens     = stopWords.Get().ToList();
            var charactersToRemove = stopWords.CharactersToRemove();

            var finalWords = new List<string>();
            // Tracks which words are already in finalWords so the vocabulary stays
            // duplicate-free while keeping first-seen order.
            var knownWords = new HashSet<string>();

            // Strip unwanted characters from each movie text and split it into tokens.
            foreach (var item in moviesItems)
            {
                foreach (var caracther in charactersToRemove)
                {
                    item.Movie = item.Movie.Replace(caracther, "");
                }

                // Removing characters can leave consecutive spaces; skip the empty
                // tokens they would otherwise produce.
                item.MovieSplited.AddRange(item.Movie.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries));
            }

            // Drop stop words and collect the vocabulary.
            foreach (var item in moviesItems)
            {
                item.MovieSplited = item.MovieSplited.Where(a => !stopWordsItens.Contains(a)).ToList();

                foreach (var token in item.MovieSplited)
                {
                    if (knownWords.Add(token))
                    {
                        finalWords.Add(token);
                    }
                }
            }

            // One matrix row per (movie, vocabulary word): 1 when the movie contains the word, else 0.
            var matrixMovies = new List<MatrixMovie>();

            foreach (var item in moviesItems)
            {
                var movieWords = new HashSet<string>(item.MovieSplited);

                foreach (var finalWord in finalWords)
                {
                    var exists = movieWords.Contains(finalWord) ? 1 : 0;
                    matrixMovies.Add(new MatrixMovie
                    {
                        Document = item.Movie,
                        Word     = finalWord,
                        Exist    = exists
                    });
                }
            }

            var cosineSimilarity = new CosineSimilarity
            {
                MatrixMovie = matrixMovies,
                BagOfWords  = finalWords
            };

            var documents = moviesItems.Select(a => a.Movie).ToList();
            var result    = Combinations.GetPermutations(documents, 2).ToList();

            // The similarity routine prints its own report; its return value is not needed here.
            foreach (var item in result)
            {
                var itens = item.ToList();

                var document1Index = moviesItems.FirstOrDefault(a => a.Movie == itens[0])?.Index;
                var document2Index = moviesItems.FirstOrDefault(a => a.Movie == itens[1])?.Index;

                bagOfWordsCalc.CalculateCosineSimilarity(cosineSimilarity, itens[0], itens[1], document1Index, document2Index);
            }

            Console.WriteLine();
            Console.WriteLine("TABELA FREQUENCIA");
            Console.WriteLine();

            foreach (var word in finalWords)
            {
                Console.Write("|" + word);
            }

            Console.WriteLine();

            foreach (var item in moviesItems)
            {
                Console.Write(item.Movie + ": ");

                foreach (var word in finalWords)
                {
                    // Frequency = number of tokens equal to the word (exact match, not substring).
                    Console.Write(" | " + item.MovieSplited.Count(token => token == word));
                }

                Console.WriteLine();
            }

            Console.WriteLine();
            Console.ReadLine();
        }