/// <summary>
/// Computes the cosine similarity between the term vectors of two documents and prints
/// the bag of words, both vectors and the resulting score to the console.
/// </summary>
/// <param name="cosineSimilarity">
/// Supplies the document/word rows (<c>MatrixMovie</c>) and the vocabulary (<c>BagOfWords</c>).
/// </param>
/// <param name="documentA">Text of the first document; matched case-insensitively against each row's <c>Document</c>.</param>
/// <param name="documentB">Text of the second document; matched the same way.</param>
/// <param name="documentAIndex">Label printed for the first document.</param>
/// <param name="documentBIndex">Label printed for the second document.</param>
/// <returns>
/// The dot product of the two vectors divided by the product of their magnitudes,
/// or 0 when either vector has zero magnitude.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when either document has fewer matching rows than there are entries in <c>BagOfWords</c>.
/// </exception>
public double CalculateCosineSimilarity(CosineSimilarity cosineSimilarity, string documentA, string documentB, string documentAIndex, string documentBIndex)
{
    // Materialize each document's rows once. Leaving these as deferred queries would
    // re-run the filter over the whole matrix on every positional access below.
    var document1 = cosineSimilarity.MatrixMovie
        .Where(m => string.Equals(m.Document, documentA, StringComparison.OrdinalIgnoreCase))
        .ToList();
    var document2 = cosineSimilarity.MatrixMovie
        .Where(m => string.Equals(m.Document, documentB, StringComparison.OrdinalIgnoreCase))
        .ToList();

    double dotProduct = 0;
    double squaredNormA = 0;
    double squaredNormB = 0;

    for (var i = 0; i < cosineSimilarity.BagOfWords.Count; i++)
    {
        double a = document1[i].Exist;
        double b = document2[i].Exist;

        dotProduct += a * b;
        squaredNormA += a * a;
        squaredNormB += b * b;
    }

    // A vector with no terms has zero magnitude; report "no similarity" instead of 0/0 = NaN.
    var denominator = Math.Sqrt(squaredNormA) * Math.Sqrt(squaredNormB);
    var finalResult = denominator > 0 ? dotProduct / denominator : 0d;

    foreach (var item in cosineSimilarity.BagOfWords)
    {
        Console.Write("|" + item);
    }

    Console.WriteLine();
    Console.Write(string.Concat("DOCUMENTO BINÁRIO ", documentAIndex, ": "));
    foreach (var item in document1)
    {
        Console.Write(" | " + item.Exist);
    }

    Console.WriteLine();
    Console.Write(string.Concat("DOCUMENTO BINÁRIO ", documentBIndex, ": "));
    foreach (var item in document2)
    {
        Console.Write(" | " + item.Exist);
    }

    Console.WriteLine();
    Console.WriteLine(string.Concat("SIM(", documentAIndex, ",", documentBIndex, ") = ", finalResult));
    Console.WriteLine();

    return finalResult;
}
/// <summary>
/// Console entry point. Loads the movies and stop words, strips unwanted characters,
/// tokenizes each movie text, removes stop words, builds the shared vocabulary and the
/// document/word presence matrix, prints the cosine similarity for every document pair
/// returned by <c>Combinations.GetPermutations</c>, and finally prints a word-frequency
/// table before waiting for a line of input.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    var movies = new Movies();
    var stopWords = new StopWords();
    var bagOfWordsCalc = new BagOfWordsCalc();

    var moviesItems = movies.Get().ToList();
    // Hash set: stop-word membership is tested once per token.
    var stopWordsItens = new HashSet<string>(stopWords.Get());
    var finalWords = new List<string>();

    // Clean each movie text and split it into tokens.
    foreach (var item in moviesItems)
    {
        foreach (var caracther in stopWords.CharactersToRemove())
        {
            item.Movie = item.Movie.Replace(caracther, "");
        }

        // Consecutive spaces (e.g. left behind by removed characters) would otherwise
        // produce empty tokens that end up in the vocabulary.
        item.MovieSplited.AddRange(item.Movie.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries));
    }

    // Drop stop words and collect the vocabulary in first-seen order, without duplicates.
    var seenWords = new HashSet<string>();
    foreach (var item in moviesItems)
    {
        item.MovieSplited = item.MovieSplited.Where(a => !stopWordsItens.Contains(a)).ToList();

        foreach (var word in item.MovieSplited)
        {
            if (seenWords.Add(word))
            {
                finalWords.Add(word);
            }
        }
    }

    // One row per (document, vocabulary word) with a 1/0 presence flag.
    var matrixMovies = new List<MatrixMovie>();
    foreach (var item in moviesItems)
    {
        var tokens = new HashSet<string>(item.MovieSplited);
        foreach (var finalWord in finalWords)
        {
            var exists = tokens.Contains(finalWord) ? 1 : 0;
            matrixMovies.Add(new MatrixMovie { Document = item.Movie, Word = finalWord, Exist = exists });
        }
    }

    var cosineSimilarity = new CosineSimilarity { MatrixMovie = matrixMovies, BagOfWords = finalWords };
    var documents = moviesItems.Select(a => a.Movie).ToList();
    var result = Combinations.GetPermutations(documents, 2).ToList();

    foreach (var item in result)
    {
        var itens = item.ToList();

        var document1Index = moviesItems.FirstOrDefault(a => a.Movie == itens[0])?.Index;
        var document2Index = moviesItems.FirstOrDefault(a => a.Movie == itens[1])?.Index;

        // Called for its console output; the returned score is not used here.
        bagOfWordsCalc.CalculateCosineSimilarity(cosineSimilarity, itens[0], itens[1], document1Index, document2Index);
    }

    Console.WriteLine();
    Console.WriteLine("TABELA FREQUENCIA");
    Console.WriteLine();

    foreach (var word in finalWords)
    {
        Console.Write("|" + word);
    }

    Console.WriteLine();

    foreach (var item in moviesItems)
    {
        Console.Write(item.Movie + ": ");
        foreach (var word in finalWords)
        {
            // Exact-match count: a substring test would also count shorter tokens
            // contained in the word (e.g. "man" inside "batman").
            Console.Write(" | " + item.MovieSplited.Count(f => f == word));
        }

        Console.WriteLine();
    }

    Console.WriteLine();
    Console.ReadLine();
}