/// <summary>
/// Builds the TF-IDF weight vector of <paramref name="query"/> from the statistics held in
/// <paramref name="documentMatrix"/> and returns the cosine similarity of the query to every document.
/// </summary>
/// <param name="documentMatrix">
/// Matrix model that supplies the per-term document counts (<c>IsTermInDocument</c>), the document list
/// and the data consumed by <c>CalcaulateCosineSimilarity</c>.
/// </param>
/// <param name="query">Query text; it is turned into per-term counts by <c>CountFrequencyOfTermQuery</c>.</param>
/// <returns>The dictionary produced by <c>CalcaulateCosineSimilarity</c> for the query vector.</returns>
public Dictionary<string, double> CalculateSimilarity(TermDocumentMatrixModel documentMatrix, string query)
{
    Dictionary<string, int> queryTermCounts = CountFrequencyOfTermQuery(query);
    Dictionary<string, double> queryWeights = new Dictionary<string, double>();

    // One calculator instance serves every term instead of three fresh allocations per term.
    TermDocumentMatrix calculator = new TermDocumentMatrix();

    // The collection size is the same for every term, so it is read once.
    int documentCount = documentMatrix.DocumentList.Count;

    double sumOfSquares = 0.0;

    foreach (var term in queryTermCounts)
    {
        // A term that the collection does not know keeps a document count of 0.
        int documentsWithTerm;
        if (!documentMatrix.IsTermInDocument.TryGetValue(term.Key, out documentsWithTerm))
        {
            documentsWithTerm = 0;
        }

        double tf = calculator.CalculateTF(term.Value);
        double idf = calculator.CalculateIDF(documentsWithTerm, documentCount);
        double weight = calculator.CalculateTF_IDF(tf, idf);

        queryWeights.Add(term.Key, weight);
        sumOfSquares = sumOfSquares + Math.Pow(weight, 2);
    }

    // Euclidean length of the query vector, rounded to 3 decimals.
    double vectorNorm = Math.Round(Math.Sqrt(sumOfSquares), 3);

    return CalcaulateCosineSimilarity(queryWeights, vectorNorm, documentMatrix);
}
/// <summary>
/// Fills <paramref name="documentMatrix"/> with the per-term statistics of its documents: the number of
/// documents containing each index term, the TF of each term per document, the IDF of each term, the
/// TF-IDF of each term per document, and each document's vector norm (square root of the sum of its
/// squared TF-IDF weights, rounded to 3 decimals).
/// </summary>
/// <param name="documentMatrix">
/// Model whose <c>IndexTermList</c> and <c>DocumentList</c> are read and whose statistic properties and
/// per-document <c>VectorNorm</c> are overwritten.
/// </param>
/// <returns>The same <paramref name="documentMatrix"/> instance, after it has been populated.</returns>
/// <exception cref="ArgumentException">
/// Thrown by <c>Dictionary.Add</c> when <c>IndexTermList</c> contains a repeated term or two documents
/// share the same <c>Name</c>.
/// </exception>
public TermDocumentMatrixModel SettingTermDocumentMatrix(TermDocumentMatrixModel documentMatrix)
{
    var documentCountByTerm = new Dictionary<string, int>();
    var tfByTerm = new Dictionary<string, Dictionary<string, double>>();
    var idfByTerm = new Dictionary<string, double>();
    var tfIdfByTerm = new Dictionary<string, Dictionary<string, double>>();

    int totalDocuments = documentMatrix.DocumentList.Count;

    // The norms are accumulated below; start them at zero so that calling this method again on the
    // same model does not add onto the previously computed (already square-rooted) values.
    foreach (var document in documentMatrix.DocumentList)
    {
        document.VectorNorm = 0.0;
    }

    foreach (var term in documentMatrix.IndexTermList)
    {
        int documentsWithTerm = 0;
        var tfByDocument = new Dictionary<string, double>();
        var tfIdfByDocument = new Dictionary<string, double>();

        // First pass: TF per document and the number of documents that contain the term.
        foreach (var document in documentMatrix.DocumentList)
        {
            if (document.IndexTermDocuments.ContainsKey(term))
            {
                documentsWithTerm++;
                tfByDocument.Add(document.Name, CalculateTF(document.IndexTermDocuments[term]));
            }
            else
            {
                // The term does not occur in this document.
                tfByDocument.Add(document.Name, 0);
            }
        }

        // The IDF needs the complete document count of the term, hence the separate second pass.
        double idf = CalculateIDF(documentsWithTerm, totalDocuments);
        idfByTerm.Add(term, idf);
        documentCountByTerm.Add(term, documentsWithTerm);
        tfByTerm.Add(term, tfByDocument);

        // Second pass: TF-IDF per document, accumulating the squared weights for the norm.
        foreach (var document in documentMatrix.DocumentList)
        {
            var weight = CalculateTF_IDF(tfByDocument[document.Name], idf);
            tfIdfByDocument.Add(document.Name, weight);
            document.VectorNorm = document.VectorNorm + Math.Pow(weight, 2);
        }

        tfIdfByTerm.Add(term, tfIdfByDocument);
    }

    // Turn each accumulated sum of squares into the vector length.
    foreach (var document in documentMatrix.DocumentList)
    {
        document.VectorNorm = Math.Round(Math.Sqrt(document.VectorNorm), 3);
    }

    documentMatrix.IDFIndexTermOfDocument = idfByTerm;
    documentMatrix.TFIndexTermOfDocument = tfByTerm;
    documentMatrix.IsTermInDocument = documentCountByTerm;
    documentMatrix.TF_IDFIndexTermOfDocument = tfIdfByTerm;

    return documentMatrix;
}
/// <summary>
/// Writes the console banner: a title, the number of documents in the model, a numbered list of the
/// document names, and a closing separator.
/// </summary>
/// <param name="documentMatrixModel">Model whose <c>DocumentList</c> supplies the count and the names.</param>
public void DisplayHead(TermDocumentMatrixModel documentMatrixModel)
{
    Console.WriteLine("==[ Vector Model ]==");

    var documents = documentMatrixModel.DocumentList;
    Console.WriteLine("\nInformation: We use these {0} articles from Medium.com for demonstrating the Vector Model", documents.Count);

    // Documents are numbered starting at 1 in list order.
    int position = 0;
    foreach (var document in documents)
    {
        position++;
        Console.WriteLine("({0}) {1}", position, document.Name);
    }

    Console.WriteLine("The result will show the retrieved document and similarity of Vector Model");
    Console.WriteLine("\n------------------------------\n");
}
/// <summary>
/// Writes the similarity section to the console: a heading, the query text, and a table built from
/// <paramref name="cosineSimilarity"/> via <c>GetSampleTableData</c> and <c>ConsoleTableBuilder</c>.
/// </summary>
/// <param name="documentMatrixModel">Not read by this method.</param>
/// <param name="cosineSimilarity">Similarity values handed to <c>GetSampleTableData</c>.</param>
/// <param name="query">Query text echoed above the table.</param>
public void DisplayBodyResult(TermDocumentMatrixModel documentMatrixModel, Dictionary<string, double> cosineSimilarity, string query)
{
    Console.WriteLine("\n\n=[ Similarity ]=\n");
    Console.WriteLine("Query is :{0}", query);

    var similarityTable = GetSampleTableData(cosineSimilarity);
    var tableBuilder = ConsoleTableBuilder.From(similarityTable);
    tableBuilder.ExportAndWriteLine();
}
/// <summary>
/// Computes, for every document in <paramref name="documentMatrix"/>, the cosine similarity between
/// the document's TF-IDF weights and the query's TF-IDF weights.
/// </summary>
/// <param name="tf_idfQuery">TF-IDF weight of each query term, keyed by term.</param>
/// <param name="vectorNorm">Length of the query weight vector.</param>
/// <param name="documentMatrix">
/// Model supplying the documents (<c>Name</c>, <c>VectorNorm</c>) and the per-term, per-document
/// weights in <c>TF_IDFIndexTermOfDocument</c>.
/// </param>
/// <returns>
/// Similarity per document name, rounded to 3 decimals. A document is given 0 when the rounded product
/// of the two norms is zero or not a number, because the quotient is undefined in that case.
/// </returns>
public Dictionary<string, double> CalcaulateCosineSimilarity(Dictionary<string, double> tf_idfQuery, double vectorNorm, TermDocumentMatrixModel documentMatrix)
{
    var cosineSimilarity = new Dictionary<string, double>();

    foreach (var document in documentMatrix.DocumentList)
    {
        // Dot product of the query vector and this document's vector. Query terms that the
        // collection does not know contribute nothing and are skipped.
        double dotProduct = 0.0;
        foreach (var queryTerm in tf_idfQuery)
        {
            Dictionary<string, double> weightsByDocument;
            if (documentMatrix.TF_IDFIndexTermOfDocument.TryGetValue(queryTerm.Key, out weightsByDocument))
            {
                dotProduct = dotProduct + (weightsByDocument[document.Name] * queryTerm.Value);
            }
        }

        double normProduct = Math.Round(document.VectorNorm * vectorNorm, 3);

        // Dividing by a zero (or NaN) norm product would store NaN/Infinity as the similarity;
        // such a document is reported as not similar at all.
        double similarity = 0.0;
        if (normProduct != 0.0 && !double.IsNaN(normProduct))
        {
            similarity = Math.Round(dotProduct / normProduct, 3);
        }

        cosineSimilarity.Add(document.Name, similarity);
    }

    return cosineSimilarity;
}