/// <summary>
/// Compares <c>SweetSpotSimilarity.Tf</c> with <c>DefaultSimilarity.Tf</c>
/// under several baseline tf factor settings: equal, strictly higher,
/// flat up to a frequency of 6, and the zero-frequency case.
/// </summary>
public void TestSweetSpotTf()
{
    SweetSpotSimilarity sweetSpot = new SweetSpotSimilarity();
    TFIDFSimilarity reference = new DefaultSimilarity();
    TFIDFSimilarity subject = sweetSpot;

    // Factors (0, 0): both similarities are expected to produce the same tf.
    sweetSpot.SetBaselineTfFactors(0.0f, 0.0f);
    for (int freq = 1; freq < 1000; freq++)
    {
        assertEquals("tf: i=" + freq, reference.Tf(freq), subject.Tf(freq), 0.0f);
    }

    // Factors (1, 0): the sweet-spot tf is expected to be strictly above the default.
    sweetSpot.SetBaselineTfFactors(1.0f, 0.0f);
    for (int freq = 1; freq < 1000; freq++)
    {
        float referenceTf = reference.Tf(freq);
        float subjectTf = subject.Tf(freq);
        assertTrue("tf: i=" + freq + " : d=" + referenceTf + " < s=" + subjectTf,
                   referenceTf < subjectTf);
    }

    // Factors (1, 6): tf is expected to stay flat at 1 for frequencies 1..6.
    sweetSpot.SetBaselineTfFactors(1.0f, 6.0f);
    for (int freq = 1; freq <= 6; freq++)
    {
        assertEquals("tf flat1: i=" + freq, 1.0f, subject.Tf(freq), 0.0f);
    }

    // Factors (2, 6): tf is expected to stay flat at 2 for frequencies 1..6 ...
    sweetSpot.SetBaselineTfFactors(2.0f, 6.0f);
    for (int freq = 1; freq <= 6; freq++)
    {
        assertEquals("tf flat2: i=" + freq, 2.0f, subject.Tf(freq), 0.0f);
    }

    // ... and to remain strictly below the default from 6 up to 1000.
    for (int freq = 6; freq <= 1000; freq++)
    {
        float subjectTf = subject.Tf(freq);
        float referenceTf = reference.Tf(freq);
        assertTrue("tf: i=" + freq + " : s=" + subjectTf + " < d=" + referenceTf,
                   subjectTf < referenceTf);
    }

    // Degenerate input: a frequency of zero is expected to yield a tf of zero.
    assertEquals("tf zero", 0.0f, subject.Tf(0), 0.0f);
}
/// <summary>
/// Reads the term frequency vector of document 1 from the index stored in the
/// application's <c>App_Data</c> directory, computes a TF-IDF weight for each
/// term with <c>DefaultSimilarity</c>, writes a per-term HTML report to
/// <c>Response</c>, and records every term in <c>TotalNoun</c>.
/// </summary>
/// <returns>
/// A <see cref="Hashtable"/> mapping each term (string) to its TF-IDF weight
/// (float). Empty when document 1 has no term vector.
/// </returns>
public Hashtable GetVector2()
{
    StringBuilder sb = new StringBuilder();

    // Path.Combine yields a well-formed path whether or not BaseDirectory
    // already ends with a directory separator.
    string indexPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "App_Data");

    Hashtable ht = new Hashtable();
    Hashtable ht2 = new Hashtable();
    DefaultSimilarity similarity = new DefaultSimilarity();

    // Dispose the directory and the reader deterministically so the index
    // files are released even when an exception is thrown below.
    using (FSDirectory dir = FSDirectory.Open(new DirectoryInfo(indexPath)))
    using (IndexReader ir = IndexReader.Open(dir, false))
    {
        string[] label = null;
        int[] freq = null;

        // The lookup may yield nothing when the document has no term vectors.
        // NOTE(review): when several fields carry term vectors only the last one
        // is kept, while the Term below always targets the "Text" field — confirm
        // that "Text" is the only vectorised field.
        var vectors = ir.GetTermFreqVectors(1);
        if (vectors != null)
        {
            foreach (var obj in vectors)
            {
                label = obj.GetTerms();
                freq = obj.GetTermFrequencies();
            }
        }

        Response.Write("s1<br>");

        if (label != null && freq != null)
        {
            for (int i = 0; i < label.Length; i++)
            {
                ht.Add(label[i], freq[i]);
                if (!TotalNoun.Contains(label[i]))
                {
                    TotalNoun.Add(label[i]);
                }
            }
        }

        foreach (DictionaryEntry obj in ht)
        {
            // Compute each factor once and reuse it for both the report and the result.
            float tf = similarity.Tf(Convert.ToInt32(obj.Value));
            Term t = new Term("Text", obj.Key.ToString());
            float idf = similarity.Idf(ir.DocFreq(t), ir.NumDocs());
            float tfidf = tf * idf;

            // Pass the arguments straight to AppendFormat: formatting an already
            // formatted string would misinterpret any '{' or '}' inside a term.
            // NOTE(review): terms are emitted without HTML encoding — confirm the
            // index content is trusted.
            sb.AppendFormat("label:{0} freq:{1}<BR>", obj.Key, obj.Value);
            sb.AppendFormat("TF:{0}<br>", tf);
            sb.AppendFormat("IDF:{0}<br>", idf);
            sb.AppendFormat("TF-IDF:{0}<br>", tfidf);

            ht2.Add(obj.Key, tfidf);
        }
    }

    Response.Write(sb.ToString());
    return ht2;
}