/// <summary>
/// Writes the norms for <paramref name="field"/> into <paramref name="result"/>,
/// beginning at index <paramref name="offset"/>.
/// </summary>
/// <remarks>
/// Exactly one of three sources is used:
/// a cached array found in <c>normsCache</c>, the encoded default norm (when
/// <c>HasNorms(field)</c> is false), or the individual sub-readers.
/// Earlier revisions also read every sub-reader unconditionally before picking a
/// source; each branch then overwrote or repeated that work, so the extra pass
/// has been removed.
/// </remarks>
/// <param name="field">Name of the field whose norms are requested.</param>
/// <param name="result">Destination buffer that receives the norm bytes.</param>
/// <param name="offset">Index in <paramref name="result"/> at which writing starts.</param>
public override void Norms(System.String field, byte[] result, int offset)
{
    // NOTE(review): lock (this) is kept as-is; other members of this type may
    // synchronize on the same monitor - confirm before switching to a private gate.
    lock (this)
    {
        EnsureOpen();
        byte[] bytes = normsCache[field];

        if (bytes != null)
        {
            // cache hit
            Array.Copy(bytes, 0, result, offset, MaxDoc);
            return;
        }

        if (!HasNorms(field))
        {
            // No norms for this field: fill every slot from offset to the end of
            // the buffer with the encoded norm for 1.0f (loop-invariant, so computed once).
            byte defaultNorm = (byte)DefaultSimilarity.EncodeNorm(1.0f);
            for (int i = offset; i < result.Length; i++)
            {
                result[i] = defaultNorm;
            }
            return;
        }

        for (int i = 0; i < subReaders.Length; i++)
        {
            // read from segments; starts[i] positions each sub-reader's slice in result
            subReaders[i].Norms(field, result, offset + starts[i]);
        }
    }
}
/// <summary>
/// Compares <c>Tf</c> of a <c>SweetSpotSimilarity</c> against <c>DefaultSimilarity</c>
/// under several <c>SetBaselineTfFactors</c> settings.
/// </summary>
public void TestSweetSpotTf()
{
    SweetSpotSimilarity sweetSpot = new SweetSpotSimilarity();
    TFIDFSimilarity baseline = new DefaultSimilarity();
    TFIDFSimilarity candidate = sweetSpot;

    // tf equal
    sweetSpot.SetBaselineTfFactors(0.0f, 0.0f);
    for (int freq = 1; freq < 1000; freq++)
    {
        assertEquals("tf: i=" + freq, baseline.Tf(freq), candidate.Tf(freq), 0.0f);
    }

    // tf higher
    sweetSpot.SetBaselineTfFactors(1.0f, 0.0f);
    for (int freq = 1; freq < 1000; freq++)
    {
        assertTrue(
            "tf: i=" + freq + " : d=" + baseline.Tf(freq) + " < s=" + candidate.Tf(freq),
            baseline.Tf(freq) < candidate.Tf(freq));
    }

    // tf flat
    sweetSpot.SetBaselineTfFactors(1.0f, 6.0f);
    for (int freq = 1; freq <= 6; freq++)
    {
        assertEquals("tf flat1: i=" + freq, 1.0f, candidate.Tf(freq), 0.0f);
    }

    sweetSpot.SetBaselineTfFactors(2.0f, 6.0f);
    for (int freq = 1; freq <= 6; freq++)
    {
        assertEquals("tf flat2: i=" + freq, 2.0f, candidate.Tf(freq), 0.0f);
    }

    // with the (2.0f, 6.0f) factors still in effect, the candidate must stay below the default
    for (int freq = 6; freq <= 1000; freq++)
    {
        assertTrue(
            "tf: i=" + freq + " : s=" + candidate.Tf(freq) + " < d=" + baseline.Tf(freq),
            candidate.Tf(freq) < baseline.Tf(freq));
    }

    // stupidity
    assertEquals("tf zero", 0.0f, candidate.Tf(0), 0.0f);
}
/// <summary>
/// Reads the term-frequency vectors of document 1 from the index under
/// <c>App_Data</c>, computes a TF-IDF weight for each term, writes a textual
/// report to the response, and returns the weights.
/// </summary>
/// <remarks>
/// When the document has several term vectors, only the last one returned by
/// <c>GetTermFreqVectors</c> is used (each iteration overwrites the previous
/// terms/frequencies). Every term seen is also added to <c>TotalNoun</c> if it
/// is not already there. If the document has no term vectors, an empty table
/// is returned.
/// </remarks>
/// <returns>A table mapping each term to its TF-IDF value.</returns>
public Hashtable GetVector2()
{
    StringBuilder sb = new StringBuilder();
    string indexPath = AppDomain.CurrentDomain.BaseDirectory.ToString() + "\\App_Data\\";
    DirectoryInfo dirInfo = new DirectoryInfo(indexPath);
    Hashtable ht = new Hashtable();
    Hashtable ht2 = new Hashtable();
    DefaultSimilarity similarity = new DefaultSimilarity();

    // Dispose the directory and the reader when done; previously both were
    // left open, leaking index handles on every call.
    using (FSDirectory dir = FSDirectory.Open(dirInfo))
    using (IndexReader ir = IndexReader.Open(dir, false))
    {
        string[] label = null;
        int[] freq = null;

        var vectors = ir.GetTermFreqVectors(1);
        if (vectors != null)
        {
            foreach (var vector in vectors)
            {
                label = vector.GetTerms();
                freq = vector.GetTermFrequencies();
            }
        }

        Response.Write("s1<br>");

        // label/freq stay null when the document has no term vectors.
        if (label != null && freq != null)
        {
            for (int i = 0; i < label.Length; i++)
            {
                ht.Add(label[i], freq[i]);
                if (!TotalNoun.Contains(label[i]))
                {
                    TotalNoun.Add(label[i]);
                }
            }
        }

        foreach (DictionaryEntry entry in ht)
        {
            float tf = similarity.Tf(Convert.ToInt32(entry.Value));
            Term t = new Term("Text", entry.Key.ToString());
            float idf = similarity.Idf(ir.DocFreq(t), ir.NumDocs());
            float tfidf = tf * idf;

            // Format directly with AppendFormat. The old code passed the output of
            // string.Format back in as a format string, which throws FormatException
            // whenever a term contains '{' or '}'.
            // NOTE(review): terms are written to the response without HTML encoding - confirm
            // the index content is trusted.
            sb.AppendFormat("label:{0} freq:{1}<BR>", entry.Key, entry.Value);
            sb.AppendFormat("TF:{0}<br>", tf);
            sb.AppendFormat("IDF:{0}<br>", idf);
            sb.AppendFormat("TF-IDF:{0}<br>", tfidf);

            ht2.Add(entry.Key, tfidf);
        }
    }

    Response.Write(sb.ToString());
    return ht2;
}
/// <summary>
/// Exercises norm computation of <c>SweetSpotSimilarity</c>, both directly and through a
/// per-field wrapper, comparing against <c>DefaultSimilarity</c> for a range of field lengths.
/// </summary>
public void TestSweetSpotComputeNorm()
{
    SweetSpotSimilarity sweetSpot = new SweetSpotSimilarity();
    sweetSpot.SetLengthNormFactors(1, 1, 0.5f, true);
    Similarity baseline = new DefaultSimilarity();
    Similarity candidate = sweetSpot;

    // base case, should degrade
    FieldInvertState state = new FieldInvertState("bogus") { Boost = 1.0f };
    for (int len = 1; len < 1000; len++)
    {
        state.Length = len;
        assertEquals(
            "base case: i=" + len,
            ComputeAndGetNorm(baseline, state),
            ComputeAndGetNorm(candidate, state),
            0.0f);
    }

    // make a sweet spot
    sweetSpot.SetLengthNormFactors(3, 10, 0.5f, true);
    for (int len = 3; len <= 10; len++)
    {
        state.Length = len;
        assertEquals("3,10: spot i=" + len, 1.0f, ComputeAndDecodeNorm(sweetSpot, sweetSpot, state), 0.0f);
    }

    // past the plateau the candidate is compared with the baseline at a length shifted by 9
    for (int len = 10; len < 1000; len++)
    {
        state.Length = len - 9;
        byte baselineNorm = ComputeAndGetNorm(baseline, state);
        state.Length = len;
        byte candidateNorm = ComputeAndGetNorm(candidate, state);
        assertEquals("3,10: 10<x : i=" + len, baselineNorm, candidateNorm, 0.0f);
    }

    // separate sweet spot for certain fields
    SweetSpotSimilarity barSim = new SweetSpotSimilarity();
    barSim.SetLengthNormFactors(8, 13, 0.5f, false);
    SweetSpotSimilarity yakSim = new SweetSpotSimilarity();
    yakSim.SetLengthNormFactors(6, 9, 0.5f, false);
    SweetSpotSimilarity aSim = new SweetSpotSimilarity();
    aSim.SetLengthNormFactors(5, 8, 0.5f, false);
    SweetSpotSimilarity bSim = new SweetSpotSimilarity();
    bSim.SetLengthNormFactors(5, 8, 0.1f, false);
    Similarity perField = new PerFieldSimilarityWrapperHelper(barSim, yakSim, aSim, bSim, sweetSpot);

    state = new FieldInvertState("foo") { Boost = 1.0f };
    for (int len = 3; len <= 10; len++)
    {
        state.Length = len;
        assertEquals("f: 3,10: spot i=" + len, 1.0f, ComputeAndDecodeNorm(sweetSpot, perField, state), 0.0f);
    }

    for (int len = 10; len < 1000; len++)
    {
        state.Length = len - 9;
        byte baselineNorm = ComputeAndGetNorm(baseline, state);
        state.Length = len;
        byte wrappedNorm = ComputeAndGetNorm(perField, state);
        assertEquals("f: 3,10: 10<x : i=" + len, baselineNorm, wrappedNorm, 0.0f);
    }

    state = new FieldInvertState("bar") { Boost = 1.0f };
    for (int len = 8; len <= 13; len++)
    {
        state.Length = len;
        assertEquals("f: 8,13: spot i=" + len, 1.0f, ComputeAndDecodeNorm(sweetSpot, perField, state), 0.0f);
    }

    state = new FieldInvertState("yak") { Boost = 1.0f };
    for (int len = 6; len <= 9; len++)
    {
        state.Length = len;
        assertEquals("f: 6,9: spot i=" + len, 1.0f, ComputeAndDecodeNorm(sweetSpot, perField, state), 0.0f);
    }

    state = new FieldInvertState("bar") { Boost = 1.0f };
    for (int len = 13; len < 1000; len++)
    {
        state.Length = len - 12;
        byte baselineNorm = ComputeAndGetNorm(baseline, state);
        state.Length = len;
        byte wrappedNorm = ComputeAndGetNorm(perField, state);
        assertEquals("f: 8,13: 13<x : i=" + len, baselineNorm, wrappedNorm, 0.0f);
    }

    state = new FieldInvertState("yak") { Boost = 1.0f };
    for (int len = 9; len < 1000; len++)
    {
        state.Length = len - 8;
        byte baselineNorm = ComputeAndGetNorm(baseline, state);
        state.Length = len;
        byte wrappedNorm = ComputeAndGetNorm(perField, state);
        assertEquals("f: 6,9: 9<x : i=" + len, baselineNorm, wrappedNorm, 0.0f);
    }

    // steepness
    for (int len = 9; len < 1000; len++)
    {
        state = new FieldInvertState("a") { Boost = 1.0f };
        state.Length = len;
        byte normForA = ComputeAndGetNorm(perField, state);

        state = new FieldInvertState("b") { Boost = 1.0f };
        state.Length = len;
        byte normForB = ComputeAndGetNorm(perField, state);

        assertTrue("s: i=" + len + " : a=" + normForA + " < b=" + normForB, normForA < normForB);
    }
}
/// <summary>
/// Initializes the helper: assigns a new <c>DefaultSimilarity</c> to <c>sim</c> and
/// keeps a reference to the supplied test instance.
/// </summary>
/// <param name="outerInstance">The test instance stored in the <c>outerInstance</c> member.</param>
public PerFieldSimilarityWrapperAnonymousInnerClassHelper(TestTaxonomyFacetCounts outerInstance)
{
    sim = new DefaultSimilarity();
    this.outerInstance = outerInstance;
}
/// <summary>
/// Initializes the helper: assigns a new <c>DefaultSimilarity</c> to <c>sim</c> and
/// keeps a reference to the supplied test instance.
/// </summary>
/// <param name="outerInstance">The test instance stored in the <c>outerInstance</c> member.</param>
public PerFieldSimilarityWrapperAnonymousInnerClassHelper(TestTaxonomyFacetCounts outerInstance)
{
    sim = new DefaultSimilarity();
    this.outerInstance = outerInstance;
}