Exemplo n.º 1
0
        /// <summary>
        /// Writes the norm bytes for <paramref name="field"/> into <paramref name="result"/>,
        /// beginning at index <paramref name="offset"/>.
        /// </summary>
        /// <remarks>
        /// Exactly one source is used per call:
        /// the array held in <c>normsCache</c> for the field, if there is one;
        /// otherwise, when <c>HasNorms(field)</c> is false, the encoded norm for 1.0f;
        /// otherwise each sub-reader fills its own slice at <c>offset + starts[i]</c>.
        /// The sub-readers are consulted only in that last case, because the other two
        /// cases overwrite the destination range themselves.
        /// NOTE(review): this assumes the sub-reader slices lie inside
        /// <c>[offset, offset + MaxDoc)</c> — confirm against how <c>starts</c> is built.
        /// </remarks>
        /// <param name="field">Name of the field whose norms are requested.</param>
        /// <param name="result">Destination buffer.</param>
        /// <param name="offset">Index in <paramref name="result"/> at which writing starts.</param>
        public override void  Norms(System.String field, byte[] result, int offset)
        {
            // Kept as lock (this): other members presumably synchronize on the same monitor.
            lock (this)
            {
                EnsureOpen();
                byte[] bytes = normsCache[field];

                if (bytes != null)
                {
                    // cache hit
                    Array.Copy(bytes, 0, result, offset, MaxDoc);
                }
                else if (!HasNorms(field))
                {
                    // No norms for this field: fill the rest of the buffer with the
                    // encoded default norm, computed once instead of per element.
                    byte defaultNorm = (byte)DefaultSimilarity.EncodeNorm(1.0f);
                    for (int i = offset; i < result.Length; i++)
                    {
                        result[i] = defaultNorm;
                    }
                }
                else
                {
                    for (int i = 0; i < subReaders.Length; i++)
                    {
                        // read from segments
                        subReaders[i].Norms(field, result, offset + starts[i]);
                    }
                }
            }
        }
        /// <summary>
        /// Compares <c>Tf</c> of a <c>SweetSpotSimilarity</c> with <c>Tf</c> of a
        /// <c>DefaultSimilarity</c> under several baseline tf factor settings.
        /// </summary>
        public void TestSweetSpotTf()
        {
            SweetSpotSimilarity sweetSpot = new SweetSpotSimilarity();
            TFIDFSimilarity baseline = new DefaultSimilarity();
            TFIDFSimilarity tuned = sweetSpot;

            // Factors (0, 0): both similarities must report exactly the same tf.
            sweetSpot.SetBaselineTfFactors(0.0f, 0.0f);
            for (int freq = 1; freq < 1000; freq++)
            {
                assertEquals("tf: i=" + freq,
                             baseline.Tf(freq), tuned.Tf(freq), 0.0f);
            }

            // Factors (1, 0): the tuned tf must be strictly above the baseline tf.
            sweetSpot.SetBaselineTfFactors(1.0f, 0.0f);
            for (int freq = 1; freq < 1000; freq++)
            {
                float baseTf = baseline.Tf(freq);
                float tunedTf = tuned.Tf(freq);
                assertTrue("tf: i=" + freq + " : d=" + baseTf +
                           " < s=" + tunedTf,
                           baseTf < tunedTf);
            }

            // Factors (1, 6): tf is expected to be the constant 1 for frequencies 1..6.
            sweetSpot.SetBaselineTfFactors(1.0f, 6.0f);
            for (int freq = 1; freq <= 6; freq++)
            {
                assertEquals("tf flat1: i=" + freq, 1.0f, tuned.Tf(freq), 0.0f);
            }

            // Factors (2, 6): tf is expected to be the constant 2 for frequencies 1..6 ...
            sweetSpot.SetBaselineTfFactors(2.0f, 6.0f);
            for (int freq = 1; freq <= 6; freq++)
            {
                assertEquals("tf flat2: i=" + freq, 2.0f, tuned.Tf(freq), 0.0f);
            }

            // ... and strictly below the baseline tf from 6 up to and including 1000.
            for (int freq = 6; freq <= 1000; freq++)
            {
                float tunedTf = tuned.Tf(freq);
                float baseTf = baseline.Tf(freq);
                assertTrue("tf: i=" + freq + " : s=" + tunedTf +
                           " < d=" + baseTf,
                           tunedTf < baseTf);
            }

            // Degenerate input: a frequency of zero must give a tf of zero.
            assertEquals("tf zero", 0.0f, tuned.Tf(0), 0.0f);
        }
    /// <summary>
    /// Computes a TF-IDF weight for every term of a term-frequency vector stored for
    /// document 1 of the index under <c>App_Data</c>, writes a debug listing to
    /// <c>Response</c>, and returns the weights.
    /// </summary>
    /// <remarks>
    /// Each term seen is also added to <c>TotalNoun</c> if it is not already there.
    /// When the document has several term vectors only the last one returned by the
    /// reader is used. NOTE(review): IDF is always looked up in field "Text", so the
    /// vector presumably should be the one for that field — confirm.
    /// If the document has no term vectors the returned table is empty.
    /// </remarks>
    /// <returns>A table mapping each term (string) to its TF-IDF weight (float).</returns>
    public Hashtable GetVector2()
    {
        StringBuilder sb = new StringBuilder();

        string indexPath = AppDomain.CurrentDomain.BaseDirectory + "\\App_Data\\";
        DirectoryInfo dirInfo = new DirectoryInfo(indexPath);

        Hashtable ht = new Hashtable();
        Hashtable ht2 = new Hashtable();

        DefaultSimilarity similarity = new DefaultSimilarity();

        // Release the directory and the reader even if a lookup below throws.
        using (FSDirectory dir = FSDirectory.Open(dirInfo))
        using (IndexReader ir = IndexReader.Open(dir, false))
        {
            string[] label = null;
            int[] freq = null;

            // Guard against a null result so a document without vectors does not crash.
            var vectors = ir.GetTermFreqVectors(1);
            if (vectors != null)
            {
                foreach (var obj in vectors)
                {
                    label = obj.GetTerms();
                    freq  = obj.GetTermFrequencies();
                }
            }

            Response.Write("s1<br>");

            if (label != null && freq != null)
            {
                for (int i = 0; i < label.Length; i++)
                {
                    ht.Add(label[i], freq[i]);

                    if (!TotalNoun.Contains(label[i]))
                    {
                        TotalNoun.Add(label[i]);
                    }
                }
            }

            // The document count does not change while iterating; fetch it once.
            int numDocs = ir.NumDocs();

            foreach (DictionaryEntry obj in ht)
            {
                float tf    = similarity.Tf(Convert.ToInt32(obj.Value));
                Term  t     = new Term("Text", obj.Key.ToString());
                float idf   = similarity.Idf(ir.DocFreq(t), numDocs);
                float tfidf = tf * idf;

                // Pass the arguments straight to AppendFormat. Formatting first and then
                // feeding the result back in as a format string throws FormatException
                // as soon as a term contains '{' or '}'.
                sb.AppendFormat("label:{0} freq:{1}<BR>", obj.Key, obj.Value);
                sb.AppendFormat("TF:{0}<br>", tf);
                sb.AppendFormat("IDF:{0}<br>", idf);
                sb.AppendFormat("TF-IDF:{0}<br>", tfidf);
                ht2.Add(obj.Key, tfidf);
            }
        }

        Response.Write(sb.ToString());
        return ht2;
    }
        /// <summary>
        /// Compares norms produced through <c>SweetSpotSimilarity</c> (directly and via a
        /// <c>PerFieldSimilarityWrapperHelper</c>) with norms from <c>DefaultSimilarity</c>
        /// for a range of field lengths and length-norm factor settings.
        /// </summary>
        public void TestSweetSpotComputeNorm()
        {
            SweetSpotSimilarity sweetSpot = new SweetSpotSimilarity();
            sweetSpot.SetLengthNormFactors(1, 1, 0.5f, true);

            Similarity baseline = new DefaultSimilarity();
            Similarity tuned = sweetSpot;

            // Factors (1, 1): every length must give the same norm as the baseline.
            FieldInvertState state = new FieldInvertState("bogus") { Boost = 1.0f };
            for (int len = 1; len < 1000; len++)
            {
                state.Length = len;
                assertEquals("base case: i=" + len,
                             ComputeAndGetNorm(baseline, state),
                             ComputeAndGetNorm(tuned, state),
                             0.0f);
            }

            // Factors (3, 10): lengths 3..10 must decode to a norm of exactly 1.
            sweetSpot.SetLengthNormFactors(3, 10, 0.5f, true);

            for (int len = 3; len <= 10; len++)
            {
                state.Length = len;
                assertEquals("3,10: spot i=" + len,
                             1.0f,
                             ComputeAndDecodeNorm(sweetSpot, sweetSpot, state),
                             0.0f);
            }

            // From 10 upwards the norm must equal the baseline norm for (length - 9).
            for (int len = 10; len < 1000; len++)
            {
                state.Length = len - 9;
                byte expectedNorm = ComputeAndGetNorm(baseline, state);
                state.Length = len;
                byte actualNorm = ComputeAndGetNorm(tuned, state);
                assertEquals("3,10: 10<x : i=" + len,
                             expectedNorm,
                             actualNorm,
                             0.0f);
            }

            // Separate similarities with their own factors, combined in one wrapper.
            // NOTE(review): presumably the wrapper picks barSim/yakSim/aSim/bSim by field
            // name and falls back to sweetSpot — confirm in PerFieldSimilarityWrapperHelper.
            SweetSpotSimilarity barSim = new SweetSpotSimilarity();
            barSim.SetLengthNormFactors(8, 13, 0.5f, false);

            SweetSpotSimilarity yakSim = new SweetSpotSimilarity();
            yakSim.SetLengthNormFactors(6, 9, 0.5f, false);

            SweetSpotSimilarity aSim = new SweetSpotSimilarity();
            aSim.SetLengthNormFactors(5, 8, 0.5f, false);

            SweetSpotSimilarity bSim = new SweetSpotSimilarity();
            bSim.SetLengthNormFactors(5, 8, 0.1f, false);

            Similarity perField = new PerFieldSimilarityWrapperHelper(barSim, yakSim, aSim, bSim, sweetSpot);

            // Field "foo": same expectations as the (3, 10) settings above.
            state = new FieldInvertState("foo") { Boost = 1.0f };
            for (int len = 3; len <= 10; len++)
            {
                state.Length = len;
                assertEquals("f: 3,10: spot i=" + len,
                             1.0f,
                             ComputeAndDecodeNorm(sweetSpot, perField, state),
                             0.0f);
            }

            for (int len = 10; len < 1000; len++)
            {
                state.Length = len - 9;
                byte expectedNorm = ComputeAndGetNorm(baseline, state);
                state.Length = len;
                byte actualNorm = ComputeAndGetNorm(perField, state);
                assertEquals("f: 3,10: 10<x : i=" + len,
                             expectedNorm,
                             actualNorm,
                             0.0f);
            }

            // Field "bar": lengths 8..13 must decode to 1.
            state = new FieldInvertState("bar") { Boost = 1.0f };
            for (int len = 8; len <= 13; len++)
            {
                state.Length = len;
                assertEquals("f: 8,13: spot i=" + len,
                             1.0f,
                             ComputeAndDecodeNorm(sweetSpot, perField, state),
                             0.0f);
            }

            // Field "yak": lengths 6..9 must decode to 1.
            state = new FieldInvertState("yak") { Boost = 1.0f };
            for (int len = 6; len <= 9; len++)
            {
                state.Length = len;
                assertEquals("f: 6,9: spot i=" + len,
                             1.0f,
                             ComputeAndDecodeNorm(sweetSpot, perField, state),
                             0.0f);
            }

            // Field "bar" from 13 upwards: must equal the baseline norm for (length - 12).
            state = new FieldInvertState("bar") { Boost = 1.0f };
            for (int len = 13; len < 1000; len++)
            {
                state.Length = len - 12;
                byte expectedNorm = ComputeAndGetNorm(baseline, state);
                state.Length = len;
                byte actualNorm = ComputeAndGetNorm(perField, state);
                assertEquals("f: 8,13: 13<x : i=" + len,
                             expectedNorm,
                             actualNorm,
                             0.0f);
            }

            // Field "yak" from 9 upwards: must equal the baseline norm for (length - 8).
            state = new FieldInvertState("yak") { Boost = 1.0f };
            for (int len = 9; len < 1000; len++)
            {
                state.Length = len - 8;
                byte expectedNorm = ComputeAndGetNorm(baseline, state);
                state.Length = len;
                byte actualNorm = ComputeAndGetNorm(perField, state);
                assertEquals("f: 6,9: 9<x : i=" + len,
                             expectedNorm,
                             actualNorm,
                             0.0f);
            }

            // Steepness: for equal lengths the norm for field "a" must be strictly
            // below the norm for field "b".
            for (int len = 9; len < 1000; len++)
            {
                FieldInvertState stateA = new FieldInvertState("a") { Boost = 1.0f, Length = len };
                byte normA = ComputeAndGetNorm(perField, stateA);

                FieldInvertState stateB = new FieldInvertState("b") { Boost = 1.0f, Length = len };
                byte normB = ComputeAndGetNorm(perField, stateB);

                assertTrue("s: i=" + len + " : a=" + normA +
                           " < b=" + normB,
                           normA < normB);
            }
        }
Exemplo n.º 5
0
 /// <summary>
 /// Creates the helper: assigns a new <c>DefaultSimilarity</c> to <c>sim</c> and keeps
 /// a reference to the enclosing test instance.
 /// </summary>
 /// <param name="outerInstance">The enclosing <c>TestTaxonomyFacetCounts</c> instance.</param>
 public PerFieldSimilarityWrapperAnonymousInnerClassHelper(TestTaxonomyFacetCounts outerInstance)
 {
     sim = new DefaultSimilarity();
     this.outerInstance = outerInstance;
 }
 /// <summary>
 /// Creates the helper: assigns a new <c>DefaultSimilarity</c> to <c>sim</c> and keeps
 /// a reference to the enclosing test instance.
 /// </summary>
 /// <param name="outerInstance">The enclosing <c>TestTaxonomyFacetCounts</c> instance.</param>
 public PerFieldSimilarityWrapperAnonymousInnerClassHelper(TestTaxonomyFacetCounts outerInstance)
 {
     sim = new DefaultSimilarity();
     this.outerInstance = outerInstance;
 }