Пример #1
0
        /// <summary>
        /// Requests the top 12 terms of "FIELD_1" using the doc-freq comparer and checks
        /// the DocFreq reported for every returned term.
        /// </summary>
        /// <remarks>
        /// Terms whose text contains "highTF" must report a DocFreq of 5 when the text also
        /// contains "medDF", and 1 otherwise. Any other term text is parsed as an integer and
        /// checked against <c>GetExpecteddocFreq</c>, which is defined outside this method.
        /// </remarks>
        public void TestGetHighFreqTerms()
        {
            int numTerms = 12;
            string field = "FIELD_1";

            TermStats[] terms = HighFreqTerms.GetHighFreqTerms(reader, numTerms, field, new HighFreqTerms.DocFreqComparer());

            foreach (TermStats stats in terms)
            {
                string termtext = stats.termtext.Utf8ToString();

                if (!termtext.Contains("highTF"))
                {
                    // Term text is machine-generated data, so parse it with the invariant
                    // culture instead of whatever culture the test thread happens to run under.
                    int n = Convert.ToInt32(termtext, System.Globalization.CultureInfo.InvariantCulture);
                    assertEquals("doc freq is not as expected", GetExpecteddocFreq(n),
                                 stats.DocFreq);
                    continue;
                }

                // hardcoded highTF or highTFmedDF
                if (termtext.Contains("medDF"))
                {
                    assertEquals("doc freq is not as expected", 5, stats.DocFreq);
                }
                else
                {
                    assertEquals("doc freq is not as expected", 1, stats.DocFreq);
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Requests the top 12 terms of "FIELD_1" using the total-term-freq comparer and
        /// checks the statistics reported for every returned term.
        /// </summary>
        /// <remarks>
        /// Terms whose text contains "highTF" must report a TotalTermFreq of 125 when the text
        /// also contains "medDF", and 200 otherwise. Any other term text is parsed as an integer
        /// and both DocFreq and TotalTermFreq are checked against <c>GetExpecteddocFreq</c> and
        /// <c>GetExpectedtotalTermFreq</c>, which are defined outside this method.
        /// </remarks>
        public void TestGetTermFreqOrdered()
        {
            int numTerms = 12;
            string field = "FIELD_1";

            TermStats[] terms = HighFreqTerms.GetHighFreqTerms(reader, numTerms, field, new HighFreqTerms.TotalTermFreqComparer());

            foreach (TermStats stats in terms)
            {
                string text = stats.termtext.Utf8ToString();

                if (!text.Contains("highTF"))
                {
                    // Term text is machine-generated data, so parse it with the invariant
                    // culture instead of whatever culture the test thread happens to run under.
                    int n = Convert.ToInt32(text, System.Globalization.CultureInfo.InvariantCulture);
                    assertEquals("doc freq is expected", GetExpecteddocFreq(n),
                                 stats.DocFreq);
                    assertEquals("total term freq is expected", GetExpectedtotalTermFreq(n),
                                 stats.TotalTermFreq);
                    continue;
                }

                if (text.Contains("medDF"))
                {
                    assertEquals("total term freq is expected", 125,
                                 stats.TotalTermFreq);
                }
                else
                {
                    assertEquals("total term freq is expected", 200,
                                 stats.TotalTermFreq);
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Requests the top 12 terms of "FIELD_1" using the doc-freq comparer and asserts
        /// that the first returned entry reports a DocFreq of 10.
        /// </summary>
        public void TestFirstTermHighestDocFreq()
        {
            const int requestedCount = 12;
            const string fieldName = "FIELD_1";

            TermStats[] ranked = HighFreqTerms.GetHighFreqTerms(
                reader,
                requestedCount,
                fieldName,
                new HighFreqTerms.DocFreqComparer());

            // The entry at index 0 is the one the comparer ranked first.
            TermStats top = ranked[0];
            assertEquals("Term with highest docfreq is first", 10, top.DocFreq);
        }
Пример #4
0
        /// <summary>
        /// Requests the top 20 terms of "different_field" using the total-term-freq comparer
        /// and asserts that the first returned entry reports a TotalTermFreq of 150.
        /// </summary>
        /// <remarks>
        /// The text of the first term is appended to the assertion message.
        /// </remarks>
        public void TestFirstTermHighestTotalTermFreqDifferentField()
        {
            const int requestedCount = 20;
            const string fieldName = "different_field";

            TermStats[] ranked = HighFreqTerms.GetHighFreqTerms(
                reader,
                requestedCount,
                fieldName,
                new HighFreqTerms.TotalTermFreqComparer());

            string failureMessage = "Term with highest totalTermFreq is first" + ranked[0].GetTermText();
            assertEquals(failureMessage, 150, ranked[0].TotalTermFreq);
        }
Пример #5
0
        /// <summary>
        /// Requests the top 20 terms with a null field argument, using the total-term-freq
        /// comparer, and asserts that the first returned entry reports a TotalTermFreq of 200.
        /// </summary>
        public void TestFirstTermHighestTotalTermFreq()
        {
            const int requestedCount = 20;
            const string fieldName = null; // no specific field is passed to GetHighFreqTerms

            TermStats[] ranked = HighFreqTerms.GetHighFreqTerms(
                reader,
                requestedCount,
                fieldName,
                new HighFreqTerms.TotalTermFreqComparer());

            TermStats top = ranked[0];
            assertEquals("Term with highest totalTermFreq is first", 200, top.TotalTermFreq);
        }
Пример #6
0
        /// <summary>
        /// Requests 12 terms with a null field argument, using the doc-freq comparer, and
        /// asserts that the returned array has exactly as many entries as were requested.
        /// </summary>
        public void TestNumTerms()
        {
            int expectedCount = 12;
            string fieldName = null; // no specific field is passed to GetHighFreqTerms

            TermStats[] ranked = HighFreqTerms.GetHighFreqTerms(
                reader,
                expectedCount,
                fieldName,
                new HighFreqTerms.DocFreqComparer());

            string failureMessage = "length of terms array equals numTerms :" + expectedCount;
            assertEquals(failureMessage, expectedCount, ranked.Length);
        }
Пример #7
0
        /// <summary>
        /// Requests the top 12 terms with a null field argument, using the doc-freq comparer,
        /// and asserts that the first returned entry reports a DocFreq of 20.
        /// </summary>
        public void TestFirstTermHighestDocFreqAllFields()
        {
            int numTerms = 12;
            string field = null; // no specific field is passed to GetHighFreqTerms

            // Uses DocFreqComparer, the same comparer type name as every other doc-freq test
            // in this file, instead of the differently named DocFreqComparator.
            TermStats[] terms = HighFreqTerms.GetHighFreqTerms(reader, numTerms, field, new HighFreqTerms.DocFreqComparer());

            assertEquals("Term with highest docfreq is first", 20, terms[0].DocFreq);
        }
Пример #8
0
        /// <summary>
        /// Requests the top 12 terms of "FIELD_1" using the doc-freq comparer and asserts
        /// that every entry's DocFreq is less than or equal to that of the entry before it.
        /// </summary>
        public void TestOrderedByDocFreqDescending()
        {
            const int requestedCount = 12;
            const string fieldName = "FIELD_1";

            TermStats[] ranked = HighFreqTerms.GetHighFreqTerms(
                reader,
                requestedCount,
                fieldName,
                new HighFreqTerms.DocFreqComparer());

            // Start at the second entry: each iteration compares an entry with its predecessor.
            for (int index = 1; index < ranked.Length; index++)
            {
                var previousDocFreq = ranked[index - 1].DocFreq;
                var currentDocFreq = ranked[index].DocFreq;

                assertTrue("out of order " + previousDocFreq + "should be >= " + currentDocFreq,
                           previousDocFreq >= currentDocFreq);
            }
        }
Пример #9
0
        /// <summary>
        /// Requests the top 12 terms of "FIELD_1" using the total-term-freq comparer and
        /// asserts that every entry's TotalTermFreq is less than or equal to that of the
        /// entry before it.
        /// </summary>
        public void TestOrderedByTermFreqDescending()
        {
            const int requestedCount = 12;
            const string fieldName = "FIELD_1";

            TermStats[] ranked = HighFreqTerms.GetHighFreqTerms(
                reader,
                requestedCount,
                fieldName,
                new HighFreqTerms.TotalTermFreqComparer());

            // Start at the second entry: each iteration compares an entry with its predecessor
            // to verify descending total-term-freq order.
            for (int index = 1; index < ranked.Length; index++)
            {
                TermStats previous = ranked[index - 1];
                TermStats current = ranked[index];

                assertTrue("out of order" + previous + " > " + current,
                           previous.TotalTermFreq >= current.TotalTermFreq);
            }
        }