/// <summary>
/// Requests up to 12 terms of <c>FIELD_1</c> ordered with
/// <c>HighFreqTerms.DocFreqComparer</c> and checks the document frequency
/// reported for every returned term.
/// </summary>
public void TestGetHighFreqTerms()
{
    // Expected doc freqs for the two hardcoded term families (highTFmedDF / highTF).
    const int HighTfMedDfDocFreq = 5;
    const int HighTfDocFreq = 1;

    int requested = 12;
    string fieldName = "FIELD_1";
    TermStats[] topTerms = HighFreqTerms.GetHighFreqTerms(reader, requested, fieldName, new HighFreqTerms.DocFreqComparer());

    foreach (TermStats stats in topTerms)
    {
        string decoded = stats.termtext.Utf8ToString();

        if (!decoded.Contains("highTF"))
        {
            // Every other term text is parsed as an integer, which is handed to
            // GetExpecteddocFreq to obtain the expected value.
            int n = Convert.ToInt32(decoded);
            assertEquals("doc freq is not as expected", GetExpecteddocFreq(n), stats.DocFreq);
            continue;
        }

        // hardcoded highTF or highTFmedDF
        if (decoded.Contains("medDF"))
        {
            assertEquals("doc freq is not as expected", HighTfMedDfDocFreq, stats.DocFreq);
        }
        else
        {
            assertEquals("doc freq is not as expected", HighTfDocFreq, stats.DocFreq);
        }
    }
}
/// <summary>
/// Requests up to 12 terms of <c>FIELD_1</c> ordered with
/// <c>HighFreqTerms.TotalTermFreqComparer</c> and checks the total term
/// frequency (and, for numeric terms, the document frequency) of each result.
/// </summary>
public void TestGetTermFreqOrdered()
{
    // Expected total term freqs for the two hardcoded term families.
    const int HighTfMedDfTotal = 125;
    const int HighTfTotal = 200;

    int requested = 12;
    string fieldName = "FIELD_1";
    TermStats[] rankedTerms = HighFreqTerms.GetHighFreqTerms(reader, requested, fieldName, new HighFreqTerms.TotalTermFreqComparer());

    foreach (TermStats stats in rankedTerms)
    {
        string decoded = stats.termtext.Utf8ToString();

        if (!decoded.Contains("highTF"))
        {
            // Numeric term text: both statistics are derived from the parsed number.
            int n = Convert.ToInt32(decoded);
            assertEquals("doc freq is expected", GetExpecteddocFreq(n), stats.DocFreq);
            assertEquals("total term freq is expected", GetExpectedtotalTermFreq(n), stats.TotalTermFreq);
            continue;
        }

        if (decoded.Contains("medDF"))
        {
            assertEquals("total term freq is expected", HighTfMedDfTotal, stats.TotalTermFreq);
        }
        else
        {
            assertEquals("total term freq is expected", HighTfTotal, stats.TotalTermFreq);
        }
    }
}
/// <summary>
/// Checks that the first term returned for <c>FIELD_1</c>, when ordered with
/// <c>HighFreqTerms.DocFreqComparer</c>, reports a document frequency of 10.
/// </summary>
public void TestFirstTermHighestDocFreq()
{
    int requested = 12;
    string fieldName = "FIELD_1";

    TermStats[] byDocFreq = HighFreqTerms.GetHighFreqTerms(
        reader,
        requested,
        fieldName,
        new HighFreqTerms.DocFreqComparer());

    assertEquals("Term with highest docfreq is first", 10, byDocFreq[0].DocFreq);
}
/// <summary>
/// Checks that the first term returned for <c>different_field</c>, when
/// ordered with <c>HighFreqTerms.TotalTermFreqComparer</c>, reports a total
/// term frequency of 150. The term text is appended to the failure message.
/// </summary>
public void TestFirstTermHighestTotalTermFreqDifferentField()
{
    int requested = 20;
    string fieldName = "different_field";

    TermStats[] byTotalTermFreq = HighFreqTerms.GetHighFreqTerms(
        reader,
        requested,
        fieldName,
        new HighFreqTerms.TotalTermFreqComparer());

    string failureMessage = "Term with highest totalTermFreq is first" + byTotalTermFreq[0].GetTermText();
    assertEquals(failureMessage, 150, byTotalTermFreq[0].TotalTermFreq);
}
/// <summary>
/// Checks that, with a <c>null</c> field argument, the first term returned
/// when ordered with <c>HighFreqTerms.TotalTermFreqComparer</c> reports a
/// total term frequency of 200.
/// </summary>
public void TestFirstTermHighestTotalTermFreq()
{
    int requested = 20;
    string fieldName = null; // no specific field is named

    TermStats[] byTotalTermFreq = HighFreqTerms.GetHighFreqTerms(
        reader,
        requested,
        fieldName,
        new HighFreqTerms.TotalTermFreqComparer());

    assertEquals("Term with highest totalTermFreq is first", 200, byTotalTermFreq[0].TotalTermFreq);
}
/// <summary>
/// Checks that requesting 12 terms with a <c>null</c> field argument yields
/// an array of exactly 12 entries.
/// </summary>
public void TestNumTerms()
{
    int numTerms = 12;
    string fieldName = null; // no specific field is named

    TermStats[] results = HighFreqTerms.GetHighFreqTerms(
        reader,
        numTerms,
        fieldName,
        new HighFreqTerms.DocFreqComparer());

    string failureMessage = "length of terms array equals numTerms :" + numTerms;
    assertEquals(failureMessage, numTerms, results.Length);
}
/// <summary>
/// Checks that, with a <c>null</c> field argument, the first term returned
/// when ordered with <c>HighFreqTerms.DocFreqComparer</c> reports a document
/// frequency of 20.
/// </summary>
public void TestFirstTermHighestDocFreqAllFields()
{
    int numTerms = 12;
    string field = null; // no specific field is named

    // The doc-freq ordering type is DocFreqComparer (the ".NET" Comparer
    // naming used by every other test here), not the Java-style DocFreqComparator.
    TermStats[] terms = HighFreqTerms.GetHighFreqTerms(reader, numTerms, field, new HighFreqTerms.DocFreqComparer());

    assertEquals("Term with highest docfreq is first", 20, terms[0].DocFreq);
}
/// <summary>
/// Checks that the terms returned for <c>FIELD_1</c> with
/// <c>HighFreqTerms.DocFreqComparer</c> are in non-increasing order of
/// document frequency.
/// </summary>
public void TestOrderedByDocFreqDescending()
{
    int requested = 12;
    string fieldName = "FIELD_1";
    TermStats[] byDocFreq = HighFreqTerms.GetHighFreqTerms(reader, requested, fieldName, new HighFreqTerms.DocFreqComparer());

    // Start at the second entry: each term is compared with its predecessor.
    for (int pos = 1; pos < byDocFreq.Length; pos++)
    {
        TermStats previous = byDocFreq[pos - 1];
        TermStats current = byDocFreq[pos];

        assertTrue(
            "out of order " + previous.DocFreq + "should be >= " + current.DocFreq,
            previous.DocFreq >= current.DocFreq);
    }
}
/// <summary>
/// Checks that the terms returned for <c>FIELD_1</c> with
/// <c>HighFreqTerms.TotalTermFreqComparer</c> are in non-increasing order of
/// total term frequency.
/// </summary>
public void TestOrderedByTermFreqDescending()
{
    int requested = 12;
    string fieldName = "FIELD_1";
    TermStats[] byTotalTermFreq = HighFreqTerms.GetHighFreqTerms(reader, requested, fieldName, new HighFreqTerms.TotalTermFreqComparer());

    // check that they are sorted by descending termfreq order;
    // start at the second entry so each term is compared with its predecessor.
    for (int pos = 1; pos < byTotalTermFreq.Length; pos++)
    {
        TermStats previous = byTotalTermFreq[pos - 1];
        TermStats current = byTotalTermFreq[pos];

        assertTrue(
            "out of order" + previous + " > " + current,
            previous.TotalTermFreq >= current.TotalTermFreq);
    }
}