// Checks which terms FieldQuery exposes per field for a nested boolean query:
// field F must yield exactly {"A", "B"} (so "D" and "E", which sit under the
// MUST_NOT clause, are expected to be absent), field "x" must yield {"C"},
// and a field that never appears in the query ("y") must yield null.
public void TestGetTermSet()
{
    // Nested query that is attached to the outer query as a MUST_NOT clause.
    BooleanQuery prohibited = new BooleanQuery();
    foreach (string text in new[] { "D", "E" })
    {
        prohibited.Add(new TermQuery(new Term(F, text)), Occur.MUST);
    }

    // Outer query: F:A and F:B are required, x:C is optional.
    BooleanQuery outer = new BooleanQuery();
    foreach (string text in new[] { "A", "B" })
    {
        outer.Add(new TermQuery(new Term(F, text)), Occur.MUST);
    }
    outer.Add(new TermQuery(new Term("x", "C")), Occur.SHOULD);
    outer.Add(prohibited, Occur.MUST_NOT);

    FieldQuery fq = new FieldQuery(outer, true, true);

    // Two fields (F and "x") are expected to have a term set.
    assertEquals(2, fq.termSetMap.size());

    ISet<string> termsOfF = fq.GetTermSet(F);
    assertEquals(2, termsOfF.size());
    foreach (string expected in new[] { "A", "B" })
    {
        assertTrue(termsOfF.contains(expected));
    }

    ISet<string> termsOfX = fq.GetTermSet("x");
    assertEquals(1, termsOfX.size());
    assertTrue(termsOfX.contains("C"));

    // A field the query never mentions has no term set at all.
    ISet<string> termsOfY = fq.GetTermSet("y");
    assertNull(termsOfY);
}
//public static void main( string[] args ) throws Exception {
//  Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
//  QueryParser parser = new QueryParser(Version.LUCENE_CURRENT,  "f", analyzer );
//  Query query = parser.parse( "a x:b" );
//  FieldQuery fieldQuery = new FieldQuery( query, true, false );

//  Directory dir = new RAMDirectory();
//  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer));
//  Document doc = new Document();
//  IndexableFieldType ft = new IndexableFieldType(TextField.TYPE_STORED);
//  ft.setStoreTermVectors(true);
//  ft.setStoreTermVectorOffsets(true);
//  ft.setStoreTermVectorPositions(true);
//  doc.add( new Field( "f", ft, "a a a b b c a b b c d e f" ) );
//  doc.add( new Field( "f", ft, "b a b a f" ) );
//  writer.addDocument( doc );
//  writer.close();

//  IndexReader reader = IndexReader.open(dir1);
//  new FieldTermStack( reader, 0, "f", fieldQuery );
//  reader.close();
//}

/// <summary>
/// Builds the term stack for one field of one document.
/// <para/>
/// The term vector of <paramref name="fieldName"/> in document <paramref name="docId"/>
/// is scanned, and every occurrence of a term that is contained in
/// <c>fieldQuery.GetTermSet(fieldName)</c> is added to the term list as a
/// <see cref="TermInfo"/> (term text, start/end offset, position, weight).
/// The list is then sorted, and entries that share a position are chained together
/// through <c>SetNext</c>; only the first entry of each such group stays in the list.
/// <para/>
/// The constructor returns early, producing a "null snippet", when the query has
/// no term set for the field, when the document or field has no term vector, when
/// positions are unavailable for a matching term, or when offsets are unavailable.
/// In the last two cases entries collected for earlier terms remain in the list,
/// unsorted and unlinked.
/// </summary>
/// <param name="reader"><see cref="IndexReader"/> of the index</param>
/// <param name="docId">document id to be highlighted</param>
/// <param name="fieldName">field of the document to be highlighted</param>
/// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
/// <exception cref="IOException">If there is a low-level I/O error</exception>
public FieldTermStack(IndexReader reader, int docId, string fieldName, FieldQuery fieldQuery)
{
    this.fieldName = fieldName;

    // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
    ISet<string> queryTerms = fieldQuery.GetTermSet(fieldName);
    if (queryTerms == null)
    {
        return;
    }

    // No term vectors for the document, or none for this field: null snippet.
    Terms fieldVector = reader.GetTermVectors(docId)?.GetTerms(fieldName);
    if (fieldVector == null)
    {
        return;
    }

    CharsRef utf16Buffer = new CharsRef();
    TermsEnum vectorTerms = fieldVector.GetEnumerator();
    DocsAndPositionsEnum postings = null; // handed back to DocsAndPositions on every iteration
    int maxDoc = reader.MaxDoc;

    while (vectorTerms.MoveNext())
    {
        BytesRef termBytes = vectorTerms.Term;
        UnicodeUtil.UTF8toUTF16(termBytes, utf16Buffer);
        string termText = utf16Buffer.ToString();
        if (!queryTerms.Contains(termText))
        {
            continue;
        }

        postings = vectorTerms.DocsAndPositions(null, postings);
        if (postings == null)
        {
            // null snippet
            return;
        }
        postings.NextDoc();

        // For weight look here: http://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/DefaultSimilarity.html
        int docFreq = reader.DocFreq(new Term(fieldName, termBytes));
        double rawWeight = Math.Log(maxDoc / (double)(docFreq + 1)) + 1.0;
        float weight = (float)rawWeight;

        // One TermInfo per occurrence of the term in this document.
        for (int remaining = postings.Freq; remaining > 0; remaining--)
        {
            int position = postings.NextPosition();
            if (postings.StartOffset < 0)
            {
                return; // no offsets, null snippet
            }
            termList.Add(new TermInfo(termText, postings.StartOffset, postings.EndOffset, position, weight));
        }
    }

    // sort by position
    CollectionUtil.TimSort(termList);

    // now look for dups at the same position, linking them together
    int groupPosition = -1;
    TermInfo groupTail = null;
    TermInfo groupHead = null;
    int index = 0;
    while (index < termList.Count)
    {
        TermInfo candidate = termList[index];
        if (candidate.Position != groupPosition)
        {
            // A new position starts: point the tail of the finished group back at its head,
            // then open a new group with this entry as both head and tail.
            if (groupTail != null)
            {
                groupTail.SetNext(groupHead);
            }
            groupHead = candidate;
            groupTail = candidate;
            groupPosition = candidate.Position;
            // LUCENENET NOTE: Only increment the position if we don't do a delete.
            index++;
            continue;
        }

        // Same position as the current group: append to the chain and drop it from the list.
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(groupTail != null);
        }
        groupTail.SetNext(candidate);
        groupTail = candidate;
        // LUCENENET NOTE: Remove, but don't advance the index (removing shifts the next item into this slot)
        termList.RemoveAt(index);
    }

    // Close the chain of the last group.
    if (groupTail != null)
    {
        groupTail.SetNext(groupHead);
    }
}