Пример #1
0
            /// <summary>
            /// Creates a weighted phrase from <paramref name="terms"/>, collapsing terms at
            /// directly consecutive positions into a single offset range.
            /// </summary>
            /// <param name="terms">terms of the phrase; the first element is read unconditionally, so the list must not be empty</param>
            /// <param name="boost">boost value stored on this instance</param>
            /// <param name="seqnum">sequence number stored on this instance</param>
            public WeightedPhraseInfo(IList <TermInfo> terms, float boost, int seqnum)
            {
                this.boost  = boost;
                this.seqnum = seqnum;

                // Keep our own copy of the TermInfos for further operations.
                termsInfos = new List<TermInfo>(terms);

                int termCount = terms.Count;
                termsOffsets = new List<Toffs>(termCount);

                // The first term always opens the first offset range.
                TermInfo head = terms[0];
                termsOffsets.Add(new Toffs(head.StartOffset, head.EndOffset));

                int previousPosition = head.Position;
                for (int index = 1; index < termCount; index++)
                {
                    TermInfo term = terms[index];
                    bool isAdjacent = term.Position - previousPosition == 1;

                    if (!isAdjacent)
                    {
                        // Gap in positions: start a new offset range.
                        termsOffsets.Add(new Toffs(term.StartOffset, term.EndOffset));
                    }
                    else
                    {
                        // Directly follows the previous term: extend the most recent range.
                        Toffs lastRange = termsOffsets[termsOffsets.Count - 1];
                        lastRange.EndOffset = term.EndOffset;
                    }

                    previousPosition = term.Position;
                }
            }
Пример #2
0
        /// <summary>
        /// Removes and returns the top <see cref="TermInfo"/> object of the stack.
        /// </summary>
        /// <returns>
        /// the <see cref="TermInfo"/> that was on top of the stack (the first element of the
        /// term list), or <c>null</c> if the stack is empty
        /// </returns>
        public virtual TermInfo Pop()
        {
            if (termList.Count == 0)
            {
                return null;
            }

            TermInfo top = termList[0];

            // Remove by index: the element is known to be at index 0, so there is no need
            // for Remove(item) to run an equality search to locate it.
            termList.RemoveAt(0);
            return top;
        }
Пример #3
0
        /// <summary>
        /// Searches the index built by <c>makeIndex1w()</c> for "Mac" OR "MacBook" and checks that
        /// the stack holds a single entry whose two terms are linked to each other through
        /// <c>Next</c> in a two-element cycle.
        /// </summary>
        public void TestFieldTermStackIndex1wSearch2terms()
        {
            makeIndex1w();

            BooleanQuery query = new BooleanQuery();
            query.Add(tq("Mac"), Occur.SHOULD);
            query.Add(tq("MacBook"), Occur.SHOULD);

            FieldTermStack termStack = new FieldTermStack(reader, 0, F, new FieldQuery(query, true, true));
            assertEquals(1, termStack.termList.size());

            TermInfo mac = termStack.Pop();
            assertEquals("Mac(11,20,3)", mac.toString());

            // The second term is not a separate stack entry; it is reached through the first one.
            TermInfo macBook = mac.Next;
            assertEquals("MacBook(11,20,3)", macBook.toString());
            assertSame(mac, macBook.Next);
        }
Пример #4
0
        /// <summary>
        /// Checks that equality and ordering of <see cref="TermInfo"/> are consistent for
        /// instances with random text whose fourth constructor argument is 0, 1, 2 and 0 again.
        /// </summary>
        public void TestTermInfoComparisonConsistency()
        {
            // Constructed in this exact order so the sequence of Random() calls is unchanged.
            TermInfo atZero      = new TermInfo(TestUtil.RandomUnicodeString(Random()), 0, 0, 0, 1);
            TermInfo atOne       = new TermInfo(TestUtil.RandomUnicodeString(Random()), 0, 0, 1, 1);
            TermInfo atTwo       = new TermInfo(TestUtil.RandomUnicodeString(Random()), 0, 0, 2, 1);
            TermInfo atZeroAgain = new TermInfo(TestUtil.RandomUnicodeString(Random()), 0, 0, 0, 1);

            // Every instance must be consistently equal to itself.
            foreach (TermInfo info in new TermInfo[] { atZero, atOne, atTwo, atZeroAgain })
            {
                assertConsistentEquals(info, info);
            }

            // Two distinct instances built with the same fourth argument.
            assertConsistentEquals(atZero, atZeroAgain);

            // Strict ordering between instances built with increasing fourth arguments.
            assertConsistentLessThan(atZero, atOne);
            assertConsistentLessThan(atOne, atTwo);
            assertConsistentLessThan(atZero, atTwo);
            assertConsistentLessThan(atZeroAgain, atOne);
            assertConsistentLessThan(atZeroAgain, atTwo);
        }
Пример #5
0
        /// <summary>
        /// Searches the index built by <c>makeIndex1w2w()</c> for the term "pc" OR the phrase
        /// "personal computer" and checks the two resulting stack entries: "pc" linked with
        /// "personal" through <c>Next</c>, followed by "computer".
        /// </summary>
        public void TestFieldTermStackIndex1w2wSearch1term1phrase()
        {
            makeIndex1w2w();

            BooleanQuery query = new BooleanQuery();
            query.Add(tq("pc"), Occur.SHOULD);
            query.Add(pqF("personal", "computer"), Occur.SHOULD);

            FieldQuery fieldQuery = new FieldQuery(query, true, true);
            FieldTermStack termStack = new FieldTermStack(reader, 0, F, fieldQuery);
            assertEquals(2, termStack.termList.size());

            // First entry: "pc" and "personal" form a two-element cycle.
            TermInfo pc = termStack.Pop();
            assertEquals("pc(3,5,1)", pc.toString());

            TermInfo personal = pc.Next;
            assertEquals("personal(3,5,1)", personal.toString());
            assertSame(pc, personal.Next);

            // Second entry: "computer".
            assertEquals("computer(3,5,2)", termStack.Pop().toString());
        }
Пример #6
0
        /// <summary>
        /// Builds the phrase list by popping terms from <paramref name="fieldTermStack"/> and
        /// matching them against the term/phrase maps of <paramref name="fieldQuery"/>,
        /// always trying to extend a match to the longest phrase before recording it.
        /// </summary>
        /// <remarks>
        /// Terms are consumed from <paramref name="fieldTermStack"/>; terms that turn out not to
        /// belong to the current phrase are pushed back so they can start a later phrase.
        /// </remarks>
        /// <param name="fieldTermStack"><see cref="FieldTermStack"/> object</param>
        /// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
        /// <param name="phraseLimit">maximum size of phraseList</param>
        public FieldPhraseList(FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit)
        {
            string field = fieldTermStack.FieldName;

            List <TermInfo> phraseCandidate = new List <TermInfo>();
            QueryPhraseMap  currMap         = null;
            QueryPhraseMap  nextMap         = null;

            while (!fieldTermStack.IsEmpty && (phraseList.Count < phraseLimit))
            {
                phraseCandidate.Clear();

                // Pop the next term and walk its Next chain (which cycles back to the popped
                // term) until one of the linked terms starts a query term or phrase.
                TermInfo first = fieldTermStack.Pop();
                TermInfo ti    = first;

                currMap = fieldQuery.GetFieldTermMap(field, ti.Text);
                while (currMap == null && ti.Next != first)
                {
                    ti      = ti.Next;
                    currMap = fieldQuery.GetFieldTermMap(field, ti.Text);
                }

                // if not found, discard top TermInfo from stack, then try next element
                if (currMap == null)
                {
                    continue;
                }

                // if found, search the longest phrase
                phraseCandidate.Add(ti);
                while (true)
                {
                    first   = ti = fieldTermStack.Pop();
                    nextMap = null;
                    if (ti != null)
                    {
                        // Same chain walk as above, but looking for a continuation of the current phrase.
                        nextMap = currMap.GetTermMap(ti.Text);
                        while (nextMap == null && ti.Next != first)
                        {
                            ti      = ti.Next;
                            nextMap = currMap.GetTermMap(ti.Text);
                        }
                    }
                    if (ti == null || nextMap == null)
                    {
                        // The phrase cannot be extended: return the unused term to the stack.
                        if (ti != null)
                        {
                            fieldTermStack.Push(ti);
                        }
                        if (currMap.IsValidTermOrPhrase(phraseCandidate))
                        {
                            AddIfNoOverlap(new WeightedPhraseInfo(phraseCandidate, currMap.Boost, currMap.TermOrPhraseNumber));
                        }
                        else
                        {
                            // Back off one term at a time, pushing each dropped term back onto
                            // the stack, until the shortened candidate matches a query phrase.
                            while (phraseCandidate.Count > 1)
                            {
                                int lastIndex = phraseCandidate.Count - 1;
                                TermInfo last = phraseCandidate[lastIndex];

                                // Remove by index: Remove(item) would search from the front using
                                // TermInfo equality instead of dropping the last element directly.
                                phraseCandidate.RemoveAt(lastIndex);
                                fieldTermStack.Push(last);

                                currMap = fieldQuery.SearchPhrase(field, phraseCandidate);
                                if (currMap != null)
                                {
                                    AddIfNoOverlap(new WeightedPhraseInfo(phraseCandidate, currMap.Boost, currMap.TermOrPhraseNumber));
                                    break;
                                }
                            }
                        }
                        break;
                    }
                    else
                    {
                        phraseCandidate.Add(ti);
                        currMap = nextMap;
                    }
                }
            }
        }
Пример #7
0
        //public static void main( string[] args ) throws Exception {
        //  Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
        //  QueryParser parser = new QueryParser(Version.LUCENE_CURRENT,  "f", analyzer );
        //  Query query = parser.parse( "a x:b" );
        //  FieldQuery fieldQuery = new FieldQuery( query, true, false );

        //  Directory dir = new RAMDirectory();
        //  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer));
        //  Document doc = new Document();
        //  FieldType ft = new FieldType(TextField.TYPE_STORED);
        //  ft.setStoreTermVectors(true);
        //  ft.setStoreTermVectorOffsets(true);
        //  ft.setStoreTermVectorPositions(true);
        //  doc.add( new Field( "f", ft, "a a a b b c a b b c d e f" ) );
        //  doc.add( new Field( "f", ft, "b a b a f" ) );
        //  writer.addDocument( doc );
        //  writer.close();

        //  IndexReader reader = IndexReader.open(dir1);
        //  new FieldTermStack( reader, 0, "f", fieldQuery );
        //  reader.close();
        //}

        /// <summary>
        /// Builds the term stack for one field of one document: reads the field's term vector,
        /// keeps every occurrence of a term contained in the query's term set, sorts the
        /// occurrences and links occurrences that share a position into a cycle via <c>SetNext</c>.
        /// </summary>
        /// <remarks>
        /// The constructor returns early, without sorting or linking, when the query has no term
        /// set for the field, when the document or field has no term vector, when positions cannot
        /// be enumerated for a matching term, or when an occurrence reports a negative start offset.
        /// </remarks>
        /// <param name="reader"><see cref="IndexReader"/> of the index</param>
        /// <param name="docId">document id to be highlighted</param>
        /// <param name="fieldName">field of the document to be highlighted</param>
        /// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
        /// <exception cref="System.IO.IOException">If there is a low-level I/O error</exception>
        public FieldTermStack(IndexReader reader, int docId, string fieldName, FieldQuery fieldQuery)
        {
            this.fieldName = fieldName;

            ISet <string> termSet = fieldQuery.GetTermSet(fieldName);

            // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
            if (termSet == null)
            {
                return;
            }

            Fields vectors = reader.GetTermVectors(docId);

            if (vectors == null)
            {
                // null snippet
                return;
            }

            Terms vector = vectors.GetTerms(fieldName);

            if (vector == null)
            {
                // null snippet
                return;
            }

            // spare is a reusable buffer for the UTF-8 -> UTF-16 conversion of each term;
            // dpEnum is passed back into DocsAndPositions on every iteration (presumably so it can be reused).
            CharsRef             spare     = new CharsRef();
            TermsEnum            termsEnum = vector.GetIterator(null);
            DocsAndPositionsEnum dpEnum    = null;
            BytesRef             text;

            // MaxDoc is used as the document count in the weight formula below.
            int numDocs = reader.MaxDoc;

            while ((text = termsEnum.Next()) != null)
            {
                UnicodeUtil.UTF8toUTF16(text, spare);
                string term = spare.ToString();
                // Skip terms of the vector that the query does not reference.
                if (!termSet.Contains(term))
                {
                    continue;
                }
                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
                if (dpEnum == null)
                {
                    // null snippet
                    // NOTE(review): occurrences already added to termList stay in it, unsorted and unlinked.
                    return;
                }

                // Move the enum onto its first document before reading Freq and positions.
                dpEnum.NextDoc();

                // For weight look here: http://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/DefaultSimilarity.html
                // weight = ln(numDocs / (docFreq + 1)) + 1, with docFreq taken from the main index reader.
                float weight = (float)(Math.Log(numDocs / (double)(reader.DocFreq(new Term(fieldName, text)) + 1)) + 1.0);

                int freq = dpEnum.Freq;

                // One TermInfo per occurrence of the term in this document.
                for (int i = 0; i < freq; i++)
                {
                    int pos = dpEnum.NextPosition();
                    if (dpEnum.StartOffset < 0)
                    {
                        // NOTE(review): as above, occurrences added so far remain in termList.
                        return; // no offsets, null snippet
                    }
                    termList.Add(new TermInfo(term, dpEnum.StartOffset, dpEnum.EndOffset, pos, weight));
                }
            }

            // sort by position
            CollectionUtil.TimSort(termList);

            // now look for dups at the same position, linking them together
            // currentPos: position of the group being built; first: the group's entry that stays in
            // termList; previous: the most recently linked entry of the group.
            int      currentPos = -1;
            TermInfo previous   = null;
            TermInfo first      = null;

            // No increment clause: i only advances when the current entry is kept (see below).
            for (int i = 0; i < termList.Count;)
            {
                TermInfo current = termList[i];
                if (current.Position == currentPos)
                {
                    // Same position as the current group: chain it after the previous entry and
                    // drop it from the list, so only the group's first entry remains in termList.
                    Debug.Assert(previous != null);
                    previous.SetNext(current);
                    previous = current;
                    //iterator.Remove();

                    // LUCENENET NOTE: Remove, but don't advance the i position (since removing will advance to the next item)
                    termList.RemoveAt(i);
                }
                else
                {
                    // New position: close the previous group by pointing its last entry back at its first.
                    if (previous != null)
                    {
                        previous.SetNext(first);
                    }
                    previous   = first = current;
                    currentPos = current.Position;

                    // LUCENENET NOTE: Only increment the position if we don't do a delete.
                    i++;
                }
            }

            // Close the final group the same way.
            if (previous != null)
            {
                previous.SetNext(first);
            }
        }
Пример #8
0
 /// <summary>
 /// Places a <see cref="TermInfo"/> on top of the stack by inserting it at the front of the term list.
 /// </summary>
 /// <param name="termInfo">the <see cref="TermInfo"/> object that becomes the new top of the stack</param>
 public virtual void Push(TermInfo termInfo) => termList.Insert(0, termInfo);