Example #1
0
		/// <summary>
		/// Builds a query for <c>field</c> from the analyzed tokens of <c>queryText</c>:
		/// a single token yields a term query; several tokens all at one position
		/// (synonyms) yield a boolean OR of term queries; overlapping tokens across
		/// positions yield a multi-phrase query; one token per position yields a
		/// plain phrase query; zero tokens yields <c>null</c>.
		/// </summary>
		/// <param name="field">field the generated terms are created against</param>
		/// <param name="queryText">raw text to run through the analyzer</param>
		/// <returns>the constructed query, or <c>null</c> when analysis produced no tokens</returns>
		/// <exception cref="ParseException">throw in overridden method to disallow
		/// </exception>
		public /*protected internal*/ virtual Query GetFieldQuery(System.String field, System.String queryText)
		{
			// Use the analyzer to get all the tokens, and then build a TermQuery,
			// PhraseQuery, or nothing based on the term count
			
			TokenStream source;
			try
			{
				source = analyzer.ReusableTokenStream(field, new System.IO.StringReader(queryText));
				source.Reset();
			}
			catch (System.IO.IOException e)
			{
				// Reusable stream failed; fall back to a fresh, non-reused stream.
				source = analyzer.TokenStream(field, new System.IO.StringReader(queryText));
			}
			// Cache all tokens so the stream can be consumed twice: first to count
			// tokens/positions, then rewound below to actually build the query.
			CachingTokenFilter buffer = new CachingTokenFilter(source);
			TermAttribute termAtt = null;
			PositionIncrementAttribute posIncrAtt = null;
			int numTokens = 0;
			
			bool success = false;
			try
			{
				buffer.Reset();
				success = true;
			}
			catch (System.IO.IOException e)
			{
				// success==false if we hit an exception
			}
			if (success)
			{
				// Only look up attributes once the buffer reset succeeded; a null
				// termAtt below skips the counting pass entirely.
				if (buffer.HasAttribute(typeof(TermAttribute)))
				{
					termAtt = (TermAttribute) buffer.GetAttribute(typeof(TermAttribute));
				}
				if (buffer.HasAttribute(typeof(PositionIncrementAttribute)))
				{
					posIncrAtt = (PositionIncrementAttribute) buffer.GetAttribute(typeof(PositionIncrementAttribute));
				}
			}
			
			int positionCount = 0;
			bool severalTokensAtSamePosition = false;
			
			// First pass: count tokens and distinct positions. A position increment
			// of 0 means a token overlaps the previous one (e.g. a synonym).
			bool hasMoreTokens = false;
			if (termAtt != null)
			{
				try
				{
					hasMoreTokens = buffer.IncrementToken();
					while (hasMoreTokens)
					{
						numTokens++;
						int positionIncrement = (posIncrAtt != null)?posIncrAtt.GetPositionIncrement():1;
						if (positionIncrement != 0)
						{
							positionCount += positionIncrement;
						}
						else
						{
							severalTokensAtSamePosition = true;
						}
						hasMoreTokens = buffer.IncrementToken();
					}
				}
				catch (System.IO.IOException e)
				{
					// ignore
				}
			}
			try
			{
				// rewind the buffer stream
				buffer.Reset();
				
				// close original stream - all tokens buffered
				source.Close();
			}
			catch (System.IO.IOException e)
			{
				// ignore
			}
			
			if (numTokens == 0)
				return null;
			else if (numTokens == 1)
			{
				// Exactly one token: a plain term query.
				System.String term = null;
				try
				{
					bool hasNext = buffer.IncrementToken();
					System.Diagnostics.Debug.Assert(hasNext == true);
					term = termAtt.Term();
				}
				catch (System.IO.IOException e)
				{
					// safe to ignore, because we know the number of tokens
				}
				return NewTermQuery(new Term(field, term));
			}
			else
			{
				if (severalTokensAtSamePosition)
				{
					if (positionCount == 1)
					{
						// All tokens share a single position (pure synonyms):
						// no phrase query:
						BooleanQuery q = NewBooleanQuery(true);
						for (int i = 0; i < numTokens; i++)
						{
							System.String term = null;
							try
							{
								bool hasNext = buffer.IncrementToken();
								System.Diagnostics.Debug.Assert(hasNext == true);
								term = termAtt.Term();
							}
							catch (System.IO.IOException e)
							{
								// safe to ignore, because we know the number of tokens
							}
							
							Query currentQuery = NewTermQuery(new Term(field, term));
							q.Add(currentQuery, BooleanClause.Occur.SHOULD);
						}
						return q;
					}
					else
					{
						// Overlapping tokens spread over several positions:
						// phrase query:
						MultiPhraseQuery mpq = NewMultiPhraseQuery();
						mpq.SetSlop(phraseSlop);
						// Accumulates all terms occupying the current position.
						System.Collections.ArrayList multiTerms = new System.Collections.ArrayList();
						int position = - 1;
						for (int i = 0; i < numTokens; i++)
						{
							System.String term = null;
							int positionIncrement = 1;
							try
							{
								bool hasNext = buffer.IncrementToken();
								System.Diagnostics.Debug.Assert(hasNext == true);
								term = termAtt.Term();
								if (posIncrAtt != null)
								{
									positionIncrement = posIncrAtt.GetPositionIncrement();
								}
							}
							catch (System.IO.IOException e)
							{
								// safe to ignore, because we know the number of tokens
							}
							
							// A positive increment starts a new position: flush the
							// terms gathered for the previous one.
							if (positionIncrement > 0 && multiTerms.Count > 0)
							{
								if (enablePositionIncrements)
								{
                                    mpq.Add((Term[]) multiTerms.ToArray(typeof(Term)), position);
								}
								else
								{
                                    mpq.Add((Term[]) multiTerms.ToArray(typeof(Term)));
								}
								multiTerms.Clear();
							}
							position += positionIncrement;
							multiTerms.Add(new Term(field, term));
						}
						// Flush the terms accumulated for the final position.
						if (enablePositionIncrements)
						{
                            mpq.Add((Term[]) multiTerms.ToArray(typeof(Term)), position);
						}
						else
						{
                            mpq.Add((Term[]) multiTerms.ToArray(typeof(Term)));
						}
						return mpq;
					}
				}
				else
				{
					// One token per position: an ordinary phrase query.
					PhraseQuery pq = NewPhraseQuery();
					pq.SetSlop(phraseSlop);
					int position = - 1;
					
					
					for (int i = 0; i < numTokens; i++)
					{
						System.String term = null;
						int positionIncrement = 1;
						
						try
						{
							bool hasNext = buffer.IncrementToken();
							System.Diagnostics.Debug.Assert(hasNext == true);
							term = termAtt.Term();
							if (posIncrAtt != null)
							{
								positionIncrement = posIncrAtt.GetPositionIncrement();
							}
						}
						catch (System.IO.IOException e)
						{
							// safe to ignore, because we know the number of tokens
						}
						
						if (enablePositionIncrements)
						{
							position += positionIncrement;
							pq.Add(new Term(field, term), position);
						}
						else
						{
							pq.Add(new Term(field, term));
						}
					}
					return pq;
				}
			}
		}
Example #2
0
		public virtual void  TestEndOffsetPositionWithCachingTokenFilter()
		{
			// Index a single document whose field content is wrapped in a
			// CachingTokenFilter; the same Field instance is added twice so the
			// term vector records two occurrences of the token.
			MockRAMDirectory directory = new MockRAMDirectory();
			Analyzer whitespaceAnalyzer = new WhitespaceAnalyzer();
			IndexWriter writer = new IndexWriter(directory, whitespaceAnalyzer, IndexWriter.MaxFieldLength.LIMITED);
			Document document = new Document();
			TokenStream cachedStream = new CachingTokenFilter(whitespaceAnalyzer.TokenStream("field", new System.IO.StringReader("abcd   ")));
			Field vectorField = new Field("field", cachedStream, Field.TermVector.WITH_POSITIONS_OFFSETS);
			document.Add(vectorField);
			document.Add(vectorField);
			writer.AddDocument(document);
			writer.Close();
			
			// Both occurrences must carry offsets; the second instance's offsets
			// continue past the end of the first field instance's text.
			IndexReader reader = IndexReader.Open(directory);
			TermVectorOffsetInfo[] offsets = ((TermPositionVector) reader.GetTermFreqVector(0, "field")).GetOffsets(0);
			Assert.AreEqual(2, offsets.Length);
			Assert.AreEqual(0, offsets[0].GetStartOffset());
			Assert.AreEqual(4, offsets[0].GetEndOffset());
			Assert.AreEqual(8, offsets[1].GetStartOffset());
			Assert.AreEqual(12, offsets[1].GetEndOffset());
			reader.Close();
			directory.Close();
		}
Example #3
0
        /// <summary>
        /// Creates a query from the analysis chain.
        /// <p>
        /// Expert: this is more useful for subclasses such as queryparsers.
        /// If using this class directly, just use <seealso cref="#createBooleanQuery(String, String)"/>
        /// and <seealso cref="#createPhraseQuery(String, String)"/> </summary>
        /// <param name="analyzer"> analyzer used for this query </param>
        /// <param name="operator"> default boolean operator used for this query </param>
        /// <param name="field"> field to create queries against </param>
        /// <param name="queryText"> text to be passed to the analysis chain </param>
        /// <param name="quoted"> true if phrases should be generated when terms occur at more than one position </param>
        /// <param name="phraseSlop"> slop factor for phrase/multiphrase queries </param>
        /// <returns> a term, boolean, phrase, or multi-phrase query depending on the
        /// token/position structure of the analyzed text, or <c>null</c> when the
        /// analysis chain produced no tokens </returns>
        /// <exception cref="Exception"> wrapping any IOException thrown while
        /// analyzing the query text </exception>
        protected internal Query CreateFieldQuery(Analyzer analyzer, BooleanClause.Occur @operator, string field, string queryText, bool quoted, int phraseSlop)
        {
            Debug.Assert(@operator == BooleanClause.Occur.SHOULD || @operator == BooleanClause.Occur.MUST);
            // Use the analyzer to get all the tokens, and then build a TermQuery,
            // PhraseQuery, or nothing based on the term count
            CachingTokenFilter buffer = null;
            ITermToBytesRefAttribute termAtt = null;
            IPositionIncrementAttribute posIncrAtt = null;
            int numTokens = 0;
            int positionCount = 0;
            bool severalTokensAtSamePosition = false;
            bool hasMoreTokens = false;

            TokenStream source = null;
            try
            {
                source = analyzer.TokenStream(field, new StringReader(queryText));
                source.Reset();
                // Cache all tokens so the stream can be consumed twice: once here
                // to count tokens/positions, and again below to build the query.
                buffer = new CachingTokenFilter(source);
                buffer.Reset();

                if (buffer.HasAttribute<ITermToBytesRefAttribute>())
                {
                    termAtt = buffer.GetAttribute<ITermToBytesRefAttribute>();
                }
                if (buffer.HasAttribute<IPositionIncrementAttribute>())
                {
                    posIncrAtt = buffer.GetAttribute<IPositionIncrementAttribute>();
                }

                // First pass: count tokens and distinct positions. A position
                // increment of 0 marks a token that overlaps the previous one
                // (e.g. a synonym).
                if (termAtt != null)
                {
                    try
                    {
                        hasMoreTokens = buffer.IncrementToken();
                        while (hasMoreTokens)
                        {
                            numTokens++;
                            int positionIncrement = (posIncrAtt != null) ? posIncrAtt.PositionIncrement : 1;
                            if (positionIncrement != 0)
                            {
                                positionCount += positionIncrement;
                            }
                            else
                            {
                                severalTokensAtSamePosition = true;
                            }
                            hasMoreTokens = buffer.IncrementToken();
                        }
                    }
                    catch (System.IO.IOException)
                    {
                        // ignore
                    }
                }
            }
            catch (System.IO.IOException e)
            {
                throw new Exception("Error analyzing query text", e);
            }
            finally
            {
                // Close the source stream regardless of outcome; all tokens are
                // already buffered, and any close failure must not mask an
                // exception thrown above.
                IOUtils.CloseWhileHandlingException(source);
            }

            // rewind the buffer stream
            buffer.Reset();

            // The analysis chain reuses one BytesRef: FillBytesRef() below mutates
            // this shared instance, so every stored term must deep-copy it.
            BytesRef bytes = termAtt == null ? null : termAtt.BytesRef;

            if (numTokens == 0)
            {
                return null;
            }
            else if (numTokens == 1)
            {
                // Exactly one token: a plain term query.
                try
                {
                    bool hasNext = buffer.IncrementToken();
                    Debug.Assert(hasNext == true);
                    termAtt.FillBytesRef();
                }
                catch (System.IO.IOException)
                {
                    // safe to ignore, because we know the number of tokens
                }
                return NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes)));
            }
            else
            {
                if (severalTokensAtSamePosition || (!quoted))
                {
                    if (positionCount == 1 || (!quoted))
                    {
                        // no phrase query:

                        if (positionCount == 1)
                        {
                            // simple case: only one position, with synonyms
                            BooleanQuery q = NewBooleanQuery(true);
                            for (int i = 0; i < numTokens; i++)
                            {
                                try
                                {
                                    bool hasNext = buffer.IncrementToken();
                                    Debug.Assert(hasNext == true);
                                    termAtt.FillBytesRef();
                                }
                                catch (System.IO.IOException)
                                {
                                    // safe to ignore, because we know the number of tokens
                                }
                                Query currentQuery = NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes)));
                                q.Add(currentQuery, BooleanClause.Occur.SHOULD);
                            }
                            return q;
                        }
                        else
                        {
                            // multiple positions
                            BooleanQuery q = NewBooleanQuery(false);
                            Query currentQuery = null;
                            for (int i = 0; i < numTokens; i++)
                            {
                                try
                                {
                                    bool hasNext = buffer.IncrementToken();
                                    Debug.Assert(hasNext == true);
                                    termAtt.FillBytesRef();
                                }
                                catch (System.IO.IOException)
                                {
                                    // safe to ignore, because we know the number of tokens
                                }
                                if (posIncrAtt != null && posIncrAtt.PositionIncrement == 0)
                                {
                                    // Token overlaps the previous one: group it with
                                    // that token under a nested OR (synonym group).
                                    if (!(currentQuery is BooleanQuery))
                                    {
                                        Query t = currentQuery;
                                        currentQuery = NewBooleanQuery(true);
                                        ((BooleanQuery)currentQuery).Add(t, BooleanClause.Occur.SHOULD);
                                    }
                                    ((BooleanQuery)currentQuery).Add(NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes))), BooleanClause.Occur.SHOULD);
                                }
                                else
                                {
                                    // New position: commit the previous clause and
                                    // start a fresh one.
                                    if (currentQuery != null)
                                    {
                                        q.Add(currentQuery, @operator);
                                    }
                                    currentQuery = NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes)));
                                }
                            }
                            // Commit the final pending clause.
                            q.Add(currentQuery, @operator);
                            return q;
                        }
                    }
                    else
                    {
                        // phrase query:
                        MultiPhraseQuery mpq = NewMultiPhraseQuery();
                        mpq.Slop = phraseSlop;
                        // Accumulates all terms occupying the current position.
                        IList<Term> multiTerms = new List<Term>();
                        int position = -1;
                        for (int i = 0; i < numTokens; i++)
                        {
                            int positionIncrement = 1;
                            try
                            {
                                bool hasNext = buffer.IncrementToken();
                                Debug.Assert(hasNext == true);
                                termAtt.FillBytesRef();
                                if (posIncrAtt != null)
                                {
                                    positionIncrement = posIncrAtt.PositionIncrement;
                                }
                            }
                            catch (System.IO.IOException)
                            {
                                // safe to ignore, because we know the number of tokens
                            }

                            // A positive increment starts a new position: flush the
                            // terms gathered for the previous one.
                            if (positionIncrement > 0 && multiTerms.Count > 0)
                            {
                                if (EnablePositionIncrements_Renamed)
                                {
                                    mpq.Add(multiTerms.ToArray(), position);
                                }
                                else
                                {
                                    mpq.Add(multiTerms.ToArray());
                                }
                                multiTerms.Clear();
                            }
                            position += positionIncrement;
                            multiTerms.Add(new Term(field, BytesRef.DeepCopyOf(bytes)));
                        }
                        // Flush the terms accumulated for the final position.
                        if (EnablePositionIncrements_Renamed)
                        {
                            mpq.Add(multiTerms.ToArray(), position);
                        }
                        else
                        {
                            mpq.Add(multiTerms.ToArray());
                        }
                        return mpq;
                    }
                }
                else
                {
                    // One token per position, quoted input: an ordinary phrase query.
                    PhraseQuery pq = NewPhraseQuery();
                    pq.Slop = phraseSlop;
                    int position = -1;

                    for (int i = 0; i < numTokens; i++)
                    {
                        int positionIncrement = 1;

                        try
                        {
                            bool hasNext = buffer.IncrementToken();
                            Debug.Assert(hasNext == true);
                            termAtt.FillBytesRef();
                            if (posIncrAtt != null)
                            {
                                positionIncrement = posIncrAtt.PositionIncrement;
                            }
                        }
                        catch (System.IO.IOException)
                        {
                            // safe to ignore, because we know the number of tokens
                        }

                        if (EnablePositionIncrements_Renamed)
                        {
                            position += positionIncrement;
                            pq.Add(new Term(field, BytesRef.DeepCopyOf(bytes)), position);
                        }
                        else
                        {
                            pq.Add(new Term(field, BytesRef.DeepCopyOf(bytes)));
                        }
                    }
                    return pq;
                }
            }
        }
        public virtual void TestEndOffsetPositionWithCachingTokenFilter()
        {
            // Index a single document whose field content is wrapped in a
            // CachingTokenFilter; the same Field instance is added twice so two
            // occurrences of the token are recorded in the term vector.
            Directory directory = NewDirectory();
            Analyzer mockAnalyzer = new MockAnalyzer(Random());
            IndexWriter writer = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, mockAnalyzer));
            Document document = new Document();
            IOException caught = null;
            TokenStream tokenStream = mockAnalyzer.TokenStream("field", new StringReader("abcd   "));
            try
            {
                tokenStream.Reset(); // TODO: weird to reset before wrapping with CachingTokenFilter... correct?
                TokenStream cached = new CachingTokenFilter(tokenStream);
                FieldType vectorType = new FieldType(TextField.TYPE_NOT_STORED);
                vectorType.StoreTermVectors = true;
                vectorType.StoreTermVectorPositions = true;
                vectorType.StoreTermVectorOffsets = true;
                Field vectorField = new Field("field", cached, vectorType);
                document.Add(vectorField);
                document.Add(vectorField);
                writer.AddDocument(document);
            }
            catch (IOException e)
            {
                caught = e;
            }
            finally
            {
                // Close the underlying stream whether or not indexing succeeded,
                // without masking any exception captured above.
                IOUtils.CloseWhileHandlingException(caught, tokenStream);
            }
            writer.Dispose();

            // Both occurrences must be present, each with its own offsets; the
            // second instance's offsets continue past the first field's text.
            IndexReader reader = DirectoryReader.Open(directory);
            TermsEnum terms = reader.GetTermVectors(0).Terms("field").Iterator(null);
            Assert.IsNotNull(terms.Next());
            DocsAndPositionsEnum positions = terms.DocsAndPositions(null, null);
            Assert.AreEqual(2, terms.TotalTermFreq());

            Assert.IsTrue(positions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            positions.NextPosition();
            Assert.AreEqual(0, positions.StartOffset());
            Assert.AreEqual(4, positions.EndOffset());

            positions.NextPosition();
            Assert.AreEqual(8, positions.StartOffset());
            Assert.AreEqual(12, positions.EndOffset());
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, positions.NextDoc());

            reader.Dispose();
            directory.Dispose();
        }