Example 1
        public QueryTermVector(System.String queryString, Analyzer analyzer)
        {
            if (analyzer != null)
            {
                TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(queryString));
                if (stream != null)
                {
                    System.Collections.ArrayList terms = new System.Collections.ArrayList();
                    try
                    {
                        bool hasMoreTokens = false;

                        // Rewind the stream, then copy each token's term text
                        // out via the TermAttribute.
                        stream.Reset();
                        TermAttribute termAtt = (TermAttribute)stream.AddAttribute(typeof(TermAttribute));

                        hasMoreTokens = stream.IncrementToken();
                        while (hasMoreTokens)
                        {
                            terms.Add(termAtt.Term());
                            hasMoreTokens = stream.IncrementToken();
                        }
                        ProcessTerms((System.String[])terms.ToArray(typeof(System.String)));
                    }
                    catch (System.IO.IOException)
                    {
                        // Swallow analyzer I/O errors; the term vector is
                        // simply left empty if tokenization fails.
                    }
                }
            }
        }
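For context, a minimal usage sketch of this constructor (hedged: it assumes the Lucene.Net 2.9-era StandardAnalyzer constructor and the GetTerms/GetTermFrequencies accessors QueryTermVector exposes via TermFreqVector):

    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Search;
    using Lucene.Net.Util;

    // Tokenize a raw query string and inspect the resulting term vector.
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_29);
    QueryTermVector vector = new QueryTermVector("hello hello world", analyzer);

    System.String[] terms = vector.GetTerms();           // distinct terms
    int[] freqs = vector.GetTermFrequencies();           // parallel counts
    for (int i = 0; i < terms.Length; i++)
        System.Console.WriteLine(terms[i] + ": " + freqs[i]);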
Example 2
        /// <summary> Create a tokenized and indexed field that is not stored, optionally with
        /// storing term vectors.  This is useful for pre-analyzed fields.
        /// The TokenStream is read only when the Document is added to the index,
        /// i.e. you may not close the TokenStream until <see cref="IndexWriter.AddDocument(Document)"/>
        /// has been called.
        ///
        /// </summary>
        /// <param name="name">The name of the field
        /// </param>
        /// <param name="tokenStream">The TokenStream with the content
        /// </param>
        /// <param name="termVector">Whether term vector should be stored
        /// </param>
        /// <throws>  NullReferenceException if name or tokenStream is <code>null</code> </throws>
        public Field(System.String name, TokenStream tokenStream, TermVector termVector)
        {
            if (name == null)
            {
                throw new System.NullReferenceException("name cannot be null");
            }
            if (tokenStream == null)
            {
                throw new System.NullReferenceException("tokenStream cannot be null");
            }

            this.name        = StringHelper.Intern(name);      // field names are interned
            this.fieldsData  = null;
            this.tokenStream = tokenStream;

            this.isStored     = false;
            this.isCompressed = false;

            this.isIndexed   = true;
            this.isTokenized = true;

            this.isBinary = false;

            SetStoreTermVector(termVector);
        }
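A usage sketch for this constructor (assumptions: RAMDirectory and the 2.9 IndexWriter constructor; the field name and text are placeholders):

    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Documents;
    using Lucene.Net.Index;
    using Lucene.Net.Store;
    using Lucene.Net.Util;

    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29);
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);

    // Pre-analyze the content ourselves, then hand the stream to the field.
    TokenStream stream = analyzer.TokenStream("contents",
        new System.IO.StringReader("some pre-analyzed text"));

    Document doc = new Document();
    doc.Add(new Field("contents", stream, Field.TermVector.WITH_POSITIONS_OFFSETS));
    writer.AddDocument(doc);   // the stream is consumed (and may be closed) only now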
Example 3
 /// <summary>Expert: change the value of this field to a new TokenStream.
 /// Throws ArgumentException if the field is binary or stored.
 /// </summary>
 public void  SetValue(TokenStream value_Renamed)
 {
     if (isBinary)
     {
         throw new System.ArgumentException("cannot set a TokenStream value on a binary field");
     }
     if (isStored)
     {
         throw new System.ArgumentException("cannot set a TokenStream value on a stored field");
     }
     fieldsData  = null;
     tokenStream = value_Renamed;
 }
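One reason for this setter is Field-instance reuse across documents (a sketch; doc, writer, stream1 and stream2 are placeholders for objects built as in the previous example):

    // Reuse a single Field to avoid re-allocating one per document.
    Field body = new Field("body", stream1, Field.TermVector.NO);
    doc.Add(body);
    writer.AddDocument(doc);   // consumes stream1

    body.SetValue(stream2);    // swap in the next document's tokens
    writer.AddDocument(doc);   // consumes stream2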
Example 4
        public override void  ProcessFields(Fieldable[] fields, int count)
        {
            fieldState.Reset(docState.doc.GetBoost());

            int maxFieldLength = docState.maxFieldLength;

            bool doInvert = consumer.Start(fields, count);

            for (int i = 0; i < count; i++)
            {
                Fieldable field = fields[i];

                // TODO FI: this should be "genericized" to querying
                // consumer if it wants to see this particular field
                // tokenized.
                if (field.IsIndexed() && doInvert)
                {
                    bool anyToken;

                    if (fieldState.length > 0)
                    {
                        fieldState.position += docState.analyzer.GetPositionIncrementGap(fieldInfo.name);
                    }

                    if (!field.IsTokenized())
                    {
                        // un-tokenized field
                        System.String stringValue = field.StringValue();
                        int           valueLength = stringValue.Length;
                        perThread.singleTokenTokenStream.Reinit(stringValue, 0, valueLength);
                        fieldState.attributeSource = perThread.singleTokenTokenStream;
                        consumer.Start(field);

                        bool success = false;
                        try
                        {
                            consumer.Add();
                            success = true;
                        }
                        finally
                        {
                            if (!success)
                            {
                                docState.docWriter.SetAborting();
                            }
                        }
                        fieldState.offset += valueLength;
                        fieldState.length++;
                        fieldState.position++;
                        anyToken = valueLength > 0;
                    }
                    else
                    {
                        // tokenized field
                        TokenStream stream;
                        TokenStream streamValue = field.TokenStreamValue();

                        if (streamValue != null)
                        {
                            stream = streamValue;
                        }
                        else
                        {
                            // the field does not have a TokenStream,
                            // so we have to obtain one from the analyzer
                            System.IO.TextReader reader;                             // find or make Reader
                            System.IO.TextReader readerValue = field.ReaderValue();

                            if (readerValue != null)
                            {
                                reader = readerValue;
                            }
                            else
                            {
                                System.String stringValue = field.StringValue();
                                if (stringValue == null)
                                {
                                    throw new System.ArgumentException("field must have either TokenStream, String or Reader value");
                                }
                                perThread.stringReader.Init(stringValue);
                                reader = perThread.stringReader;
                            }

                            // Tokenize field and add to postingTable
                            stream = docState.analyzer.ReusableTokenStream(fieldInfo.name, reader);
                        }

                        // reset the TokenStream to the first token
                        stream.Reset();

                        int startLength = fieldState.length;

                        // deprecated back-compatibility flag: when set, the
                        // first token may end up at position -1
                        bool allowMinus1Position = docState.allowMinus1Position;

                        try
                        {
                            int offsetEnd = fieldState.offset - 1;

                            bool hasMoreTokens = stream.IncrementToken();

                            fieldState.attributeSource = stream;

                            OffsetAttribute            offsetAttribute  = (OffsetAttribute)fieldState.attributeSource.AddAttribute(typeof(OffsetAttribute));
                            PositionIncrementAttribute posIncrAttribute = (PositionIncrementAttribute)fieldState.attributeSource.AddAttribute(typeof(PositionIncrementAttribute));

                            consumer.Start(field);

                            for (; ;)
                            {
                                // If we hit an exception in IncrementToken below
                                // (which is fairly common, e.g. if the analyzer
                                // chokes on a given document), then it's
                                // non-aborting and (above) this one document
                                // will be marked as deleted, but still
                                // consume a docID

                                if (!hasMoreTokens)
                                {
                                    break;
                                }

                                int posIncr = posIncrAttribute.GetPositionIncrement();
                                fieldState.position += posIncr;
                                if (allowMinus1Position || fieldState.position > 0)
                                {
                                    fieldState.position--;
                                }

                                if (posIncr == 0)
                                {
                                    fieldState.numOverlap++;
                                }

                                bool success = false;
                                try
                                {
                                    // If we hit an exception in here, we abort
                                    // all buffered documents since the last
                                    // flush, on the likelihood that the
                                    // internal state of the consumer is now
                                    // corrupt and should not be flushed to a
                                    // new segment:
                                    consumer.Add();
                                    success = true;
                                }
                                finally
                                {
                                    if (!success)
                                    {
                                        docState.docWriter.SetAborting();
                                    }
                                }
                                fieldState.position++;
                                offsetEnd = fieldState.offset + offsetAttribute.EndOffset();
                                if (++fieldState.length >= maxFieldLength)
                                {
                                    if (docState.infoStream != null)
                                    {
                                        docState.infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
                                    }
                                    break;
                                }

                                hasMoreTokens = stream.IncrementToken();
                            }
                            // trigger streams to perform end-of-stream operations
                            stream.End();

                            fieldState.offset += offsetAttribute.EndOffset();
                            anyToken           = fieldState.length > startLength;
                        }
                        finally
                        {
                            stream.Close();
                        }
                    }

                    if (anyToken)
                    {
                        fieldState.offset += docState.analyzer.GetOffsetGap(field);
                    }
                    fieldState.boost *= field.GetBoost();
                }

                // LUCENE-2387: don't hang onto the field, so GC can
                // reclaim
                fields[i] = null;
            }

            consumer.Finish();
            endConsumer.Finish();
        }
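The inverted-field loop above follows the general TokenStream consumer contract: Reset, a loop of IncrementToken, then End and Close. A stripped-down sketch of that contract outside the indexing chain (analyzer and reader are placeholders; the attribute types and calls are the ones used above):

    TokenStream stream = analyzer.ReusableTokenStream("contents", reader);
    TermAttribute termAtt = (TermAttribute)stream.AddAttribute(typeof(TermAttribute));
    PositionIncrementAttribute posAtt = (PositionIncrementAttribute)stream.AddAttribute(typeof(PositionIncrementAttribute));

    stream.Reset();                    // rewind to before the first token
    while (stream.IncrementToken())    // false once the stream is exhausted
    {
        System.Console.WriteLine(termAtt.Term() + " (+" + posAtt.GetPositionIncrement() + ")");
    }
    stream.End();                      // record end-of-stream state (e.g. final offset)
    stream.Close();                    // release the underlying reader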
Example 5
		/// <summary>Construct filtering <i>in</i>. </summary>
		public StandardFilter(TokenStream in_Renamed):base(in_Renamed)
		{
			termAtt = (TermAttribute) AddAttribute(typeof(TermAttribute));
			typeAtt = (TypeAttribute) AddAttribute(typeof(TypeAttribute));
		}
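Typical use chains this filter behind a tokenizer, e.g. (a sketch assuming the 2.9 StandardTokenizer constructor that takes a Version):

    System.IO.TextReader reader = new System.IO.StringReader("Some text to analyze.");
    TokenStream stream = new StandardTokenizer(Version.LUCENE_29, reader);
    stream = new StandardFilter(stream);   // removes 's and dots in acronyms
    stream = new LowerCaseFilter(stream);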
Example 6
 /// <summary> Create a tokenized and indexed field that is not stored. Term vectors will
 /// not be stored. This is useful for pre-analyzed fields.
 /// The TokenStream is read only when the Document is added to the index,
 /// i.e. you may not close the TokenStream until <see cref="IndexWriter.AddDocument(Document)"/>
 /// has been called.
 ///
 /// </summary>
 /// <param name="name">The name of the field
 /// </param>
 /// <param name="tokenStream">The TokenStream with the content
 /// </param>
 /// <throws>  NullReferenceException if name or tokenStream is <code>null</code> </throws>
 public Field(System.String name, TokenStream tokenStream) : this(name, tokenStream, TermVector.NO)
 {
 }
Example 7
 /// <summary>Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return true.
 /// May be combined with stored values from stringValue() or binaryValue()
 /// </summary>
 public void  SetTokenStream(TokenStream tokenStream)
 {
     this.isIndexed   = true;
     this.isTokenized = true;
     this.tokenStream = tokenStream;
 }
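This enables a field that is stored from a plain string but indexed from pre-built tokens (a sketch; rawText and myTokenStream are hypothetical placeholders):

    // Store the raw text, but index the tokens supplied by a custom stream.
    Field f = new Field("body", rawText, Field.Store.YES, Field.Index.NO);
    f.SetTokenStream(myTokenStream);   // IsIndexed() and IsTokenized() now return true
    doc.Add(f);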