/// <summary> Builds the term vector for <c>queryString</c> by tokenizing it with
/// <c>analyzer</c> and collecting every produced term.
/// A <c>null</c> analyzer or a <c>null</c> stream yields an empty vector.
/// IO errors during tokenization are deliberately swallowed (best-effort
/// contract preserved from the original), but the stream is now always closed.
/// </summary>
/// <param name="queryString">The query text to tokenize</param>
/// <param name="analyzer">The analyzer used to produce tokens; may be <c>null</c></param>
public QueryTermVector(System.String queryString, Analyzer analyzer)
{
	if (analyzer != null)
	{
		TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(queryString));
		if (stream != null)
		{
			System.Collections.ArrayList terms = new System.Collections.ArrayList();
			try
			{
				stream.Reset();
				TermAttribute termAtt = (TermAttribute) stream.AddAttribute(typeof(TermAttribute));
				// Drain the stream, recording each token's term text.
				while (stream.IncrementToken())
				{
					terms.Add(termAtt.Term());
				}
				ProcessTerms((System.String[]) terms.ToArray(typeof(System.String)));
			}
			catch (System.IO.IOException)
			{
				// Best-effort: an unreadable stream simply produces an empty vector.
			}
			finally
			{
				// Always release the TokenStream's resources (previously leaked).
				try
				{
					stream.Close();
				}
				catch (System.IO.IOException)
				{
					// Nothing useful can be done if close itself fails.
				}
			}
		}
	}
}
/// <summary> Create a tokenized and indexed field that is not stored, optionally with
/// storing term vectors. This is useful for pre-analyzed fields.
/// The TokenStream is read only when the Document is added to the index,
/// i.e. you may not close the TokenStream until {@link IndexWriter#AddDocument(Document)}
/// has been called.
/// </summary>
/// <param name="name">The name of the field</param>
/// <param name="tokenStream">The TokenStream with the content</param>
/// <param name="termVector">Whether term vector should be stored</param>
/// <throws> ArgumentNullException if name or tokenStream is <code>null</code> </throws>
public Field(System.String name, TokenStream tokenStream, TermVector termVector)
{
	// Use ArgumentNullException for argument validation: raising the
	// runtime-reserved NullReferenceException directly violates CA2201.
	if (name == null)
	{
		throw new System.ArgumentNullException("name", "name cannot be null");
	}
	if (tokenStream == null)
	{
		throw new System.ArgumentNullException("tokenStream", "tokenStream cannot be null");
	}

	this.name = StringHelper.Intern(name); // field names are interned
	this.fieldsData = null; // no stored value; content comes from the stream
	this.tokenStream = tokenStream;

	// Pre-analyzed fields are never stored, always indexed/tokenized, never binary.
	this.isStored = false;
	this.isCompressed = false;
	this.isIndexed = true;
	this.isTokenized = true;
	this.isBinary = false;

	SetStoreTermVector(termVector);
}
/// <summary>Replaces this field's content with a pre-analyzed TokenStream,
/// clearing any previously stored value. Only valid on non-binary,
/// non-stored fields.</summary>
/// <param name="value_Renamed">The TokenStream to index from</param>
public void SetValue(TokenStream value_Renamed)
{
	// Reject field configurations that cannot carry a TokenStream.
	if (isBinary)
		throw new System.ArgumentException("cannot set a TokenStream value on a binary field");
	if (isStored)
		throw new System.ArgumentException("cannot set a TokenStream value on a stored field");

	// The stream supersedes whatever value was held before.
	fieldsData = null;
	tokenStream = value_Renamed;
}
/// <summary>Inverts each indexed field of the current document into the consumer
/// chain, maintaining per-field position/offset/length state in fieldState.
/// NOTE(review): code unchanged — comments and formatting only. Relies on the
/// fieldState/docState/perThread wiring set up by the enclosing class.</summary>
/// <param name="fields">The fields of the document being processed</param>
/// <param name="count">Number of valid entries in <c>fields</c></param>
public override void ProcessFields(Fieldable[] fields, int count)
{
	// Start from a clean per-document state, seeded with the document boost.
	fieldState.Reset(docState.doc.GetBoost());
	int maxFieldLength = docState.maxFieldLength;

	bool doInvert = consumer.Start(fields, count);

	for (int i = 0; i < count; i++)
	{
		Fieldable field = fields[i];

		// TODO FI: this should be "genericized" to querying
		// consumer if it wants to see this particular field
		// tokenized.
		if (field.IsIndexed() && doInvert)
		{
			bool anyToken;

			// Multi-valued field: leave a position gap between successive values.
			if (fieldState.length > 0)
			{
				fieldState.position += docState.analyzer.GetPositionIncrementGap(fieldInfo.name);
			}

			if (!field.IsTokenized())
			{
				// un-tokenized field: the entire string value is one token
				System.String stringValue = field.StringValue();
				int valueLength = stringValue.Length;
				perThread.singleTokenTokenStream.Reinit(stringValue, 0, valueLength);
				fieldState.attributeSource = perThread.singleTokenTokenStream;
				consumer.Start(field);

				bool success = false;
				try
				{
					consumer.Add();
					success = true;
				}
				finally
				{
					// A failure in Add() likely corrupted the consumer's buffered
					// state, so abort everything buffered since the last flush.
					if (!success)
					{
						docState.docWriter.SetAborting();
					}
				}
				fieldState.offset += valueLength;
				fieldState.length++;
				fieldState.position++;
				anyToken = valueLength > 0;
			}
			else
			{
				// tokenized field
				TokenStream stream;
				TokenStream streamValue = field.TokenStreamValue();

				if (streamValue != null)
				{
					// Caller supplied a pre-analyzed stream; use it directly.
					stream = streamValue;
				}
				else
				{
					// the field does not have a TokenStream,
					// so we have to obtain one from the analyzer

					System.IO.TextReader reader; // find or make Reader
					System.IO.TextReader readerValue = field.ReaderValue();

					if (readerValue != null)
					{
						reader = readerValue;
					}
					else
					{
						System.String stringValue = field.StringValue();
						if (stringValue == null)
						{
							throw new System.ArgumentException("field must have either TokenStream, String or Reader value");
						}
						// Reuse the per-thread reader to avoid an allocation per field.
						perThread.stringReader.Init(stringValue);
						reader = perThread.stringReader;
					}

					// Tokenize field and add to postingTable
					stream = docState.analyzer.ReusableTokenStream(fieldInfo.name, reader);
				}

				// reset the TokenStream to the first token
				stream.Reset();

				int startLength = fieldState.length;

				// deprecated
				bool allowMinus1Position = docState.allowMinus1Position;

				try
				{
					// NOTE(review): offsetEnd is written but never read after the
					// loop below — looks like a dead store; candidate for removal.
					int offsetEnd = fieldState.offset - 1;

					bool hasMoreTokens = stream.IncrementToken();

					fieldState.attributeSource = stream;

					OffsetAttribute offsetAttribute = (OffsetAttribute) fieldState.attributeSource.AddAttribute(typeof(OffsetAttribute));
					PositionIncrementAttribute posIncrAttribute = (PositionIncrementAttribute) fieldState.attributeSource.AddAttribute(typeof(PositionIncrementAttribute));

					consumer.Start(field);

					for (; ; )
					{
						// If we hit an exception in stream.next below
						// (which is fairly common, eg if analyzer
						// chokes on a given document), then it's
						// non-aborting and (above) this one document
						// will be marked as deleted, but still
						// consume a docID
						if (!hasMoreTokens)
						{
							break;
						}

						int posIncr = posIncrAttribute.GetPositionIncrement();
						fieldState.position += posIncr;
						// Compensate so position points at this token, not past it.
						if (allowMinus1Position || fieldState.position > 0)
						{
							fieldState.position--;
						}

						// Zero increment means this token overlaps the previous one.
						if (posIncr == 0)
						{
							fieldState.numOverlap++;
						}

						bool success = false;
						try
						{
							// If we hit an exception in here, we abort
							// all buffered documents since the last
							// flush, on the likelihood that the
							// internal state of the consumer is now
							// corrupt and should not be flushed to a
							// new segment:
							consumer.Add();
							success = true;
						}
						finally
						{
							if (!success)
							{
								docState.docWriter.SetAborting();
							}
						}
						fieldState.position++;
						offsetEnd = fieldState.offset + offsetAttribute.EndOffset();
						// Stop indexing this field once the configured cap is hit.
						if (++fieldState.length >= maxFieldLength)
						{
							if (docState.infoStream != null)
							{
								docState.infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
							}
							break;
						}

						hasMoreTokens = stream.IncrementToken();
					}

					// trigger streams to perform end-of-stream operations
					stream.End();

					fieldState.offset += offsetAttribute.EndOffset();
					anyToken = fieldState.length > startLength;
				}
				finally
				{
					stream.Close();
				}
			}

			// Leave an offset gap between field values only if tokens were produced.
			if (anyToken)
			{
				fieldState.offset += docState.analyzer.GetOffsetGap(field);
			}
			fieldState.boost *= field.GetBoost();
		}

		// LUCENE-2387: don't hang onto the field, so GC can
		// reclaim
		fields[i] = null;
	}

	consumer.Finish();
	endConsumer.Finish();
}
/// <summary>Construct a StandardFilter that filters the tokens of <i>in</i>.</summary>
public StandardFilter(TokenStream in_Renamed)
	: base(in_Renamed)
{
	// Cache the attribute views once; they are reused on every IncrementToken() call.
	typeAtt = (TypeAttribute) AddAttribute(typeof(TypeAttribute));
	termAtt = (TermAttribute) AddAttribute(typeof(TermAttribute));
}
/// <summary>Wraps <i>in</i> and prepares the term/type attributes used while filtering.</summary>
public StandardFilter(TokenStream in_Renamed)
	: base(in_Renamed)
{
	// Look the attributes up once here rather than per token.
	typeAtt = (TypeAttribute) AddAttribute(typeof(TypeAttribute));
	termAtt = (TermAttribute) AddAttribute(typeof(TermAttribute));
}
/// <summary> Create a tokenized and indexed field that is not stored. Term vectors will
/// not be stored. This is useful for pre-analyzed fields.
/// The TokenStream is read only when the Document is added to the index,
/// i.e. you may not close the TokenStream until {@link IndexWriter#AddDocument(Document)}
/// has been called.
/// </summary>
/// <param name="name">The name of the field</param>
/// <param name="tokenStream">The TokenStream with the content</param>
/// <throws> see the three-argument constructor for how <code>null</code> arguments are rejected </throws>
// Delegates to the three-argument constructor with term vectors disabled.
public Field(System.String name, TokenStream tokenStream) : this(name, tokenStream, TermVector.NO)
{
}
/// <summary>Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return true.
/// May be combined with stored values from stringValue() or binaryValue()
/// </summary>
/// <param name="tokenStream">The pre-analyzed stream to index from</param>
public void SetTokenStream(TokenStream tokenStream)
{
	this.tokenStream = tokenStream;
	// A field carrying a pre-analyzed stream is by definition indexed and tokenized.
	this.isIndexed = true;
	this.isTokenized = true;
}
/// <summary> Create a tokenized and indexed field that is not stored, optionally with
/// storing term vectors. This is useful for pre-analyzed fields.
/// The TokenStream is read only when the Document is added to the index,
/// i.e. you may not close the TokenStream until {@link IndexWriter#AddDocument(Document)}
/// has been called.
/// </summary>
/// <param name="name">The name of the field</param>
/// <param name="tokenStream">The TokenStream with the content</param>
/// <param name="termVector">Whether term vector should be stored</param>
/// <throws> ArgumentNullException if name or tokenStream is <code>null</code> </throws>
public Field(System.String name, TokenStream tokenStream, TermVector termVector)
{
	// Use ArgumentNullException for argument validation: raising the
	// runtime-reserved NullReferenceException directly violates CA2201.
	if (name == null)
		throw new System.ArgumentNullException("name", "name cannot be null");
	if (tokenStream == null)
		throw new System.ArgumentNullException("tokenStream", "tokenStream cannot be null");

	this.name = StringHelper.Intern(name); // field names are interned
	this.fieldsData = null; // no stored value; content comes from the stream
	this.tokenStream = tokenStream;

	// Pre-analyzed fields are never stored, always indexed/tokenized, never binary.
	this.isStored = false;
	this.isCompressed = false;
	this.isIndexed = true;
	this.isTokenized = true;
	this.isBinary = false;

	SetStoreTermVector(termVector);
}
/// <summary> Create a tokenized and indexed field that is not stored. Term vectors will
/// not be stored. This is useful for pre-analyzed fields.
/// The TokenStream is read only when the Document is added to the index,
/// i.e. you may not close the TokenStream until {@link IndexWriter#AddDocument(Document)}
/// has been called.
/// </summary>
/// <param name="name">The name of the field</param>
/// <param name="tokenStream">The TokenStream with the content</param>
/// <throws> see the three-argument constructor for how <code>null</code> arguments are rejected </throws>
// Convenience overload: delegates with term vectors disabled.
public Field(System.String name, TokenStream tokenStream):this(name, tokenStream, TermVector.NO)
{
}
/// <summary>Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return true.
/// May be combined with stored values from stringValue() or binaryValue()
/// </summary>
/// <param name="tokenStream">The pre-analyzed stream to index from</param>
public void SetTokenStream(TokenStream tokenStream)
{
	// Supplying a stream implies the field is indexed and tokenized.
	this.isTokenized = true;
	this.isIndexed = true;
	this.tokenStream = tokenStream;
}
/// <summary>Replaces this field's content with a pre-analyzed TokenStream.
/// Fails for binary or stored fields, which cannot be backed by a stream.</summary>
/// <param name="value_Renamed">The TokenStream to index from</param>
public void SetValue(TokenStream value_Renamed)
{
	// Guard clauses: a stream-backed value is incompatible with these modes.
	if (isBinary)
		throw new System.ArgumentException("cannot set a TokenStream value on a binary field");
	if (isStored)
		throw new System.ArgumentException("cannot set a TokenStream value on a stored field");

	// Drop any previously held value; the stream is now the content.
	fieldsData = null;
	tokenStream = value_Renamed;
}