// Returns true when at least one of the first <count> occurrences
// of this field is indexed; inversion is skipped entirely otherwise.
internal override bool Start(Fieldable[] fields, int count)
{
    bool anyIndexed = false;
    for (int i = 0; i < count && !anyIndexed; i++)
    {
        anyIndexed = fields[i].IsIndexed();
    }
    return anyIndexed;
}
// Appends one document's stored fields: records the current fields-stream
// file pointer in the index stream, then writes the stored-field count
// followed by each stored field's data, in document order.
internal void AddDocument(Document doc)
{
    indexStream.WriteLong(fieldsStream.GetFilePointer());

    // Collect the stored fields in a single pass instead of
    // enumerating the document's field list twice.
    System.Collections.ArrayList storedFields = new System.Collections.ArrayList();
    System.Collections.IEnumerator it = doc.GetFields().GetEnumerator();
    while (it.MoveNext())
    {
        Fieldable field = (Fieldable) it.Current;
        if (field.IsStored())
        {
            storedFields.Add(field);
        }
    }

    fieldsStream.WriteVInt(storedFields.Count);
    for (int i = 0; i < storedFields.Count; i++)
    {
        Fieldable field = (Fieldable) storedFields[i];
        WriteField(fieldInfos.FieldInfo(field.Name()), field);
    }
}
// Caches the term attribute from the shared attribute source for this
// field, then forwards the start notification down the consumer chain.
internal override void Start(Fieldable f)
{
    termAtt = (TermAttribute) fieldState.attributeSource.AddAttribute(typeof(TermAttribute));
    consumer.Start(f);
    if (nextPerField == null)
    {
        return;
    }
    nextPerField.Start(f);
}
/// <summary> Returns the offset gap reported by the analyzer registered
/// for this field's name, falling back to the default analyzer when no
/// field-specific analyzer is mapped. </summary>
public override int GetOffsetGap(Mono.Lucene.Net.Documents.Fieldable field)
{
    // A missing map entry yields null, which ?? replaces with the default.
    Analyzer analyzer = ((Analyzer) analyzerMap[field.Name()]) ?? defaultAnalyzer;
    return analyzer.GetOffsetGap(field);
}
// Grabs the offset attribute only when offsets are being stored in the
// term vectors for this field; otherwise clears any stale reference.
internal override void Start(Fieldable f)
{
    offsetAttribute = doVectorOffsets
        ? (OffsetAttribute) fieldState.attributeSource.AddAttribute(typeof(OffsetAttribute))
        : null;
}
// Looks up (without adding) the payload attribute on this field's
// attribute source; null when the stream carries no payloads.
internal override void Start(Fieldable f)
{
    payloadAttribute = fieldState.attributeSource.HasAttribute(typeof(PayloadAttribute))
        ? (PayloadAttribute) fieldState.attributeSource.GetAttribute(typeof(PayloadAttribute))
        : null;
}
/// <summary> Just like {@link #getPositionIncrementGap}, except for
/// Token offsets instead. By default this returns 1 for tokenized
/// fields — as if the fields were joined with an extra space
/// character — and 0 for un-tokenized fields. This method is only
/// called if the field produced at least one token for indexing.
/// </summary>
/// <param name="field">the field just indexed
/// </param>
/// <returns> offset gap, added to the next token emitted from {@link #TokenStream(String,Reader)}
/// </returns>
public virtual int GetOffsetGap(Fieldable field)
{
    return field.IsTokenized() ? 1 : 0;
}
/// <summary>Adds field info for every field of a Document. </summary>
public void Add(Document doc)
{
    lock (this)
    {
        // foreach performs the Fieldable cast for each element of the
        // non-generic field list.
        foreach (Fieldable field in doc.GetFields())
        {
            Add(field.Name(), field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false, field.GetOmitTf());
        }
    }
}
// Appends one stored field to the per-document buffer. On the first
// stored field of a document it lazily checks out a fresh per-doc
// state from the shared writer and points the local writer at its
// fdt buffer; the asserts verify the checked-out buffer is pristine.
public void AddField(Fieldable field, FieldInfo fieldInfo)
{
    if (doc == null)
    {
        doc = storedFieldsWriter.GetPerDoc();
        doc.docID = docState.docID;
        localFieldsWriter.SetFieldsStream(doc.fdt);
        System.Diagnostics.Debug.Assert(doc.numStoredFields == 0, "doc.numStoredFields=" + doc.numStoredFields);
        System.Diagnostics.Debug.Assert(0 == doc.fdt.Length());
        System.Diagnostics.Debug.Assert(0 == doc.fdt.GetFilePointer());
    }
    localFieldsWriter.WriteField(fieldInfo, field);
    System.Diagnostics.Debug.Assert(docState.TestPoint("StoredFieldsWriterPerThread.processFields.writeField"));
    // Count is kept on the per-doc state so FinishDocument can record it.
    doc.numStoredFields++;
}
// Merges the term-vector flags across all occurrences of this field for
// the current document, lazily setting up the per-doc vector buffers
// when any occurrence wants vectors. Returns whether vectors are written.
internal override bool Start(Fieldable[] fields, int count)
{
    doVectors = false;
    doVectorPositions = false;
    doVectorOffsets = false;

    for (int i = 0; i < count; i++)
    {
        Fieldable occurrence = fields[i];
        if (!occurrence.IsIndexed() || !occurrence.IsTermVectorStored())
        {
            continue;
        }
        doVectors = true;
        if (occurrence.IsStorePositionWithTermVector())
        {
            doVectorPositions = true;
        }
        if (occurrence.IsStoreOffsetWithTermVector())
        {
            doVectorOffsets = true;
        }
    }

    if (!doVectors)
    {
        // TODO: only if needed for performance
        //perThread.postingsCount = 0;
        return false;
    }

    if (perThread.doc == null)
    {
        // First vectored field in this document: check out a fresh
        // per-doc buffer, which must be empty.
        perThread.doc = termsWriter.GetPerDoc();
        perThread.doc.docID = docState.docID;
        System.Diagnostics.Debug.Assert(perThread.doc.numVectorFields == 0);
        System.Diagnostics.Debug.Assert(0 == perThread.doc.perDocTvf.Length());
        System.Diagnostics.Debug.Assert(0 == perThread.doc.perDocTvf.GetFilePointer());
    }
    System.Diagnostics.Debug.Assert(perThread.doc.docID == docState.docID);

    if (termsHashPerField.numPostings != 0)
    {
        // Only necessary if previous doc hit a non-aborting exception
        // while writing vectors in this field:
        termsHashPerField.Reset();
        perThread.termsHashPerThread.Reset(false);
    }

    return true;
}
// Decides, from all occurrences of this field, whether term vectors
// (and positions/offsets) must be written for this document, and
// lazily sets up the per-doc vector buffers when they are.
internal override bool Start(Fieldable[] fields, int count)
{
    doVectors = false;
    doVectorPositions = false;
    doVectorOffsets = false;
    // OR the flags together across every occurrence of the field.
    for (int i = 0; i < count; i++)
    {
        Fieldable field = fields[i];
        if (field.IsIndexed() && field.IsTermVectorStored())
        {
            doVectors = true;
            doVectorPositions |= field.IsStorePositionWithTermVector();
            doVectorOffsets |= field.IsStoreOffsetWithTermVector();
        }
    }
    if (doVectors)
    {
        if (perThread.doc == null)
        {
            // First vectored field in this document: check out a
            // fresh per-doc buffer, which must be empty.
            perThread.doc = termsWriter.GetPerDoc();
            perThread.doc.docID = docState.docID;
            System.Diagnostics.Debug.Assert(perThread.doc.numVectorFields == 0);
            System.Diagnostics.Debug.Assert(0 == perThread.doc.perDocTvf.Length());
            System.Diagnostics.Debug.Assert(0 == perThread.doc.perDocTvf.GetFilePointer());
        }
        System.Diagnostics.Debug.Assert(perThread.doc.docID == docState.docID);
        if (termsHashPerField.numPostings != 0)
        {
            // Only necessary if previous doc hit a
            // non-aborting exception while writing vectors in
            // this field:
            termsHashPerField.Reset();
            perThread.termsHashPerThread.Reset(false);
        }
    }
    // TODO: only if needed for performance
    //perThread.postingsCount = 0;
    return(doVectors);
}
/// <summary>Processes all occurrences of a single field in one document. </summary>
/// <param name="fields">all occurrences of this field in the document</param>
/// <param name="count">number of valid entries at the start of <c>fields</c></param>
public abstract void ProcessFields(Fieldable[] fields, int count);
// Writes a single stored field to the fields stream: the field number
// as a VInt, a flags byte (tokenized / binary / compressed), then the
// field data — compressed first when the field requests compression.
internal void WriteField(FieldInfo fi, Fieldable field)
{
    // if the field as an instanceof FieldsReader.FieldForMerge, we're in merge mode
    // and field.binaryValue() already returns the compressed value for a field
    // with isCompressed()==true, so we disable compression in that case
    bool disableCompression = (field is FieldsReader.FieldForMerge);
    fieldsStream.WriteVInt(fi.number);
    byte bits = 0;
    if (field.IsTokenized())
    {
        bits |= FieldsWriter.FIELD_IS_TOKENIZED;
    }
    if (field.IsBinary())
    {
        bits |= FieldsWriter.FIELD_IS_BINARY;
    }
    if (field.IsCompressed())
    {
        bits |= FieldsWriter.FIELD_IS_COMPRESSED;
    }
    fieldsStream.WriteByte(bits);
    if (field.IsCompressed())
    {
        // compression is enabled for the current field
        byte[] data;
        int len;
        int offset;
        if (disableCompression)
        {
            // optimized case for merging, the data
            // is already compressed
            data = field.GetBinaryValue();
            System.Diagnostics.Debug.Assert(data != null);
            len = field.GetBinaryLength();
            offset = field.GetBinaryOffset();
        }
        else
        {
            // check if it is a binary field
            if (field.IsBinary())
            {
                data = CompressionTools.Compress(field.GetBinaryValue(), field.GetBinaryOffset(), field.GetBinaryLength());
            }
            else
            {
                // String field: compress its UTF-8 byte encoding.
                byte[] x = System.Text.Encoding.GetEncoding("UTF-8").GetBytes(field.StringValue());
                data = CompressionTools.Compress(x, 0, x.Length);
            }
            len = data.Length;
            offset = 0;
        }
        fieldsStream.WriteVInt(len);
        fieldsStream.WriteBytes(data, offset, len);
    }
    else
    {
        // compression is disabled for the current field
        if (field.IsBinary())
        {
            byte[] data;
            int len;
            int offset;
            data = field.GetBinaryValue();
            len = field.GetBinaryLength();
            offset = field.GetBinaryOffset();
            fieldsStream.WriteVInt(len);
            fieldsStream.WriteBytes(data, offset, len);
        }
        else
        {
            fieldsStream.WriteString(field.StringValue());
        }
    }
}
// Asks the primary consumer, and then the chained per-field consumer
// when one exists, whether either wants to see inverted tokens.
// NOTE: doNextCall is a field and deliberately keeps its previous
// value when there is no chained consumer.
internal override bool Start(Fieldable[] fields, int count)
{
    doCall = consumer.Start(fields, count);
    if (nextPerField != null)
    {
        doNextCall = nextPerField.Start(fields, count);
    }
    return doCall || doNextCall;
}
// Clears any stale payload attribute, then rebinds it when the current
// field's attribute source actually carries payloads.
internal override void Start(Fieldable f)
{
    payloadAttribute = null;
    if (fieldState.attributeSource.HasAttribute(typeof(PayloadAttribute)))
    {
        payloadAttribute = (PayloadAttribute) fieldState.attributeSource.GetAttribute(typeof(PayloadAttribute));
    }
}
// Fans the field occurrences out to both chained consumers, in order.
public override void ProcessFields(Fieldable[] fields, int count)
{
    one.ProcessFields(fields, count);
    two.ProcessFields(fields, count);
}
/// <summary> Just like {@link #getPositionIncrementGap}, except for
/// Token offsets instead. By default this returns 1 for tokenized
/// fields — as if the fields were joined with an extra space
/// character — and 0 for un-tokenized fields. This method is only
/// called if the field produced at least one token for indexing.
/// </summary>
/// <param name="field">the field just indexed
/// </param>
/// <returns> offset gap, added to the next token emitted from {@link #TokenStream(String,Reader)}
/// </returns>
public virtual int GetOffsetGap(Fieldable field)
{
    if (!field.IsTokenized())
    {
        return 0;
    }
    return 1;
}
// Inverts all occurrences of one field for the current document:
// feeds each occurrence's tokens (from its TokenStream, Reader, or
// String value) to the consumer, maintaining running position/offset
// state across occurrences, and aborting the buffered segment on
// consumer failure. Finally notifies both end consumers.
public override void ProcessFields(Fieldable[] fields, int count)
{
    fieldState.Reset(docState.doc.GetBoost());
    int maxFieldLength = docState.maxFieldLength;
    bool doInvert = consumer.Start(fields, count);
    for (int i = 0; i < count; i++)
    {
        Fieldable field = fields[i];
        // TODO FI: this should be "genericized" to querying
        // consumer if it wants to see this particular field
        // tokenized.
        if (field.IsIndexed() && doInvert)
        {
            bool anyToken;
            if (fieldState.length > 0)
            {
                // Gap between multiple occurrences of the same field.
                fieldState.position += docState.analyzer.GetPositionIncrementGap(fieldInfo.name);
            }
            if (!field.IsTokenized())
            {
                // un-tokenized field: index the whole value as one token.
                System.String stringValue = field.StringValue();
                int valueLength = stringValue.Length;
                perThread.singleTokenTokenStream.Reinit(stringValue, 0, valueLength);
                fieldState.attributeSource = perThread.singleTokenTokenStream;
                consumer.Start(field);
                bool success = false;
                try
                {
                    consumer.Add();
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        // Consumer state may be corrupt: abort all
                        // buffered docs since the last flush.
                        docState.docWriter.SetAborting();
                    }
                }
                fieldState.offset += valueLength;
                fieldState.length++;
                fieldState.position++;
                anyToken = valueLength > 0;
            }
            else
            {
                // tokenized field
                TokenStream stream;
                TokenStream streamValue = field.TokenStreamValue();
                if (streamValue != null)
                {
                    stream = streamValue;
                }
                else
                {
                    // the field does not have a TokenStream,
                    // so we have to obtain one from the analyzer
                    System.IO.TextReader reader;
                    // find or make Reader
                    System.IO.TextReader readerValue = field.ReaderValue();
                    if (readerValue != null)
                    {
                        reader = readerValue;
                    }
                    else
                    {
                        System.String stringValue = field.StringValue();
                        if (stringValue == null)
                        {
                            throw new System.ArgumentException("field must have either TokenStream, String or Reader value");
                        }
                        perThread.stringReader.Init(stringValue);
                        reader = perThread.stringReader;
                    }
                    // Tokenize field and add to postingTable
                    stream = docState.analyzer.ReusableTokenStream(fieldInfo.name, reader);
                }
                // reset the TokenStream to the first token
                stream.Reset();
                int startLength = fieldState.length;
                // deprecated
                bool allowMinus1Position = docState.allowMinus1Position;
                try
                {
                    int offsetEnd = fieldState.offset - 1;
                    bool hasMoreTokens = stream.IncrementToken();
                    fieldState.attributeSource = stream;
                    OffsetAttribute offsetAttribute = (OffsetAttribute)fieldState.attributeSource.AddAttribute(typeof(OffsetAttribute));
                    PositionIncrementAttribute posIncrAttribute = (PositionIncrementAttribute)fieldState.attributeSource.AddAttribute(typeof(PositionIncrementAttribute));
                    consumer.Start(field);
                    for (; ;)
                    {
                        // If we hit an exception in stream.next below
                        // (which is fairly common, eg if analyzer
                        // chokes on a given document), then it's
                        // non-aborting and (above) this one document
                        // will be marked as deleted, but still
                        // consume a docID
                        if (!hasMoreTokens)
                        {
                            break;
                        }
                        int posIncr = posIncrAttribute.GetPositionIncrement();
                        fieldState.position += posIncr;
                        if (allowMinus1Position || fieldState.position > 0)
                        {
                            fieldState.position--;
                        }
                        if (posIncr == 0)
                        {
                            fieldState.numOverlap++;
                        }
                        bool success = false;
                        try
                        {
                            // If we hit an exception in here, we abort
                            // all buffered documents since the last
                            // flush, on the likelihood that the
                            // internal state of the consumer is now
                            // corrupt and should not be flushed to a
                            // new segment:
                            consumer.Add();
                            success = true;
                        }
                        finally
                        {
                            if (!success)
                            {
                                docState.docWriter.SetAborting();
                            }
                        }
                        fieldState.position++;
                        offsetEnd = fieldState.offset + offsetAttribute.EndOffset();
                        if (++fieldState.length >= maxFieldLength)
                        {
                            // Truncate overly long fields rather than fail.
                            if (docState.infoStream != null)
                            {
                                docState.infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
                            }
                            break;
                        }
                        hasMoreTokens = stream.IncrementToken();
                    }
                    // trigger streams to perform end-of-stream operations
                    stream.End();
                    fieldState.offset += offsetAttribute.EndOffset();
                    anyToken = fieldState.length > startLength;
                }
                finally
                {
                    stream.Close();
                }
            }
            if (anyToken)
            {
                fieldState.offset += docState.analyzer.GetOffsetGap(field);
            }
            fieldState.boost *= field.GetBoost();
        }
        // LUCENE-2387: don't hang onto the field, so GC can
        // reclaim
        fields[i] = null;
    }
    consumer.Finish();
    endConsumer.Finish();
}
// Processes one document: hashes each field into its per-field state
// (creating or updating FieldInfo as needed), buffers stored fields,
// then runs the consumer chain over the fields in sorted order and
// merges the stored-fields and inverted-fields DocWriters into one.
public override DocumentsWriter.DocWriter ProcessDocument()
{
    consumer.StartDocument();
    fieldsWriter.StartDocument();
    Document doc = docState.doc;
    System.Diagnostics.Debug.Assert(docFieldProcessor.docWriter.writer.TestPoint("DocumentsWriter.ThreadState.init start"));
    fieldCount = 0;
    // Generation stamp distinguishes "first occurrence in this doc".
    int thisFieldGen = fieldGen++;
    System.Collections.IList docFields = doc.GetFields();
    int numDocFields = docFields.Count;
    // Absorb any new fields first seen in this document.
    // Also absorb any changes to fields we had already
    // seen before (eg suddenly turning on norms or
    // vectors, etc.):
    for (int i = 0; i < numDocFields; i++)
    {
        Fieldable field = (Fieldable) docFields[i];
        System.String fieldName = field.Name();
        // Make sure we have a PerField allocated
        int hashPos = fieldName.GetHashCode() & hashMask;
        DocFieldProcessorPerField fp = fieldHash[hashPos];
        // Walk the collision chain for this bucket.
        while (fp != null && !fp.fieldInfo.name.Equals(fieldName))
            fp = fp.next;
        if (fp == null)
        {
            // TODO FI: we need to genericize the "flags" that a
            // field holds, and, how these flags are merged; it
            // needs to be more "pluggable" such that if I want
            // to have a new "thing" my Fields can do, I can
            // easily add it
            FieldInfo fi = fieldInfos.Add(fieldName, field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false, field.GetOmitTf());
            fp = new DocFieldProcessorPerField(this, fi);
            fp.next = fieldHash[hashPos];
            fieldHash[hashPos] = fp;
            totalFieldCount++;
            // Keep the hash at most half full so chains stay short.
            if (totalFieldCount >= fieldHash.Length / 2)
                Rehash();
        }
        else
            fp.fieldInfo.Update(field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false, field.GetOmitTf());
        if (thisFieldGen != fp.lastGen)
        {
            // First time we're seeing this field for this doc
            fp.fieldCount = 0;
            if (fieldCount == fields.Length)
            {
                // Double the seen-fields array.
                int newSize = fields.Length * 2;
                DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize];
                Array.Copy(fields, 0, newArray, 0, fieldCount);
                fields = newArray;
            }
            fields[fieldCount++] = fp;
            fp.lastGen = thisFieldGen;
        }
        if (fp.fieldCount == fp.fields.Length)
        {
            // Double this field's per-occurrence array.
            Fieldable[] newArray = new Fieldable[fp.fields.Length * 2];
            Array.Copy(fp.fields, 0, newArray, 0, fp.fieldCount);
            fp.fields = newArray;
        }
        fp.fields[fp.fieldCount++] = field;
        if (field.IsStored())
        {
            fieldsWriter.AddField(field, fp.fieldInfo);
        }
    }
    // If we are writing vectors then we must visit
    // fields in sorted order so they are written in
    // sorted order. TODO: we actually only need to
    // sort the subset of fields that have vectors
    // enabled; we could save [small amount of] CPU
    // here.
    QuickSort(fields, 0, fieldCount - 1);
    for (int i = 0; i < fieldCount; i++)
        fields[i].consumer.ProcessFields(fields[i].fields, fields[i].fieldCount);
    if (docState.maxTermPrefix != null && docState.infoStream != null)
    {
        docState.infoStream.WriteLine("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
        docState.maxTermPrefix = null;
    }
    DocumentsWriter.DocWriter one = fieldsWriter.FinishDocument();
    DocumentsWriter.DocWriter two = consumer.FinishDocument();
    if (one == null)
    {
        return two;
    }
    else if (two == null)
    {
        return one;
    }
    else
    {
        // Both produced output: wrap them so they are finished or
        // aborted together.
        PerDoc both = GetPerDoc();
        both.docID = docState.docID;
        System.Diagnostics.Debug.Assert(one.docID == docState.docID);
        System.Diagnostics.Debug.Assert(two.docID == docState.docID);
        both.one = one;
        both.two = two;
        return both;
    }
}
// Writes a single stored field to the fields stream: the field number
// as a VInt, a flags byte (tokenized / binary / compressed), then the
// field data — compressed first when the field requests compression.
internal void WriteField(FieldInfo fi, Fieldable field)
{
    // if the field as an instanceof FieldsReader.FieldForMerge, we're in merge mode
    // and field.binaryValue() already returns the compressed value for a field
    // with isCompressed()==true, so we disable compression in that case
    bool disableCompression = (field is FieldsReader.FieldForMerge);
    fieldsStream.WriteVInt(fi.number);
    byte bits = 0;
    if (field.IsTokenized())
        bits |= FieldsWriter.FIELD_IS_TOKENIZED;
    if (field.IsBinary())
        bits |= FieldsWriter.FIELD_IS_BINARY;
    if (field.IsCompressed())
        bits |= FieldsWriter.FIELD_IS_COMPRESSED;
    fieldsStream.WriteByte(bits);
    if (field.IsCompressed())
    {
        // compression is enabled for the current field
        byte[] data;
        int len;
        int offset;
        if (disableCompression)
        {
            // optimized case for merging, the data
            // is already compressed
            data = field.GetBinaryValue();
            System.Diagnostics.Debug.Assert(data != null);
            len = field.GetBinaryLength();
            offset = field.GetBinaryOffset();
        }
        else
        {
            // check if it is a binary field
            if (field.IsBinary())
            {
                data = CompressionTools.Compress(field.GetBinaryValue(), field.GetBinaryOffset(), field.GetBinaryLength());
            }
            else
            {
                // String field: compress its UTF-8 byte encoding.
                byte[] x = System.Text.Encoding.GetEncoding("UTF-8").GetBytes(field.StringValue());
                data = CompressionTools.Compress(x, 0, x.Length);
            }
            len = data.Length;
            offset = 0;
        }
        fieldsStream.WriteVInt(len);
        fieldsStream.WriteBytes(data, offset, len);
    }
    else
    {
        // compression is disabled for the current field
        if (field.IsBinary())
        {
            byte[] data;
            int len;
            int offset;
            data = field.GetBinaryValue();
            len = field.GetBinaryLength();
            offset = field.GetBinaryOffset();
            fieldsStream.WriteVInt(len);
            fieldsStream.WriteBytes(data, offset, len);
        }
        else
        {
            fieldsStream.WriteString(field.StringValue());
        }
    }
}
// Called once per field occurrence, before its tokens are consumed.
internal abstract void Start(Fieldable field);
// Called once per field, and is given all Fieldable
// occurrences for this field in the document. Return
// true if you wish to see inverted tokens for these
// fields:
internal abstract bool Start(Fieldable[] fields, int count);
// Called before a field instance is being processed
internal abstract void Start(Fieldable field);
// Processes one document: hashes each field into its per-field state
// (creating or updating FieldInfo as needed), buffers stored fields,
// then runs the consumer chain over the fields in sorted order and
// merges the stored-fields and inverted-fields DocWriters into one.
public override DocumentsWriter.DocWriter ProcessDocument()
{
    consumer.StartDocument();
    fieldsWriter.StartDocument();
    Document doc = docState.doc;
    System.Diagnostics.Debug.Assert(docFieldProcessor.docWriter.writer.TestPoint("DocumentsWriter.ThreadState.init start"));
    fieldCount = 0;
    // Generation stamp distinguishes "first occurrence in this doc".
    int thisFieldGen = fieldGen++;
    System.Collections.IList docFields = doc.GetFields();
    int numDocFields = docFields.Count;
    // Absorb any new fields first seen in this document.
    // Also absorb any changes to fields we had already
    // seen before (eg suddenly turning on norms or
    // vectors, etc.):
    for (int i = 0; i < numDocFields; i++)
    {
        Fieldable field = (Fieldable)docFields[i];
        System.String fieldName = field.Name();
        // Make sure we have a PerField allocated
        int hashPos = fieldName.GetHashCode() & hashMask;
        DocFieldProcessorPerField fp = fieldHash[hashPos];
        // Walk the collision chain for this bucket.
        while (fp != null && !fp.fieldInfo.name.Equals(fieldName))
        {
            fp = fp.next;
        }
        if (fp == null)
        {
            // TODO FI: we need to genericize the "flags" that a
            // field holds, and, how these flags are merged; it
            // needs to be more "pluggable" such that if I want
            // to have a new "thing" my Fields can do, I can
            // easily add it
            FieldInfo fi = fieldInfos.Add(fieldName, field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false, field.GetOmitTf());
            fp = new DocFieldProcessorPerField(this, fi);
            fp.next = fieldHash[hashPos];
            fieldHash[hashPos] = fp;
            totalFieldCount++;
            // Keep the hash at most half full so chains stay short.
            if (totalFieldCount >= fieldHash.Length / 2)
            {
                Rehash();
            }
        }
        else
        {
            fp.fieldInfo.Update(field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false, field.GetOmitTf());
        }
        if (thisFieldGen != fp.lastGen)
        {
            // First time we're seeing this field for this doc
            fp.fieldCount = 0;
            if (fieldCount == fields.Length)
            {
                // Double the seen-fields array.
                int newSize = fields.Length * 2;
                DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize];
                Array.Copy(fields, 0, newArray, 0, fieldCount);
                fields = newArray;
            }
            fields[fieldCount++] = fp;
            fp.lastGen = thisFieldGen;
        }
        if (fp.fieldCount == fp.fields.Length)
        {
            // Double this field's per-occurrence array.
            Fieldable[] newArray = new Fieldable[fp.fields.Length * 2];
            Array.Copy(fp.fields, 0, newArray, 0, fp.fieldCount);
            fp.fields = newArray;
        }
        fp.fields[fp.fieldCount++] = field;
        if (field.IsStored())
        {
            fieldsWriter.AddField(field, fp.fieldInfo);
        }
    }
    // If we are writing vectors then we must visit
    // fields in sorted order so they are written in
    // sorted order. TODO: we actually only need to
    // sort the subset of fields that have vectors
    // enabled; we could save [small amount of] CPU
    // here.
    QuickSort(fields, 0, fieldCount - 1);
    for (int i = 0; i < fieldCount; i++)
    {
        fields[i].consumer.ProcessFields(fields[i].fields, fields[i].fieldCount);
    }
    if (docState.maxTermPrefix != null && docState.infoStream != null)
    {
        docState.infoStream.WriteLine("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
        docState.maxTermPrefix = null;
    }
    DocumentsWriter.DocWriter one = fieldsWriter.FinishDocument();
    DocumentsWriter.DocWriter two = consumer.FinishDocument();
    if (one == null)
    {
        return(two);
    }
    else if (two == null)
    {
        return(one);
    }
    else
    {
        // Both produced output: wrap them so they are finished or
        // aborted together.
        PerDoc both = GetPerDoc();
        both.docID = docState.docID;
        System.Diagnostics.Debug.Assert(one.docID == docState.docID);
        System.Diagnostics.Debug.Assert(two.docID == docState.docID);
        both.one = one;
        both.two = two;
        return(both);
    }
}
// Rebinds the term attribute from the shared attribute source for this
// field, then notifies the downstream consumers in chain order.
internal override void Start(Fieldable f)
{
    object att = fieldState.attributeSource.AddAttribute(typeof(TermAttribute));
    termAtt = (TermAttribute) att;
    consumer.Start(f);
    if (nextPerField != null)
    {
        nextPerField.Start(f);
    }
}
// Binds the offset attribute only when offsets are stored with this
// field's term vectors; otherwise clears any stale reference.
internal override void Start(Fieldable f)
{
    if (!doVectorOffsets)
    {
        offsetAttribute = null;
        return;
    }
    offsetAttribute = (OffsetAttribute) fieldState.attributeSource.AddAttribute(typeof(OffsetAttribute));
}
// Inverts all occurrences of one field for the current document:
// feeds each occurrence's tokens (from its TokenStream, Reader, or
// String value) to the consumer, maintaining running position/offset
// state across occurrences, and aborting the buffered segment on
// consumer failure. Finally notifies both end consumers.
public override void ProcessFields(Fieldable[] fields, int count)
{
    fieldState.Reset(docState.doc.GetBoost());
    int maxFieldLength = docState.maxFieldLength;
    bool doInvert = consumer.Start(fields, count);
    for (int i = 0; i < count; i++)
    {
        Fieldable field = fields[i];
        // TODO FI: this should be "genericized" to querying
        // consumer if it wants to see this particular field
        // tokenized.
        if (field.IsIndexed() && doInvert)
        {
            bool anyToken;
            // Gap between multiple occurrences of the same field.
            if (fieldState.length > 0)
                fieldState.position += docState.analyzer.GetPositionIncrementGap(fieldInfo.name);
            if (!field.IsTokenized())
            {
                // un-tokenized field: index the whole value as one token.
                System.String stringValue = field.StringValue();
                int valueLength = stringValue.Length;
                perThread.singleTokenTokenStream.Reinit(stringValue, 0, valueLength);
                fieldState.attributeSource = perThread.singleTokenTokenStream;
                consumer.Start(field);
                bool success = false;
                try
                {
                    consumer.Add();
                    success = true;
                }
                finally
                {
                    // Consumer state may be corrupt: abort all
                    // buffered docs since the last flush.
                    if (!success)
                        docState.docWriter.SetAborting();
                }
                fieldState.offset += valueLength;
                fieldState.length++;
                fieldState.position++;
                anyToken = valueLength > 0;
            }
            else
            {
                // tokenized field
                TokenStream stream;
                TokenStream streamValue = field.TokenStreamValue();
                if (streamValue != null)
                    stream = streamValue;
                else
                {
                    // the field does not have a TokenStream,
                    // so we have to obtain one from the analyzer
                    System.IO.TextReader reader;
                    // find or make Reader
                    System.IO.TextReader readerValue = field.ReaderValue();
                    if (readerValue != null)
                        reader = readerValue;
                    else
                    {
                        System.String stringValue = field.StringValue();
                        if (stringValue == null)
                            throw new System.ArgumentException("field must have either TokenStream, String or Reader value");
                        perThread.stringReader.Init(stringValue);
                        reader = perThread.stringReader;
                    }
                    // Tokenize field and add to postingTable
                    stream = docState.analyzer.ReusableTokenStream(fieldInfo.name, reader);
                }
                // reset the TokenStream to the first token
                stream.Reset();
                int startLength = fieldState.length;
                // deprecated
                bool allowMinus1Position = docState.allowMinus1Position;
                try
                {
                    int offsetEnd = fieldState.offset - 1;
                    bool hasMoreTokens = stream.IncrementToken();
                    fieldState.attributeSource = stream;
                    OffsetAttribute offsetAttribute = (OffsetAttribute) fieldState.attributeSource.AddAttribute(typeof(OffsetAttribute));
                    PositionIncrementAttribute posIncrAttribute = (PositionIncrementAttribute) fieldState.attributeSource.AddAttribute(typeof(PositionIncrementAttribute));
                    consumer.Start(field);
                    for (; ; )
                    {
                        // If we hit an exception in stream.next below
                        // (which is fairly common, eg if analyzer
                        // chokes on a given document), then it's
                        // non-aborting and (above) this one document
                        // will be marked as deleted, but still
                        // consume a docID
                        if (!hasMoreTokens)
                            break;
                        int posIncr = posIncrAttribute.GetPositionIncrement();
                        fieldState.position += posIncr;
                        if (allowMinus1Position || fieldState.position > 0)
                        {
                            fieldState.position--;
                        }
                        if (posIncr == 0)
                            fieldState.numOverlap++;
                        bool success = false;
                        try
                        {
                            // If we hit an exception in here, we abort
                            // all buffered documents since the last
                            // flush, on the likelihood that the
                            // internal state of the consumer is now
                            // corrupt and should not be flushed to a
                            // new segment:
                            consumer.Add();
                            success = true;
                        }
                        finally
                        {
                            if (!success)
                                docState.docWriter.SetAborting();
                        }
                        fieldState.position++;
                        offsetEnd = fieldState.offset + offsetAttribute.EndOffset();
                        if (++fieldState.length >= maxFieldLength)
                        {
                            // Truncate overly long fields rather than fail.
                            if (docState.infoStream != null)
                                docState.infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
                            break;
                        }
                        hasMoreTokens = stream.IncrementToken();
                    }
                    // trigger streams to perform end-of-stream operations
                    stream.End();
                    fieldState.offset += offsetAttribute.EndOffset();
                    anyToken = fieldState.length > startLength;
                }
                finally
                {
                    stream.Close();
                }
            }
            if (anyToken)
                fieldState.offset += docState.analyzer.GetOffsetGap(field);
            fieldState.boost *= field.GetBoost();
        }
        // LUCENE-2387: don't hang onto the field, so GC can
        // reclaim
        fields[i] = null;
    }
    consumer.Finish();
    endConsumer.Finish();
}