// Process all occurrences of a single field in one doc;
        // count is 1 if a given field occurs only once in the
        // Document, which is the "typical" case
        internal override void processFields(Fieldable[] fields, int count)
        {
            StoredFieldsWriter.PerDoc doc;
            if (perThread.doc == null)
            {
                doc = perThread.doc = perThread.storedFieldsWriter.getPerDoc();
                doc.docID = docState.docID;
                perThread.localFieldsWriter.SetFieldsStream(doc.fdt);
                System.Diagnostics.Debug.Assert(doc.numStoredFields == 0, "doc.numStoredFields=" + doc.numStoredFields);
                System.Diagnostics.Debug.Assert(0 == doc.fdt.Length());
                System.Diagnostics.Debug.Assert(0 == doc.fdt.GetFilePointer());
            }
            else
            {
                doc = perThread.doc;
                System.Diagnostics.Debug.Assert(doc.docID == docState.docID, "doc.docID=" + doc.docID + " docState.docID=" + docState.docID);
            }

            for (int i = 0; i < count; i++)
            {
                Fieldable field = fields[i];
                if (field.IsStored())
                {
                    perThread.localFieldsWriter.WriteField(fieldInfo, field);
                    System.Diagnostics.Debug.Assert(docState.TestPoint("StoredFieldsWriterPerField.processFields.writeField"));
                    doc.numStoredFields++;
                }
            }
        }
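A minimal usage sketch of the two cases the comment above distinguishes, built with the public Document/Field API from this era (Field.Store / Field.Index.ANALYZED); the field names and values are illustrative assumptions, not taken from the surrounding code.

        using Lucene.Net.Documents;

        Document doc = new Document();

        // "Typical" case: the field occurs once, so processFields sees count == 1.
        doc.Add(new Field("title", "a single stored, analyzed value",
                          Field.Store.YES, Field.Index.ANALYZED));

        // Multi-valued case: the same field name is added twice, so count == 2 and
        // the analyzer's position/offset gaps apply between the two values.
        doc.Add(new Field("author", "first value", Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("author", "second value", Field.Store.YES, Field.Index.ANALYZED));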
Example No. 2
        internal void  AddDocument(Document doc)
        {
            indexStream.WriteLong(fieldsStream.GetFilePointer());

            int storedCount = 0;

            System.Collections.IEnumerator fieldIterator = doc.GetFields().GetEnumerator();
            while (fieldIterator.MoveNext())
            {
                Fieldable field = (Fieldable)fieldIterator.Current;
                if (field.IsStored())
                {
                    storedCount++;
                }
            }
            fieldsStream.WriteVInt(storedCount);

            fieldIterator = doc.GetFields().GetEnumerator();
            while (fieldIterator.MoveNext())
            {
                Fieldable field = (Fieldable)fieldIterator.Current;
                if (field.IsStored())
                {
                    WriteField(fieldInfos.FieldInfo(field.Name()), field);
                }
            }
        }
		internal override bool Start(Fieldable[] fields, int count)
		{
			for (int i = 0; i < count; i++)
				if (fields[i].IsIndexed())
					return true;
			return false;
		}
Example No. 4
 /// <summary> Just like <see cref="GetPositionIncrementGap" />, except for
 /// Token offsets instead.  By default this returns 1 for
 /// tokenized fields, as if the fields were joined
 /// with an extra space character, and 0 for un-tokenized
 /// fields.  This method is only called if the field
 /// produced at least one token for indexing.
 /// 
 /// </summary>
 /// <param name="field">the field just indexed
 /// </param>
 /// <returns> offset gap, added to the next token emitted from <see cref="TokenStream(String,System.IO.TextReader)" />
 /// </returns>
 public virtual int GetOffsetGap(Fieldable field)
 {
     if (field.IsTokenized())
         return 1;
     else
         return 0;
 }
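A hedged sketch of overriding this hook: a hypothetical analyzer that widens the offset gap between consecutive values of a multi-valued tokenized field, assuming the 2.9-era Analyzer and WhitespaceTokenizer APIs used elsewhere on this page; the class name and the gap of 100 are arbitrary choices.

 using System.IO;
 using Lucene.Net.Analysis;
 using Lucene.Net.Documents;

 public class WideOffsetGapAnalyzer : Analyzer
 {
     public override TokenStream TokenStream(string fieldName, TextReader reader)
     {
         return new WhitespaceTokenizer(reader);
     }

     public override int GetOffsetGap(Fieldable field)
     {
         // Keep 0 for un-tokenized fields, but leave a 100-character hole in
         // the offsets between consecutive tokenized values.
         return field.IsTokenized() ? 100 : 0;
     }
 }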
        // Process all occurrences of a single field in one doc;
        // count is 1 if a given field occurs only once in the
        // Document, which is the "typical" case
        internal override void processFields(Fieldable[] fields, int count)
        {
            StoredFieldsWriter.PerDoc doc;
            if (perThread.doc == null)
            {
                doc       = perThread.doc = perThread.storedFieldsWriter.getPerDoc();
                doc.docID = docState.docID;
                perThread.localFieldsWriter.SetFieldsStream(doc.fdt);
                System.Diagnostics.Debug.Assert(doc.numStoredFields == 0, "doc.numStoredFields=" + doc.numStoredFields);
                System.Diagnostics.Debug.Assert(0 == doc.fdt.Length());
                System.Diagnostics.Debug.Assert(0 == doc.fdt.GetFilePointer());
            }
            else
            {
                doc = perThread.doc;
                System.Diagnostics.Debug.Assert(doc.docID == docState.docID, "doc.docID=" + doc.docID + " docState.docID=" + docState.docID);
            }

            for (int i = 0; i < count; i++)
            {
                Fieldable field = fields[i];
                if (field.IsStored())
                {
                    perThread.localFieldsWriter.WriteField(fieldInfo, field);
                    System.Diagnostics.Debug.Assert(docState.TestPoint("StoredFieldsWriterPerField.processFields.writeField"));
                    doc.numStoredFields++;
                }
            }
        }
Example No. 6
		internal void  WriteField(FieldInfo fi, Fieldable field)
		{
			// if the field is an instance of FieldsReader.FieldForMerge, we're in merge mode
			// and field.binaryValue() already returns the compressed value for a field
			// with isCompressed()==true, so we disable compression in that case
			bool disableCompression = (field is FieldsReader.FieldForMerge);
			fieldsStream.WriteVInt(fi.number);
			byte bits = 0;
			if (field.IsTokenized())
				bits |= FieldsWriter.FIELD_IS_TOKENIZED;
			if (field.IsBinary())
				bits |= FieldsWriter.FIELD_IS_BINARY;
			if (field.IsCompressed())
				bits |= FieldsWriter.FIELD_IS_COMPRESSED;
			
			fieldsStream.WriteByte(bits);
			
			if (field.IsCompressed())
			{
				// compression is enabled for the current field
				byte[] data = null;
				
				if (disableCompression)
				{
					// optimized case for merging, the data
					// is already compressed
					data = field.BinaryValue();
				}
				else
				{
					// check if it is a binary field
					if (field.IsBinary())
					{
						data = Compress(field.BinaryValue());
					}
					else
					{
						data = Compress(System.Text.Encoding.GetEncoding("UTF-8").GetBytes(field.StringValue()));
					}
				}
				int len = data.Length;
				fieldsStream.WriteVInt(len);
				fieldsStream.WriteBytes(data, len);
			}
			else
			{
				// compression is disabled for the current field
				if (field.IsBinary())
				{
					byte[] data = field.BinaryValue();
					int len = data.Length;
					fieldsStream.WriteVInt(len);
					fieldsStream.WriteBytes(data, len);
				}
				else
				{
					fieldsStream.WriteString(field.StringValue());
				}
			}
		}
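For reference, a small sketch of how the flag byte written above can be interpreted when a stored-field record is read back. The values 0x1, 0x2 and 0x4 mirror the FieldsWriter.FIELD_IS_* constants in the 2.x sources, but they are restated here as an assumption rather than taken from this page.

		internal static class StoredFieldBits
		{
			private const byte FIELD_IS_TOKENIZED = 0x1;
			private const byte FIELD_IS_BINARY = 0x2;
			private const byte FIELD_IS_COMPRESSED = 0x4;

			// Renders the flag byte that precedes each stored field value.
			internal static string Describe(byte bits)
			{
				return string.Format("tokenized={0} binary={1} compressed={2}",
					(bits & FIELD_IS_TOKENIZED) != 0,
					(bits & FIELD_IS_BINARY) != 0,
					(bits & FIELD_IS_COMPRESSED) != 0);
			}
		}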
Example No. 7
 internal override void  Start(Fieldable f)
 {
     termAtt = (TermAttribute)fieldState.attributeSource.AddAttribute(typeof(TermAttribute));
     consumer.Start(f);
     if (nextPerField != null)
     {
         nextPerField.Start(f);
     }
 }
Example No. 8
 /// <summary>Adds field info for a Document. </summary>
 public void  Add(Document doc)
 {
     System.Collections.IList       fields        = doc.GetFields();
     System.Collections.IEnumerator fieldIterator = fields.GetEnumerator();
     while (fieldIterator.MoveNext())
     {
         Fieldable field = (Fieldable)fieldIterator.Current;
         Add(field.Name(), field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms());
     }
 }
Example No. 9
        /// <summary> Return the offsetGap from the analyzer assigned to field </summary>
        public override int GetOffsetGap(Lucene.Net.Documents.Fieldable field)
        {
            Analyzer analyzer = (Analyzer)analyzerMap[field.Name()];

            if (analyzer == null)
            {
                analyzer = defaultAnalyzer;
            }
            return(analyzer.GetOffsetGap(field));
        }
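A usage sketch for this kind of per-field delegation, assuming the stock PerFieldAnalyzerWrapper, SimpleAnalyzer and KeywordAnalyzer from Lucene.Net.Analysis; the field name "id" is illustrative.

        using Lucene.Net.Analysis;

        PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
        wrapper.AddAnalyzer("id", new KeywordAnalyzer());

        // Position and offset gaps are now taken from the analyzer mapped to each
        // field: SimpleAnalyzer for unmapped fields, KeywordAnalyzer for "id".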
 internal override void  Start(Fieldable f)
 {
     if (fieldState.attributeSource.HasAttribute(typeof(PayloadAttribute)))
     {
         payloadAttribute = (PayloadAttribute)fieldState.attributeSource.GetAttribute(typeof(PayloadAttribute));
     }
     else
     {
         payloadAttribute = null;
     }
 }
 internal override void  Start(Fieldable f)
 {
     if (doVectorOffsets)
     {
         offsetAttribute = (OffsetAttribute)fieldState.attributeSource.AddAttribute(typeof(OffsetAttribute));
     }
     else
     {
         offsetAttribute = null;
     }
 }
Example No. 12
 /// <summary> Just like {@link #getPositionIncrementGap}, except for
 /// Token offsets instead.  By default this returns 1 for
 /// tokenized fields, as if the fields were joined
 /// with an extra space character, and 0 for un-tokenized
 /// fields.  This method is only called if the field
 /// produced at least one token for indexing.
 ///
 /// </summary>
 /// <param name="field">the field just indexed
 /// </param>
 /// <returns> offset gap, added to the next token emitted from {@link #TokenStream(String,Reader)}
 /// </returns>
 public virtual int GetOffsetGap(Fieldable field)
 {
     if (field.IsTokenized())
     {
         return(1);
     }
     else
     {
         return(0);
     }
 }
        public void  AddField(Fieldable field, FieldInfo fieldInfo)
        {
            if (doc == null)
            {
                doc       = storedFieldsWriter.GetPerDoc();
                doc.docID = docState.docID;
                localFieldsWriter.SetFieldsStream(doc.fdt);
                System.Diagnostics.Debug.Assert(doc.numStoredFields == 0, "doc.numStoredFields=" + doc.numStoredFields);
                System.Diagnostics.Debug.Assert(0 == doc.fdt.Length());
                System.Diagnostics.Debug.Assert(0 == doc.fdt.GetFilePointer());
            }

            localFieldsWriter.WriteField(fieldInfo, field);
            System.Diagnostics.Debug.Assert(docState.TestPoint("StoredFieldsWriterPerThread.processFields.writeField"));
            doc.numStoredFields++;
        }
		public void  AddField(Fieldable field, FieldInfo fieldInfo)
		{
			if (doc == null)
			{
				doc = storedFieldsWriter.GetPerDoc();
				doc.docID = docState.docID;
				localFieldsWriter.SetFieldsStream(doc.fdt);
				System.Diagnostics.Debug.Assert(doc.numStoredFields == 0, "doc.numStoredFields=" + doc.numStoredFields);
				System.Diagnostics.Debug.Assert(0 == doc.fdt.Length());
				System.Diagnostics.Debug.Assert(0 == doc.fdt.GetFilePointer());
			}
			
			localFieldsWriter.WriteField(fieldInfo, field);
			System.Diagnostics.Debug.Assert(docState.TestPoint("StoredFieldsWriterPerThread.processFields.writeField"));
			doc.numStoredFields++;
		}
        internal override bool Start(Fieldable[] fields, int count)
        {
            doVectors         = false;
            doVectorPositions = false;
            doVectorOffsets   = false;

            for (int i = 0; i < count; i++)
            {
                Fieldable field = fields[i];
                if (field.IsIndexed() && field.IsTermVectorStored())
                {
                    doVectors          = true;
                    doVectorPositions |= field.IsStorePositionWithTermVector();
                    doVectorOffsets   |= field.IsStoreOffsetWithTermVector();
                }
            }

            if (doVectors)
            {
                if (perThread.doc == null)
                {
                    perThread.doc       = termsWriter.GetPerDoc();
                    perThread.doc.docID = docState.docID;
                    System.Diagnostics.Debug.Assert(perThread.doc.numVectorFields == 0);
                    System.Diagnostics.Debug.Assert(0 == perThread.doc.tvf.Length());
                    System.Diagnostics.Debug.Assert(0 == perThread.doc.tvf.GetFilePointer());
                }
                else
                {
                    System.Diagnostics.Debug.Assert(perThread.doc.docID == docState.docID);

                    if (termsHashPerField.numPostings != 0)
                    {
                        // Only necessary if previous doc hit a
                        // non-aborting exception while writing vectors in
                        // this field:
                        termsHashPerField.Reset();
                    }
                }
            }

            // TODO: only if needed for performance
            //perThread.postingsCount = 0;

            return(doVectors);
        }
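A minimal sketch of the field configuration these checks respond to, using the public Field constructor that takes a TermVector argument; the field name and text are illustrative.

        using Lucene.Net.Documents;

        Field body = new Field("body", "some analyzed text",
                               Field.Store.NO, Field.Index.ANALYZED,
                               Field.TermVector.WITH_POSITIONS_OFFSETS);
        // IsTermVectorStored(), IsStorePositionWithTermVector() and
        // IsStoreOffsetWithTermVector() all return true for this field, so the
        // Start() above enables doVectors, doVectorPositions and doVectorOffsets.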
Example No. 16
        public virtual void  TestDocument()
        {
            Assert.IsTrue(reader.NumDocs() == 1);
            Assert.IsTrue(reader.MaxDoc() >= 1);
            Document result = reader.Document(0);

            Assert.IsTrue(result != null);
            //There are 2 unstored fields on the document that are not preserved across writing
            Assert.IsTrue(DocHelper.NumFields(result) == DocHelper.NumFields(testDoc) - DocHelper.unstored.Count);

            System.Collections.IList fields = result.GetFields();
            for (System.Collections.IEnumerator iter = fields.GetEnumerator(); iter.MoveNext();)
            {
                Fieldable field = (Fieldable)iter.Current;
                Assert.IsTrue(field != null);
                Assert.IsTrue(DocHelper.nameValues.Contains(field.Name()));
            }
        }
		internal override bool Start(Fieldable[] fields, int count)
		{
			doVectors = false;
			doVectorPositions = false;
			doVectorOffsets = false;
			
			for (int i = 0; i < count; i++)
			{
				Fieldable field = fields[i];
				if (field.IsIndexed() && field.IsTermVectorStored())
				{
					doVectors = true;
					doVectorPositions |= field.IsStorePositionWithTermVector();
					doVectorOffsets |= field.IsStoreOffsetWithTermVector();
				}
			}
			
			if (doVectors)
			{
				if (perThread.doc == null)
				{
					perThread.doc = termsWriter.GetPerDoc();
					perThread.doc.docID = docState.docID;
					System.Diagnostics.Debug.Assert(perThread.doc.numVectorFields == 0);
					System.Diagnostics.Debug.Assert(0 == perThread.doc.tvf.Length());
					System.Diagnostics.Debug.Assert(0 == perThread.doc.tvf.GetFilePointer());
				}
				else
				{
					System.Diagnostics.Debug.Assert(perThread.doc.docID == docState.docID);
					
					if (termsHashPerField.numPostings != 0)
					// Only necessary if previous doc hit a
					// non-aborting exception while writing vectors in
					// this field:
						termsHashPerField.Reset();
				}
			}
			
			// TODO: only if needed for performance
			//perThread.postingsCount = 0;
			
			return doVectors;
		}
Example No. 18
 public static void  CheckNorms(IndexReader reader)
 {
     // test omit norms
     for (int i = 0; i < DocHelper.fields.Length; i++)
     {
         Fieldable f = DocHelper.fields[i];
         if (f.IsIndexed())
         {
             Assert.AreEqual(reader.HasNorms(f.Name()), !f.GetOmitNorms());
             Assert.AreEqual(reader.HasNorms(f.Name()), !DocHelper.noNorms.Contains(f.Name()));
             if (!reader.HasNorms(f.Name()))
             {
                 // test for fake norms of 1.0 or null depending on the flag
                 byte[] norms = reader.Norms(f.Name());
                 byte   norm1 = DefaultSimilarity.EncodeNorm(1.0f);
                 if (reader.GetDisableFakeNorms())
                 {
                     Assert.IsNull(norms);
                 }
                 else
                 {
                     Assert.AreEqual(norms.Length, reader.MaxDoc());
                     for (int j = 0; j < reader.MaxDoc(); j++)
                     {
                         Assert.AreEqual(norms[j], norm1);
                     }
                 }
                 norms = new byte[reader.MaxDoc()];
                 reader.Norms(f.Name(), norms, 0);
                 for (int j = 0; j < reader.MaxDoc(); j++)
                 {
                     Assert.AreEqual(norms[j], norm1);
                 }
             }
         }
     }
 }
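For context, a hedged sketch of how a field ends up without norms in the first place, using the 2.9-era Field API; the field name and value are illustrative.

     using Lucene.Net.Documents;

     Field noNormsField = new Field("keyword", "exact-match value",
                                    Field.Store.YES, Field.Index.NOT_ANALYZED);
     noNormsField.SetOmitNorms(true);   // reader.HasNorms("keyword") then returns false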
Example No. 19
 internal abstract void  Start(Fieldable field);
Example No. 20
        public override void  ProcessFields(Fieldable[] fields, int count)
        {
            fieldState.Reset(docState.doc.GetBoost());

            int maxFieldLength = docState.maxFieldLength;

            bool doInvert = consumer.Start(fields, count);

            for (int i = 0; i < count; i++)
            {
                Fieldable field = fields[i];

                // TODO FI: this should be "genericized" to querying
                // consumer if it wants to see this particular field
                // tokenized.
                if (field.IsIndexed() && doInvert)
                {
                    bool anyToken;

                    if (fieldState.length > 0)
                    {
                        fieldState.position += docState.analyzer.GetPositionIncrementGap(fieldInfo.name);
                    }

                    if (!field.IsTokenized())
                    {
                        // un-tokenized field
                        System.String stringValue = field.StringValue();
                        int           valueLength = stringValue.Length;
                        perThread.singleTokenTokenStream.Reinit(stringValue, 0, valueLength);
                        fieldState.attributeSource = perThread.singleTokenTokenStream;
                        consumer.Start(field);

                        bool success = false;
                        try
                        {
                            consumer.Add();
                            success = true;
                        }
                        finally
                        {
                            if (!success)
                            {
                                docState.docWriter.SetAborting();
                            }
                        }
                        fieldState.offset += valueLength;
                        fieldState.length++;
                        fieldState.position++;
                        anyToken = valueLength > 0;
                    }
                    else
                    {
                        // tokenized field
                        TokenStream stream;
                        TokenStream streamValue = field.TokenStreamValue();

                        if (streamValue != null)
                        {
                            stream = streamValue;
                        }
                        else
                        {
                            // the field does not have a TokenStream,
                            // so we have to obtain one from the analyzer
                            System.IO.TextReader reader;                             // find or make Reader
                            System.IO.TextReader readerValue = field.ReaderValue();

                            if (readerValue != null)
                            {
                                reader = readerValue;
                            }
                            else
                            {
                                System.String stringValue = field.StringValue();
                                if (stringValue == null)
                                {
                                    throw new System.ArgumentException("field must have either TokenStream, String or Reader value");
                                }
                                perThread.stringReader.Init(stringValue);
                                reader = perThread.stringReader;
                            }

                            // Tokenize field and add to postingTable
                            stream = docState.analyzer.ReusableTokenStream(fieldInfo.name, reader);
                        }

                        // reset the TokenStream to the first token
                        stream.Reset();

                        int startLength = fieldState.length;

                        // deprecated
                        bool allowMinus1Position = docState.allowMinus1Position;

                        try
                        {
                            int offsetEnd = fieldState.offset - 1;

                            bool hasMoreTokens = stream.IncrementToken();

                            fieldState.attributeSource = stream;

                            OffsetAttribute            offsetAttribute  = (OffsetAttribute)fieldState.attributeSource.AddAttribute(typeof(OffsetAttribute));
                            PositionIncrementAttribute posIncrAttribute = (PositionIncrementAttribute)fieldState.attributeSource.AddAttribute(typeof(PositionIncrementAttribute));

                            consumer.Start(field);

                            for (; ;)
                            {
                                // If we hit an exception in stream.next below
                                // (which is fairly common, eg if analyzer
                                // chokes on a given document), then it's
                                // non-aborting and (above) this one document
                                // will be marked as deleted, but still
                                // consume a docID

                                if (!hasMoreTokens)
                                {
                                    break;
                                }

                                int posIncr = posIncrAttribute.GetPositionIncrement();
                                fieldState.position += posIncr;
                                if (allowMinus1Position || fieldState.position > 0)
                                {
                                    fieldState.position--;
                                }

                                if (posIncr == 0)
                                {
                                    fieldState.numOverlap++;
                                }

                                bool success = false;
                                try
                                {
                                    // If we hit an exception in here, we abort
                                    // all buffered documents since the last
                                    // flush, on the likelihood that the
                                    // internal state of the consumer is now
                                    // corrupt and should not be flushed to a
                                    // new segment:
                                    consumer.Add();
                                    success = true;
                                }
                                finally
                                {
                                    if (!success)
                                    {
                                        docState.docWriter.SetAborting();
                                    }
                                }
                                fieldState.position++;
                                offsetEnd = fieldState.offset + offsetAttribute.EndOffset();
                                if (++fieldState.length >= maxFieldLength)
                                {
                                    if (docState.infoStream != null)
                                    {
                                        docState.infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
                                    }
                                    break;
                                }

                                hasMoreTokens = stream.IncrementToken();
                            }
                            // trigger streams to perform end-of-stream operations
                            stream.End();

                            fieldState.offset += offsetAttribute.EndOffset();
                            anyToken           = fieldState.length > startLength;
                        }
                        finally
                        {
                            stream.Close();
                        }
                    }

                    if (anyToken)
                    {
                        fieldState.offset += docState.analyzer.GetOffsetGap(field);
                    }
                    fieldState.boost *= field.GetBoost();
                }

                // LUCENE-2387: don't hang onto the field, so GC can
                // reclaim
                fields[i] = null;
            }

            consumer.Finish();
            endConsumer.Finish();
        }
		/// <summary>Processes all occurrences of a single field </summary>
		public abstract void  ProcessFields(Fieldable[] fields, int count);
		// Called once per field, and is given all Fieldable
		// occurrences for this field in the document.  Return
		// true if you wish to see inverted tokens for these
		// fields:
		internal abstract bool Start(Fieldable[] fields, int count);
Example No. 23
        /// <summary>Returns the offsetGap from the analyzer assigned to field</summary>
        public override int GetOffsetGap(Lucene.Net.Documents.Fieldable field)
        {
            var analyzer = GetAnalyzer(field.Name());

            return(analyzer.GetOffsetGap(field));
        }
Example No. 24
        public override DocumentsWriter.DocWriter ProcessDocument()
        {
            consumer.StartDocument();
            fieldsWriter.StartDocument();

            Document doc = docState.doc;

            System.Diagnostics.Debug.Assert(docFieldProcessor.docWriter.writer.TestPoint("DocumentsWriter.ThreadState.init start"));

            fieldCount = 0;

            int thisFieldGen = fieldGen++;

            System.Collections.IList docFields = doc.GetFields();
            int numDocFields = docFields.Count;

            // Absorb any new fields first seen in this document.
            // Also absorb any changes to fields we had already
            // seen before (eg suddenly turning on norms or
            // vectors, etc.):

            for (int i = 0; i < numDocFields; i++)
            {
                Fieldable     field     = (Fieldable)docFields[i];
                System.String fieldName = field.Name();

                // Make sure we have a PerField allocated
                int hashPos = fieldName.GetHashCode() & hashMask;
                DocFieldProcessorPerField fp = fieldHash[hashPos];
                while (fp != null && !fp.fieldInfo.name.Equals(fieldName))
                {
                    fp = fp.next;
                }

                if (fp == null)
                {
                    // TODO FI: we need to genericize the "flags" that a
                    // field holds, and, how these flags are merged; it
                    // needs to be more "pluggable" such that if I want
                    // to have a new "thing" my Fields can do, I can
                    // easily add it
                    FieldInfo fi = fieldInfos.Add(fieldName, field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false, field.GetOmitTf());

                    fp                 = new DocFieldProcessorPerField(this, fi);
                    fp.next            = fieldHash[hashPos];
                    fieldHash[hashPos] = fp;
                    totalFieldCount++;

                    if (totalFieldCount >= fieldHash.Length / 2)
                    {
                        Rehash();
                    }
                }
                else
                {
                    fp.fieldInfo.Update(field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false, field.GetOmitTf());
                }

                if (thisFieldGen != fp.lastGen)
                {
                    // First time we're seeing this field for this doc
                    fp.fieldCount = 0;

                    if (fieldCount == fields.Length)
                    {
                        int newSize = fields.Length * 2;
                        DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize];
                        Array.Copy(fields, 0, newArray, 0, fieldCount);
                        fields = newArray;
                    }

                    fields[fieldCount++] = fp;
                    fp.lastGen           = thisFieldGen;
                }

                if (fp.fieldCount == fp.fields.Length)
                {
                    Fieldable[] newArray = new Fieldable[fp.fields.Length * 2];
                    Array.Copy(fp.fields, 0, newArray, 0, fp.fieldCount);
                    fp.fields = newArray;
                }

                fp.fields[fp.fieldCount++] = field;
                if (field.IsStored())
                {
                    fieldsWriter.AddField(field, fp.fieldInfo);
                }
            }

            // If we are writing vectors then we must visit
            // fields in sorted order so they are written in
            // sorted order.  TODO: we actually only need to
            // sort the subset of fields that have vectors
            // enabled; we could save [small amount of] CPU
            // here.
            QuickSort(fields, 0, fieldCount - 1);

            for (int i = 0; i < fieldCount; i++)
            {
                fields[i].consumer.ProcessFields(fields[i].fields, fields[i].fieldCount);
            }

            if (docState.maxTermPrefix != null && docState.infoStream != null)
            {
                docState.infoStream.WriteLine("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
                docState.maxTermPrefix = null;
            }

            DocumentsWriter.DocWriter one = fieldsWriter.FinishDocument();
            DocumentsWriter.DocWriter two = consumer.FinishDocument();
            if (one == null)
            {
                return(two);
            }
            else if (two == null)
            {
                return(one);
            }
            else
            {
                PerDoc both = GetPerDoc();
                both.docID = docState.docID;
                System.Diagnostics.Debug.Assert(one.docID == docState.docID);
                System.Diagnostics.Debug.Assert(two.docID == docState.docID);
                both.one = one;
                both.two = two;
                return(both);
            }
        }
        public override void ProcessFields(Fieldable[] fields, int count)
        {
            fieldState.Reset(docState.doc.GetBoost());

            int maxFieldLength = docState.maxFieldLength;

            bool doInvert = consumer.Start(fields, count);

            for (int i = 0; i < count; i++)
            {

                Fieldable field = fields[i];

                // TODO FI: this should be "genericized" to querying
                // consumer if it wants to see this particular field
                // tokenized.
                if (field.IsIndexed() && doInvert)
                {

                    bool anyToken;

                    if (fieldState.length > 0)
                        fieldState.position += docState.analyzer.GetPositionIncrementGap(fieldInfo.name);

                    if (!field.IsTokenized())
                    {
                        // un-tokenized field
                        System.String stringValue = field.StringValue();
                        int valueLength = stringValue.Length;
                        perThread.singleTokenTokenStream.Reinit(stringValue, 0, valueLength);
                        fieldState.attributeSource = perThread.singleTokenTokenStream;
                        consumer.Start(field);

                        bool success = false;
                        try
                        {
                            consumer.Add();
                            success = true;
                        }
                        finally
                        {
                            if (!success)
                                docState.docWriter.SetAborting();
                        }
                        fieldState.offset += valueLength;
                        fieldState.length++;
                        fieldState.position++;
                        anyToken = valueLength > 0;
                    }
                    else
                    {
                        // tokenized field
                        TokenStream stream;
                        TokenStream streamValue = field.TokenStreamValue();

                        if (streamValue != null)
                            stream = streamValue;
                        else
                        {
                            // the field does not have a TokenStream,
                            // so we have to obtain one from the analyzer
                            System.IO.TextReader reader; // find or make Reader
                            System.IO.TextReader readerValue = field.ReaderValue();

                            if (readerValue != null)
                                reader = readerValue;
                            else
                            {
                                System.String stringValue = field.StringValue();
                                if (stringValue == null)
                                    throw new System.ArgumentException("field must have either TokenStream, String or Reader value");
                                perThread.stringReader.Init(stringValue);
                                reader = perThread.stringReader;
                            }

                            // Tokenize field and add to postingTable
                            stream = docState.analyzer.ReusableTokenStream(fieldInfo.name, reader);
                        }

                        // reset the TokenStream to the first token
                        stream.Reset();

                        int startLength = fieldState.length;

                        // deprecated
                        bool allowMinus1Position = docState.allowMinus1Position;

                        try
                        {
                            int offsetEnd = fieldState.offset - 1;

                            bool hasMoreTokens = stream.IncrementToken();

                            fieldState.attributeSource = stream;

                            OffsetAttribute offsetAttribute = (OffsetAttribute) fieldState.attributeSource.AddAttribute(typeof(OffsetAttribute));
                            PositionIncrementAttribute posIncrAttribute = (PositionIncrementAttribute) fieldState.attributeSource.AddAttribute(typeof(PositionIncrementAttribute));

                            consumer.Start(field);

                            for (; ; )
                            {

                                // If we hit an exception in stream.next below
                                // (which is fairly common, eg if analyzer
                                // chokes on a given document), then it's
                                // non-aborting and (above) this one document
                                // will be marked as deleted, but still
                                // consume a docID

                                if (!hasMoreTokens)
                                    break;

                                int posIncr = posIncrAttribute.GetPositionIncrement();
                                fieldState.position += posIncr;
                                if (allowMinus1Position || fieldState.position > 0)
                                {
                                    fieldState.position--;
                                }

                                if (posIncr == 0)
                                    fieldState.numOverlap++;

                                bool success = false;
                                try
                                {
                                    // If we hit an exception in here, we abort
                                    // all buffered documents since the last
                                    // flush, on the likelihood that the
                                    // internal state of the consumer is now
                                    // corrupt and should not be flushed to a
                                    // new segment:
                                    consumer.Add();
                                    success = true;
                                }
                                finally
                                {
                                    if (!success)
                                        docState.docWriter.SetAborting();
                                }
                                fieldState.position++;
                                offsetEnd = fieldState.offset + offsetAttribute.EndOffset();
                                if (++fieldState.length >= maxFieldLength)
                                {
                                    if (docState.infoStream != null)
                                        docState.infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
                                    break;
                                }

                                hasMoreTokens = stream.IncrementToken();
                            }
                            // trigger streams to perform end-of-stream operations
                            stream.End();

                            fieldState.offset += offsetAttribute.EndOffset();
                            anyToken = fieldState.length > startLength;
                        }
                        finally
                        {
                            stream.Close();
                        }
                    }

                    if (anyToken)
                        fieldState.offset += docState.analyzer.GetOffsetGap(field);
                    fieldState.boost *= field.GetBoost();
                }
            }

            consumer.Finish();
            endConsumer.Finish();
        }
Example No. 26
        // Tokenizes the fields of a document into Postings.
        private void  InvertDocument(Document doc)
        {
            System.Collections.IEnumerator fieldIterator = doc.GetFields().GetEnumerator();
            while (fieldIterator.MoveNext())
            {
                Fieldable     field       = (Fieldable)fieldIterator.Current;
                System.String fieldName   = field.Name();
                int           fieldNumber = fieldInfos.FieldNumber(fieldName);

                int length   = fieldLengths[fieldNumber];               // length of field
                int position = fieldPositions[fieldNumber];             // position in field
                if (length > 0)
                {
                    position += analyzer.GetPositionIncrementGap(fieldName);
                }
                int offset = fieldOffsets[fieldNumber];                 // offset in field

                if (field.IsIndexed())
                {
                    if (!field.IsTokenized())
                    {
                        // un-tokenized field
                        System.String stringValue = field.StringValue();
                        if (field.IsStoreOffsetWithTermVector())
                        {
                            AddPosition(fieldName, stringValue, position++, new TermVectorOffsetInfo(offset, offset + stringValue.Length));
                        }
                        else
                        {
                            AddPosition(fieldName, stringValue, position++, null);
                        }
                        offset += stringValue.Length;
                        length++;
                    }
                    else
                    {
                        System.IO.TextReader reader;                         // find or make Reader
                        if (field.ReaderValue() != null)
                        {
                            reader = field.ReaderValue();
                        }
                        else if (field.StringValue() != null)
                        {
                            reader = new System.IO.StringReader(field.StringValue());
                        }
                        else
                        {
                            throw new System.ArgumentException("field must have either String or Reader value");
                        }

                        // Tokenize field and add to postingTable
                        TokenStream stream = analyzer.TokenStream(fieldName, reader);
                        try
                        {
                            Token lastToken = null;
                            for (Token t = stream.Next(); t != null; t = stream.Next())
                            {
                                position += (t.GetPositionIncrement() - 1);

                                if (field.IsStoreOffsetWithTermVector())
                                {
                                    AddPosition(fieldName, t.TermText(), position++, new TermVectorOffsetInfo(offset + t.StartOffset(), offset + t.EndOffset()));
                                }
                                else
                                {
                                    AddPosition(fieldName, t.TermText(), position++, null);
                                }

                                lastToken = t;
                                if (++length >= maxFieldLength)
                                {
                                    if (infoStream != null)
                                    {
                                        infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached, ignoring following tokens");
                                    }
                                    break;
                                }
                            }

                            if (lastToken != null)
                            {
                                offset += lastToken.EndOffset() + 1;
                            }
                        }
                        finally
                        {
                            stream.Close();
                        }
                    }

                    fieldLengths[fieldNumber]   = length;                   // save field length
                    fieldPositions[fieldNumber] = position;                 // save field position
                    fieldBoosts[fieldNumber]   *= field.GetBoost();
                    fieldOffsets[fieldNumber]   = offset;
                }
            }
        }
		public override DocumentsWriter.DocWriter ProcessDocument()
		{
			
			consumer.StartDocument();
			fieldsWriter.StartDocument();
			
			Document doc = docState.doc;
			
			System.Diagnostics.Debug.Assert(docFieldProcessor.docWriter.writer.TestPoint("DocumentsWriter.ThreadState.init start"));
			
			fieldCount = 0;
			
			int thisFieldGen = fieldGen++;
			
			System.Collections.IList docFields = doc.GetFields();
			int numDocFields = docFields.Count;
			
			// Absorb any new fields first seen in this document.
			// Also absorb any changes to fields we had already
			// seen before (eg suddenly turning on norms or
			// vectors, etc.):
			
			for (int i = 0; i < numDocFields; i++)
			{
				Fieldable field = (Fieldable) docFields[i];
				System.String fieldName = field.Name();
				
				// Make sure we have a PerField allocated
				int hashPos = fieldName.GetHashCode() & hashMask;
				DocFieldProcessorPerField fp = fieldHash[hashPos];
				while (fp != null && !fp.fieldInfo.name.Equals(fieldName))
					fp = fp.next;
				
				if (fp == null)
				{
					
					// TODO FI: we need to genericize the "flags" that a
					// field holds, and, how these flags are merged; it
					// needs to be more "pluggable" such that if I want
					// to have a new "thing" my Fields can do, I can
					// easily add it
					FieldInfo fi = fieldInfos.Add(fieldName, field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false, field.GetOmitTf());
					
					fp = new DocFieldProcessorPerField(this, fi);
					fp.next = fieldHash[hashPos];
					fieldHash[hashPos] = fp;
					totalFieldCount++;
					
					if (totalFieldCount >= fieldHash.Length / 2)
						Rehash();
				}
				else
					fp.fieldInfo.Update(field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false, field.GetOmitTf());
				
				if (thisFieldGen != fp.lastGen)
				{
					
					// First time we're seeing this field for this doc
					fp.fieldCount = 0;
					
					if (fieldCount == fields.Length)
					{
						int newSize = fields.Length * 2;
						DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize];
						Array.Copy(fields, 0, newArray, 0, fieldCount);
						fields = newArray;
					}
					
					fields[fieldCount++] = fp;
					fp.lastGen = thisFieldGen;
				}
				
				if (fp.fieldCount == fp.fields.Length)
				{
					Fieldable[] newArray = new Fieldable[fp.fields.Length * 2];
					Array.Copy(fp.fields, 0, newArray, 0, fp.fieldCount);
					fp.fields = newArray;
				}
				
				fp.fields[fp.fieldCount++] = field;
				if (field.IsStored())
				{
					fieldsWriter.AddField(field, fp.fieldInfo);
				}
			}
			
			// If we are writing vectors then we must visit
			// fields in sorted order so they are written in
			// sorted order.  TODO: we actually only need to
			// sort the subset of fields that have vectors
			// enabled; we could save [small amount of] CPU
			// here.
			QuickSort(fields, 0, fieldCount - 1);
			
			for (int i = 0; i < fieldCount; i++)
				fields[i].consumer.ProcessFields(fields[i].fields, fields[i].fieldCount);

            if (docState.maxTermPrefix != null && docState.infoStream != null)
            {
                docState.infoStream.WriteLine("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
                docState.maxTermPrefix = null;
            }
			
			DocumentsWriter.DocWriter one = fieldsWriter.FinishDocument();
			DocumentsWriter.DocWriter two = consumer.FinishDocument();
			if (one == null)
			{
				return two;
			}
			else if (two == null)
			{
				return one;
			}
			else
			{
				PerDoc both = GetPerDoc();
				both.docID = docState.docID;
				System.Diagnostics.Debug.Assert(one.docID == docState.docID);
				System.Diagnostics.Debug.Assert(two.docID == docState.docID);
				both.one = one;
				both.two = two;
				return both;
			}
		}
Example No. 28
        internal void  AddDocument(Document doc)
        {
            indexStream.WriteLong(fieldsStream.GetFilePointer());

            int storedCount = 0;

            System.Collections.IEnumerator fieldIterator = doc.GetFields().GetEnumerator();
            while (fieldIterator.MoveNext())
            {
                Fieldable field = (Fieldable)fieldIterator.Current;
                if (field.IsStored())
                {
                    storedCount++;
                }
            }
            fieldsStream.WriteVInt(storedCount);

            fieldIterator = doc.GetFields().GetEnumerator();
            while (fieldIterator.MoveNext())
            {
                Fieldable field = (Fieldable)fieldIterator.Current;
                // if the field is an instance of FieldsReader.FieldForMerge, we're in merge mode
                // and field.binaryValue() already returns the compressed value for a field
                // with isCompressed()==true, so we disable compression in that case
                bool disableCompression = (field is FieldsReader.FieldForMerge);
                if (field.IsStored())
                {
                    fieldsStream.WriteVInt(fieldInfos.FieldNumber(field.Name()));

                    byte bits = 0;
                    if (field.IsTokenized())
                    {
                        bits |= FieldsWriter.FIELD_IS_TOKENIZED;
                    }
                    if (field.IsBinary())
                    {
                        bits |= FieldsWriter.FIELD_IS_BINARY;
                    }
                    if (field.IsCompressed())
                    {
                        bits |= FieldsWriter.FIELD_IS_COMPRESSED;
                    }

                    fieldsStream.WriteByte(bits);

                    if (field.IsCompressed())
                    {
                        // compression is enabled for the current field
                        byte[] data = null;

                        if (disableCompression)
                        {
                            // optimized case for merging, the data
                            // is already compressed
                            data = field.BinaryValue();
                        }
                        else
                        {
                            // check if it is a binary field
                            if (field.IsBinary())
                            {
                                data = Compress(field.BinaryValue());
                            }
                            else
                            {
                                data = Compress(System.Text.Encoding.GetEncoding("UTF-8").GetBytes(field.StringValue()));
                            }
                        }
                        int len = data.Length;
                        fieldsStream.WriteVInt(len);
                        fieldsStream.WriteBytes(data, len);
                    }
                    else
                    {
                        // compression is disabled for the current field
                        if (field.IsBinary())
                        {
                            byte[] data = field.BinaryValue();
                            int    len  = data.Length;
                            fieldsStream.WriteVInt(len);
                            fieldsStream.WriteBytes(data, len);
                        }
                        else
                        {
                            fieldsStream.WriteString(field.StringValue());
                        }
                    }
                }
            }
        }
Example No. 29
 private static void  Add(System.Collections.IDictionary map, Fieldable field)
 {
     map[field.Name()] = field;
 }
 public override void ProcessFields(Fieldable[] fields, int count)
 {
     one.ProcessFields(fields, count);
     two.ProcessFields(fields, count);
 }
		internal override void  Start(Fieldable f)
		{
			if (doVectorOffsets)
			{
				offsetAttribute = (OffsetAttribute) fieldState.attributeSource.AddAttribute(typeof(OffsetAttribute));
			}
			else
			{
				offsetAttribute = null;
			}
		}
Example No. 32
		internal override bool Start(Fieldable[] fields, int count)
		{
			doCall = consumer.Start(fields, count);
			if (nextPerField != null)
				doNextCall = nextPerField.Start(fields, count);
			return doCall || doNextCall;
		}
Example No. 33
        internal void WriteField(FieldInfo fi, Fieldable field)
        {
            // if the field is an instance of FieldsReader.FieldForMerge, we're in merge mode
            // and field.binaryValue() already returns the compressed value for a field
            // with isCompressed()==true, so we disable compression in that case
            bool disableCompression = (field is FieldsReader.FieldForMerge);
            fieldsStream.WriteVInt(fi.number);
            byte bits = 0;
            if (field.IsTokenized())
                bits |= FieldsWriter.FIELD_IS_TOKENIZED;
            if (field.IsBinary())
                bits |= FieldsWriter.FIELD_IS_BINARY;
            if (field.IsCompressed())
                bits |= FieldsWriter.FIELD_IS_COMPRESSED;

            fieldsStream.WriteByte(bits);

            if (field.IsCompressed())
            {
                // compression is enabled for the current field
                byte[] data;
                int len;
                int offset;

                if (disableCompression)
                {
                    // optimized case for merging, the data
                    // is already compressed
                    data = field.GetBinaryValue();
                    System.Diagnostics.Debug.Assert(data != null);
                    len = field.GetBinaryLength();
                    offset = field.GetBinaryOffset();
                }
                else
                {
                    // check if it is a binary field
                    if (field.IsBinary())
                    {
                        data = Compress(field.GetBinaryValue(), field.GetBinaryOffset(), field.GetBinaryLength());
                    }
                    else
                    {
                        byte[] x = System.Text.Encoding.UTF8.GetBytes(field.StringValue());
                        data = Compress(x, 0, x.Length);
                    }
                    len = data.Length;
                    offset = 0;
                }

                fieldsStream.WriteVInt(len);
                fieldsStream.WriteBytes(data, offset, len);
            }
            else
            {
                // compression is disabled for the current field
                if (field.IsBinary())
                {
                    int length = field.GetBinaryLength();
                    fieldsStream.WriteVInt(length);
                    fieldsStream.WriteBytes(field.BinaryValue(), field.GetBinaryOffset(), length);
                }
                else
                {
                    fieldsStream.WriteString(field.StringValue());
                }
            }
        }
        internal override void processFields(Fieldable[] fields,
                                             int count)
        {
            fieldState.reset(docState.doc.GetBoost());

            int maxFieldLength = docState.maxFieldLength;

            bool doInvert = consumer.start(fields, count);

            for (int i = 0; i < count; i++)
            {
                Fieldable field = fields[i];

                // TODO FI: this should be "genericized" to querying
                // consumer if it wants to see this particular field
                // tokenized.
                if (field.IsIndexed() && doInvert)
                {
                    if (fieldState.length > 0)
                    {
                        fieldState.position += docState.analyzer.GetPositionIncrementGap(fieldInfo.name);
                    }

                    if (!field.IsTokenized())
                    {             // un-tokenized field
                        string stringValue = field.StringValue();
                        int    valueLength = stringValue.Length;
                        Token  token       = perThread.localToken.Reinit(stringValue, fieldState.offset, fieldState.offset + valueLength);
                        bool   success     = false;
                        try
                        {
                            consumer.add(token);
                            success = true;
                        }
                        finally
                        {
                            if (!success)
                            {
                                docState.docWriter.SetAborting();
                            }
                        }
                        fieldState.offset += valueLength;
                        fieldState.length++;
                        fieldState.position++;
                    }
                    else
                    {                                  // tokenized field
                        TokenStream stream;
                        TokenStream streamValue = field.TokenStreamValue();

                        if (streamValue != null)
                        {
                            stream = streamValue;
                        }
                        else
                        {
                            // the field does not have a TokenStream,
                            // so we have to obtain one from the analyzer
                            System.IO.TextReader reader;                          // find or make Reader
                            System.IO.TextReader readerValue = field.ReaderValue();

                            if (readerValue != null)
                            {
                                reader = readerValue;
                            }
                            else
                            {
                                string stringValue = field.StringValue();
                                if (stringValue == null)
                                {
                                    throw new System.ArgumentException("field must have either TokenStream, string or Reader value");
                                }
                                perThread.stringReader.Init(stringValue);
                                reader = perThread.stringReader;
                            }

                            // Tokenize field and add to postingTable
                            stream = docState.analyzer.ReusableTokenStream(fieldInfo.name, reader);
                        }

                        // reset the TokenStream to the first token
                        stream.Reset();

                        try
                        {
                            int   offsetEnd  = fieldState.offset - 1;
                            Token localToken = perThread.localToken;
                            for (; ;)
                            {
                                // If we hit an exception in stream.next below
                                // (which is fairly common, eg if analyzer
                                // chokes on a given document), then it's
                                // non-aborting and (above) this one document
                                // will be marked as deleted, but still
                                // consume a docID
                                Token token = stream.Next(localToken);

                                if (token == null)
                                {
                                    break;
                                }
                                fieldState.position += (token.GetPositionIncrement() - 1);
                                bool success = false;
                                try
                                {
                                    // If we hit an exception in here, we abort
                                    // all buffered documents since the last
                                    // flush, on the likelihood that the
                                    // internal state of the consumer is now
                                    // corrupt and should not be flushed to a
                                    // new segment:
                                    consumer.add(token);
                                    success = true;
                                }
                                finally
                                {
                                    if (!success)
                                    {
                                        docState.docWriter.SetAborting();
                                    }
                                }
                                fieldState.position++;
                                offsetEnd = fieldState.offset + token.EndOffset();

                                if (++fieldState.length >= maxFieldLength)
                                {
                                    if (docState.infoStream != null)
                                    {
                                        docState.infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
                                    }
                                    break;
                                }
                            }
                            fieldState.offset = offsetEnd + 1;
                        }
                        finally
                        {
                            stream.Close();
                        }
                    }

                    fieldState.boost *= field.GetBoost();
                }
            }

            consumer.finish();
            endConsumer.finish();
        }
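The loop above is the heart of inversion: obtain a TokenStream (either directly from the field or from the Analyzer), then pull tokens with Next(Token) until it returns null, accumulating position and offset state as it goes. A minimal standalone sketch of that same consume loop, using WhitespaceAnalyzer and a made-up field name and text (illustrative only, not part of the indexing chain above):

using System;
using Lucene.Net.Analysis;

public static class TokenLoopSketch
{
    public static void Main()
    {
        Analyzer analyzer = new WhitespaceAnalyzer();
        System.IO.TextReader reader = new System.IO.StringReader("hello token stream world");

        // Same pattern as processFields: get a stream, then iterate with Next(Token)
        // until it returns null, reusing one Token instance per call.
        TokenStream stream = analyzer.TokenStream("body", reader);
        Token reusable = new Token();
        try
        {
            for (Token token = stream.Next(reusable); token != null; token = stream.Next(reusable))
            {
                string term = new string(token.TermBuffer(), 0, token.TermLength());
                Console.WriteLine(term + " posIncr=" + token.GetPositionIncrement()
                    + " offsets=[" + token.StartOffset() + "," + token.EndOffset() + ")");
            }
        }
        finally
        {
            stream.Close();
        }
    }
}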
Example 35
			/// <summary>Initializes shared state for this new document </summary>
			internal void  Init(Document doc, int docID)
			{

                System.Diagnostics.Debug.Assert(!isIdle);
                System.Diagnostics.Debug.Assert(Enclosing_Instance.writer.TestPoint("DocumentsWriter.ThreadState.init start"));
				
				this.docID = docID;
				docBoost = doc.GetBoost();
				numStoredFields = 0;
				numFieldData = 0;
				numVectorFields = 0;
				maxTermPrefix = null;
				
				System.Diagnostics.Debug.Assert(0 == fdtLocal.Length());
				System.Diagnostics.Debug.Assert(0 == fdtLocal.GetFilePointer());
				System.Diagnostics.Debug.Assert(0 == tvfLocal.Length());
				System.Diagnostics.Debug.Assert(0 == tvfLocal.GetFilePointer());
				int thisFieldGen = fieldGen++;
				
				System.Collections.IList docFields = doc.GetFields();
				int numDocFields = docFields.Count;
				bool docHasVectors = false;
				
				// Absorb any new fields first seen in this document.
				// Also absorb any changes to fields we had already
				// seen before (eg suddenly turning on norms or
				// vectors, etc.):
				
				for (int i = 0; i < numDocFields; i++)
				{
					Fieldable field = (Fieldable) docFields[i];
					
					FieldInfo fi = Enclosing_Instance.fieldInfos.Add(field.Name(), field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false);
					if (fi.isIndexed && !fi.omitNorms)
					{
						// Maybe grow our buffered norms
						if (Enclosing_Instance.norms.Length <= fi.number)
						{
							int newSize = (int) ((1 + fi.number) * 1.25);
							BufferedNorms[] newNorms = new BufferedNorms[newSize];
							Array.Copy(Enclosing_Instance.norms, 0, newNorms, 0, Enclosing_Instance.norms.Length);
							Enclosing_Instance.norms = newNorms;
						}
						
						if (Enclosing_Instance.norms[fi.number] == null)
							Enclosing_Instance.norms[fi.number] = new BufferedNorms();
						
						Enclosing_Instance.hasNorms = true;
					}
					
					// Make sure we have a FieldData allocated
					int hashPos = fi.name.GetHashCode() & fieldDataHashMask;
					FieldData fp = fieldDataHash[hashPos];
					while (fp != null && !fp.fieldInfo.name.Equals(fi.name))
						fp = fp.next;
					
					if (fp == null)
					{
						
						fp = new FieldData(this, fi);
						fp.next = fieldDataHash[hashPos];
						fieldDataHash[hashPos] = fp;
						
						if (numAllFieldData == allFieldDataArray.Length)
						{
							int newSize = (int) (allFieldDataArray.Length * 1.5);
							int newHashSize = fieldDataHash.Length * 2;
							
							FieldData[] newArray = new FieldData[newSize];
							FieldData[] newHashArray = new FieldData[newHashSize];
							Array.Copy(allFieldDataArray, 0, newArray, 0, numAllFieldData);
							
							// Rehash
							fieldDataHashMask = newHashSize - 1;
							for (int j = 0; j < fieldDataHash.Length; j++)
							{
								FieldData fp0 = fieldDataHash[j];
								while (fp0 != null)
								{
									hashPos = fp0.fieldInfo.name.GetHashCode() & fieldDataHashMask;
									FieldData nextFP0 = fp0.next;
									fp0.next = newHashArray[hashPos];
									newHashArray[hashPos] = fp0;
									fp0 = nextFP0;
								}
							}
							
							allFieldDataArray = newArray;
							fieldDataHash = newHashArray;
						}
						allFieldDataArray[numAllFieldData++] = fp;
					}
					else
					{
						System.Diagnostics.Debug.Assert(fp.fieldInfo == fi);
					}
					
					if (thisFieldGen != fp.lastGen)
					{
						
						// First time we're seeing this field for this doc
						fp.lastGen = thisFieldGen;
						fp.fieldCount = 0;
						fp.doVectors = fp.doVectorPositions = fp.doVectorOffsets = false;
						fp.doNorms = fi.isIndexed && !fi.omitNorms;
						
						if (numFieldData == fieldDataArray.Length)
						{
							int newSize = fieldDataArray.Length * 2;
							FieldData[] newArray = new FieldData[newSize];
							Array.Copy(fieldDataArray, 0, newArray, 0, numFieldData);
							fieldDataArray = newArray;
						}
						fieldDataArray[numFieldData++] = fp;
					}
					
					if (field.IsTermVectorStored())
					{
						if (!fp.doVectors && numVectorFields++ == vectorFieldPointers.Length)
						{
							int newSize = (int) (numVectorFields * 1.5);
							vectorFieldPointers = new long[newSize];
							vectorFieldNumbers = new int[newSize];
						}
						fp.doVectors = true;
						docHasVectors = true;
						
						fp.doVectorPositions |= field.IsStorePositionWithTermVector();
						fp.doVectorOffsets |= field.IsStoreOffsetWithTermVector();
					}
					
					if (fp.fieldCount == fp.docFields.Length)
					{
						Fieldable[] newArray = new Fieldable[fp.docFields.Length * 2];
						Array.Copy(fp.docFields, 0, newArray, 0, fp.docFields.Length);
						fp.docFields = newArray;
					}
					
					// Lazily allocate arrays for postings:
					if (field.IsIndexed() && fp.postingsHash == null)
						fp.InitPostingArrays();
					
					fp.docFields[fp.fieldCount++] = field;
				}
				
				// Maybe init the local & global fieldsWriter
				if (localFieldsWriter == null)
				{
					if (Enclosing_Instance.fieldsWriter == null)
					{
						System.Diagnostics.Debug.Assert(Enclosing_Instance.docStoreSegment == null);
						System.Diagnostics.Debug.Assert(Enclosing_Instance.segment != null);
						Enclosing_Instance.docStoreSegment = Enclosing_Instance.segment;
						// If we hit an exception while init'ing the
						// fieldsWriter, we must abort this segment
						// because those files will be in an unknown
						// state:
						try
						{
							Enclosing_Instance.fieldsWriter = new FieldsWriter(Enclosing_Instance.directory, Enclosing_Instance.docStoreSegment, Enclosing_Instance.fieldInfos);
						}
						catch (System.Exception t)
						{
							throw new AbortException(t, Enclosing_Instance);
						}
						Enclosing_Instance.files = null;
					}
					localFieldsWriter = new FieldsWriter(null, fdtLocal, Enclosing_Instance.fieldInfos);
				}
				
				// First time we see a doc that has field(s) with
				// stored vectors, we init our tvx writer
				if (docHasVectors)
				{
					if (Enclosing_Instance.tvx == null)
					{
						System.Diagnostics.Debug.Assert(Enclosing_Instance.docStoreSegment != null);
						// If we hit an exception while init'ing the term
						// vector output files, we must abort this segment
						// because those files will be in an unknown
						// state:
						try
						{
							Enclosing_Instance.tvx = Enclosing_Instance.directory.CreateOutput(Enclosing_Instance.docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
							Enclosing_Instance.tvx.WriteInt(TermVectorsReader.FORMAT_VERSION);
							Enclosing_Instance.tvd = Enclosing_Instance.directory.CreateOutput(Enclosing_Instance.docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
							Enclosing_Instance.tvd.WriteInt(TermVectorsReader.FORMAT_VERSION);
							Enclosing_Instance.tvf = Enclosing_Instance.directory.CreateOutput(Enclosing_Instance.docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
							Enclosing_Instance.tvf.WriteInt(TermVectorsReader.FORMAT_VERSION);
							
							// We must "catch up" for all docs before us
							// that had no vectors:
							for (int i = 0; i < Enclosing_Instance.numDocsInStore; i++)
							{
								Enclosing_Instance.tvx.WriteLong(Enclosing_Instance.tvd.GetFilePointer());
								Enclosing_Instance.tvd.WriteVInt(0);
							}
						}
						catch (System.Exception t)
						{
							throw new AbortException(t, Enclosing_Instance);
						}
						Enclosing_Instance.files = null;
					}
					
					numVectorFields = 0;
				}
			}
 internal override void processFields(Fieldable[] fields, int count)
 {
     one.processFields(fields, count);
     two.processFields(fields, count);
 }
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using Fieldable = Lucene.Net.Documents.Fieldable;

namespace Lucene.Net.Index
{
    internal abstract class DocFieldConsumerPerField
    {
        /// <summary>
        /// Processes all occurrences of a single field
        /// </summary>
        /// <param name="fields"></param>
        /// <param name="count"></param>

        internal abstract void processFields(Fieldable[] fields, int count);
        internal abstract void abort();
    }
}
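For illustration, a minimal concrete subclass in the same shape as the two-way fan-out shown a few snippets earlier (the one/two override). The class name is hypothetical, and since DocFieldConsumerPerField is internal, such a type could only live inside the Lucene.Net.Index assembly:

namespace Lucene.Net.Index
{
    using Fieldable = Lucene.Net.Documents.Fieldable;

    // Hypothetical "tee" per-field consumer: forwards every call to two children so
    // both observe the same field occurrences; abort must be forwarded as well so
    // both children can discard their buffered state.
    internal sealed class TeeDocFieldConsumerPerField : DocFieldConsumerPerField
    {
        private readonly DocFieldConsumerPerField one;
        private readonly DocFieldConsumerPerField two;

        internal TeeDocFieldConsumerPerField(DocFieldConsumerPerField one, DocFieldConsumerPerField two)
        {
            this.one = one;
            this.two = two;
        }

        internal override void processFields(Fieldable[] fields, int count)
        {
            one.processFields(fields, count);
            two.processFields(fields, count);
        }

        internal override void abort()
        {
            one.abort();
            two.abort();
        }
    }
}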

Example 38
        internal void  WriteField(FieldInfo fi, Fieldable field)
        {
            // if the field is an instance of FieldsReader.FieldForMerge, we're in merge mode
            // and field.binaryValue() already returns the compressed value for a field
            // with isCompressed()==true, so we disable compression in that case
            bool disableCompression = (field is FieldsReader.FieldForMerge);

            fieldsStream.WriteVInt(fi.number);
            byte bits = 0;

            if (field.IsTokenized())
            {
                bits |= FieldsWriter.FIELD_IS_TOKENIZED;
            }
            if (field.IsBinary())
            {
                bits |= FieldsWriter.FIELD_IS_BINARY;
            }
            if (field.IsCompressed())
            {
                bits |= FieldsWriter.FIELD_IS_COMPRESSED;
            }

            fieldsStream.WriteByte(bits);

            if (field.IsCompressed())
            {
                // compression is enabled for the current field
                byte[] data;
                int    len;
                int    offset;

                if (disableCompression)
                {
                    // optimized case for merging, the data
                    // is already compressed
                    data = field.GetBinaryValue();
                    System.Diagnostics.Debug.Assert(data != null);
                    len    = field.GetBinaryLength();
                    offset = field.GetBinaryOffset();
                }
                else
                {
                    // check if it is a binary field
                    if (field.IsBinary())
                    {
                        data = Compress(field.GetBinaryValue(), field.GetBinaryOffset(), field.GetBinaryLength());
                    }
                    else
                    {
                        byte[] x = System.Text.Encoding.UTF8.GetBytes(field.StringValue());
                        data = Compress(x, 0, x.Length);
                    }
                    len    = data.Length;
                    offset = 0;
                }

                fieldsStream.WriteVInt(len);
                fieldsStream.WriteBytes(data, offset, len);
            }
            else
            {
                // compression is disabled for the current field
                if (field.IsBinary())
                {
                    int length = field.GetBinaryLength();
                    fieldsStream.WriteVInt(length);
                    fieldsStream.WriteBytes(field.BinaryValue(), field.GetBinaryOffset(), length);
                }
                else
                {
                    fieldsStream.WriteString(field.StringValue());
                }
            }
        }
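The user-level switch that drives the compressed branch above is Field.Store.COMPRESS: such a field answers IsCompressed() == true, so WriteField compresses its bytes, while a Field.Store.YES field goes through the plain branch. A small sketch with made-up field names and values:

using Lucene.Net.Documents;

public static class CompressedFieldSketch
{
    public static Document Build()
    {
        Document doc = new Document();

        // Stored as-is: WriteField writes the string (or raw bytes) directly.
        doc.Add(new Field("title", "Lucene in Action", Field.Store.YES, Field.Index.ANALYZED));

        // Stored compressed: IsCompressed() is true, so WriteField compresses the
        // UTF-8 bytes before writing the length-prefixed payload.
        doc.Add(new Field("body", "a longer block of text that is worth compressing",
                          Field.Store.COMPRESS, Field.Index.ANALYZED));
        return doc;
    }
}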
Example 39
				/* Invert one occurrence of one field in the document */
				public void  InvertField(Fieldable field, Analyzer analyzer, int maxFieldLength)
				{
					
					if (length > 0)
						position += analyzer.GetPositionIncrementGap(fieldInfo.name);
					
					if (!field.IsTokenized())
					{
						// un-tokenized field
						System.String stringValue = field.StringValue();
						int valueLength = stringValue.Length;
						Token token = localToken;
						token.Clear();
						char[] termBuffer = token.TermBuffer();
						if (termBuffer.Length < valueLength)
							termBuffer = token.ResizeTermBuffer(valueLength);
						DocumentsWriter.GetCharsFromString(stringValue, 0, valueLength, termBuffer, 0);
						token.SetTermLength(valueLength);
						token.SetStartOffset(offset);
						token.SetEndOffset(offset + stringValue.Length);
						AddPosition(token);
						offset += stringValue.Length;
						length++;
					}
					else
					{
						// tokenized field
						TokenStream stream;
						TokenStream streamValue = field.TokenStreamValue();
						
						if (streamValue != null)
							stream = streamValue;
						else
						{
							// the field does not have a TokenStream,
							// so we have to obtain one from the analyzer
							System.IO.TextReader reader; // find or make Reader
							System.IO.TextReader readerValue = field.ReaderValue();
							
							if (readerValue != null)
								reader = readerValue;
							else
							{
								System.String stringValue = field.StringValue();
								if (stringValue == null)
									throw new System.ArgumentException("field must have either TokenStream, String or Reader value");
								Enclosing_Instance.stringReader.Init(stringValue);
								reader = Enclosing_Instance.stringReader;
							}
							
							// Tokenize field and add to postingTable
							stream = analyzer.ReusableTokenStream(fieldInfo.name, reader);
						}
						
						// reset the TokenStream to the first token
						stream.Reset();
						
						try
						{
							offsetEnd = offset - 1;
							for (; ; )
							{
								Token token = stream.Next(localToken);
								if (token == null)
									break;
								position += (token.GetPositionIncrement() - 1);
								AddPosition(token);
								if (++length >= maxFieldLength)
								{
									if (Enclosing_Instance.Enclosing_Instance.infoStream != null)
										Enclosing_Instance.Enclosing_Instance.infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
									break;
								}
							}
							offset = offsetEnd + 1;
						}
						finally
						{
							stream.Close();
						}
					}
					
					boost *= field.GetBoost();
				}
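The position += analyzer.GetPositionIncrementGap(fieldInfo.name) call at the top of InvertField is what keeps separate values of a multi-valued field apart. A hedged sketch of an Analyzer that widens that gap so phrase queries cannot match across value boundaries (the delegate analyzer and the gap of 100 are illustrative choices, not anything mandated by Lucene.Net):

using System.IO;
using Lucene.Net.Analysis;

public class GapAnalyzer : Analyzer
{
    private readonly Analyzer inner = new WhitespaceAnalyzer();

    public override TokenStream TokenStream(string fieldName, TextReader reader)
    {
        return inner.TokenStream(fieldName, reader);
    }

    // InvertField adds this gap to "position" before the second and later
    // occurrences of the same field in one document (the length > 0 check above).
    public override int GetPositionIncrementGap(string fieldName)
    {
        return 100;
    }
}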
		// Called before a field instance is being processed
		internal abstract void  Start(Fieldable field);
Example 41
        internal void  WriteField(FieldInfo fi, Fieldable field)
        {
            // if the field is an instance of FieldsReader.FieldForMerge, we're in merge mode
            // and field.binaryValue() already returns the compressed value for a field
            // with isCompressed()==true, so we disable compression in that case
            bool disableCompression = (field is FieldsReader.FieldForMerge);

            fieldsStream.WriteVInt(fi.number);
            byte bits = 0;

            if (field.IsTokenized())
            {
                bits |= FieldsWriter.FIELD_IS_TOKENIZED;
            }
            if (field.IsBinary())
            {
                bits |= FieldsWriter.FIELD_IS_BINARY;
            }
            if (field.IsCompressed())
            {
                bits |= FieldsWriter.FIELD_IS_COMPRESSED;
            }

            fieldsStream.WriteByte(bits);

            if (field.IsCompressed())
            {
                // compression is enabled for the current field
                byte[] data = null;

                if (disableCompression)
                {
                    // optimized case for merging, the data
                    // is already compressed
                    data = field.BinaryValue();
                }
                else
                {
                    // check if it is a binary field
                    if (field.IsBinary())
                    {
                        data = Compress(field.BinaryValue());
                    }
                    else
                    {
                        data = Compress(System.Text.Encoding.GetEncoding("UTF-8").GetBytes(field.StringValue()));
                    }
                }
                int len = data.Length;
                fieldsStream.WriteVInt(len);
                fieldsStream.WriteBytes(data, len);
            }
            else
            {
                // compression is disabled for the current field
                if (field.IsBinary())
                {
                    byte[] data = field.BinaryValue();
                    int    len  = data.Length;
                    fieldsStream.WriteVInt(len);
                    fieldsStream.WriteBytes(data, len);
                }
                else
                {
                    fieldsStream.WriteString(field.StringValue());
                }
            }
        }
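This older variant compresses the whole BinaryValue() array, whereas the version a couple of examples up works on an offset/length slice. The Compress helper itself is not shown in these snippets; purely as a stand-in (the real FieldsWriter uses a zlib-style deflater, so the bytes produced below would not be byte-compatible with it), a deflate-based helper of the same shape could look like:

using System.IO;
using System.IO.Compression;

internal static class CompressSketch
{
    // Hypothetical stand-in for FieldsWriter.Compress: deflate the input bytes into
    // a new array. Illustrative only; not the codec the real implementation uses.
    internal static byte[] Compress(byte[] input, int offset, int length)
    {
        using (MemoryStream buffer = new MemoryStream())
        {
            using (DeflateStream deflate = new DeflateStream(buffer, CompressionMode.Compress, true))
            {
                deflate.Write(input, offset, length);
            }
            return buffer.ToArray();
        }
    }

    internal static byte[] Compress(byte[] input)
    {
        return Compress(input, 0, input.Length);
    }
}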
Example 42
        static DocHelper()
        {
            textField1           = new Field(TEXT_FIELD_1_KEY, FIELD_1_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
            textField2           = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            compressedTextField2 = new Field(COMPRESSED_TEXT_FIELD_2_KEY, FIELD_2_COMPRESSED_TEXT, Field.Store.COMPRESS, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            textField3           = new Field(TEXT_FIELD_3_KEY, FIELD_3_TEXT, Field.Store.YES, Field.Index.ANALYZED);
            {
                textField3.SetOmitNorms(true);
            }
            keyField     = new Field(KEYWORD_FIELD_KEY, KEYWORD_TEXT, Field.Store.YES, Field.Index.NOT_ANALYZED);
            noNormsField = new Field(NO_NORMS_KEY, NO_NORMS_TEXT, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
            noTFField    = new Field(NO_TF_KEY, NO_TF_TEXT, Field.Store.YES, Field.Index.ANALYZED);
            {
                noTFField.SetOmitTermFreqAndPositions(true);
            }
            unIndField     = new Field(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT, Field.Store.YES, Field.Index.NO);
            unStoredField1 = new Field(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
            unStoredField2 = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
            lazyField      = new Field(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);
            textUtfField1  = new Field(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
            textUtfField2  = new Field(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            fields         = new Field[] { textField1, textField2, textField3, compressedTextField2, keyField, noNormsField, noTFField, unIndField, unStoredField1, unStoredField2, textUtfField1, textUtfField2, lazyField, lazyFieldBinary, largeLazyField };
            {
                //Initialize the large Lazy Field
                System.Text.StringBuilder buffer = new System.Text.StringBuilder();
                for (int i = 0; i < 10000; i++)
                {
                    buffer.Append("Lazily loading lengths of language in lieu of laughing ");
                }

                try
                {
                    LAZY_FIELD_BINARY_BYTES = System.Text.Encoding.UTF8.GetBytes("These are some binary field bytes");
                }
                catch (System.IO.IOException e)
                {
                }
                lazyFieldBinary           = new Field(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES, Field.Store.YES);
                fields[fields.Length - 2] = lazyFieldBinary;
                LARGE_LAZY_FIELD_TEXT     = buffer.ToString();
                largeLazyField            = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);

                fields[fields.Length - 1] = largeLazyField;
                for (int i = 0; i < fields.Length; i++)
                {
                    Fieldable f = fields[i];
                    Add(all, f);
                    if (f.IsIndexed())
                    {
                        Add(indexed, f);
                    }
                    else
                    {
                        Add(unindexed, f);
                    }
                    if (f.IsTermVectorStored())
                    {
                        Add(termvector, f);
                    }
                    if (f.IsIndexed() && !f.IsTermVectorStored())
                    {
                        Add(notermvector, f);
                    }
                    if (f.IsStored())
                    {
                        Add(stored, f);
                    }
                    else
                    {
                        Add(unstored, f);
                    }
                    if (f.GetOmitNorms())
                    {
                        Add(noNorms, f);
                    }
                    if (f.GetOmitTf())
                    {
                        Add(noTf, f);
                    }
                    if (f.IsLazy())
                    {
                        Add(lazy, f);
                    }
                }
            }
            {
                nameValues = new System.Collections.Hashtable();
                nameValues[TEXT_FIELD_1_KEY]            = FIELD_1_TEXT;
                nameValues[TEXT_FIELD_2_KEY]            = FIELD_2_TEXT;
                nameValues[COMPRESSED_TEXT_FIELD_2_KEY] = FIELD_2_COMPRESSED_TEXT;
                nameValues[TEXT_FIELD_3_KEY]            = FIELD_3_TEXT;
                nameValues[KEYWORD_FIELD_KEY]           = KEYWORD_TEXT;
                nameValues[NO_NORMS_KEY]          = NO_NORMS_TEXT;
                nameValues[NO_TF_KEY]             = NO_TF_TEXT;
                nameValues[UNINDEXED_FIELD_KEY]   = UNINDEXED_FIELD_TEXT;
                nameValues[UNSTORED_FIELD_1_KEY]  = UNSTORED_1_FIELD_TEXT;
                nameValues[UNSTORED_FIELD_2_KEY]  = UNSTORED_2_FIELD_TEXT;
                nameValues[LAZY_FIELD_KEY]        = LAZY_FIELD_TEXT;
                nameValues[LAZY_FIELD_BINARY_KEY] = LAZY_FIELD_BINARY_BYTES;
                nameValues[LARGE_LAZY_FIELD_KEY]  = LARGE_LAZY_FIELD_TEXT;
                nameValues[TEXT_FIELD_UTF1_KEY]   = FIELD_UTF1_TEXT;
                nameValues[TEXT_FIELD_UTF2_KEY]   = FIELD_UTF2_TEXT;
            }
        }
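A short sketch of how a test typically consumes DocHelper's fields: add every Fieldable to one Document and index it into an in-memory directory. The WhitespaceAnalyzer and RAMDirectory choices are illustrative, and SetupAndIndex is a hypothetical helper, not part of DocHelper:

using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;

public static class DocHelperUsageSketch
{
    public static Directory SetupAndIndex(Fieldable[] fields)
    {
        Document doc = new Document();
        for (int i = 0; i < fields.Length; i++)
        {
            doc.Add(fields[i]);
        }

        Directory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
        writer.AddDocument(doc);
        writer.Close();
        return dir;
    }
}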
Example 43
		private static void  Add(System.Collections.IDictionary map, Fieldable field)
		{
			map[field.Name()] = field;
		}
 internal override void Start(Fieldable f)
 {
     if (fieldState.attributeSource.HasAttribute(typeof(PayloadAttribute)))
     {
         payloadAttribute = (PayloadAttribute) fieldState.attributeSource.GetAttribute(typeof(PayloadAttribute));
     }
     else
     {
         payloadAttribute = null;
     }
 }
Example 45
		internal override void  Start(Fieldable f)
		{
			termAtt = (TermAttribute) fieldState.attributeSource.AddAttribute(typeof(TermAttribute));
			consumer.Start(f);
			if (nextPerField != null)
			{
				nextPerField.Start(f);
			}
		}
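Both Start overrides above fetch shared attribute instances from the field's AttributeSource; the same pattern applies when consuming a TokenStream directly. A hedged sketch using the Lucene.Net 2.9-era attribute API (the field name and text are made up; the typeof-based AddAttribute overload mirrors the calls above):

using System;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;

public static class TermAttributeSketch
{
    public static void Main()
    {
        TokenStream ts = new WhitespaceAnalyzer().TokenStream(
            "body", new System.IO.StringReader("attribute based token api"));

        // AddAttribute returns the stream's shared TermAttribute instance; its
        // contents change on every successful IncrementToken() call.
        TermAttribute termAtt = (TermAttribute) ts.AddAttribute(typeof(TermAttribute));

        while (ts.IncrementToken())
        {
            Console.WriteLine(termAtt.Term());
        }
        ts.Close();
    }
}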
Example 47
		private static void  Add(System.Collections.IDictionary map, Fieldable field)
		{
			if (field == null) System.Console.WriteLine("FIELD IS NULL!!!");
			if (map == null) System.Console.WriteLine("MAP IS NULL!!!");
			if (field.Name() == null) System.Console.WriteLine("FIELD NAME IS NULL!!!");
			map[field.Name()] = field;
		}